97 lines
2.9 KiB
JavaScript
97 lines
2.9 KiB
JavaScript
import crypto from 'node:crypto';
|
|
import fs from 'node:fs';
|
|
import path from 'node:path';
|
|
import cors from 'cors';
|
|
import express from 'express';
|
|
import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts';
|
|
|
|
// Express application and its runtime configuration. The listen port and
// host can be overridden with the TTS_PORT and TTS_HOST environment variables.
const app = express();

const { TTS_PORT = 3333, TTS_HOST = '0.0.0.0' } = process.env;
const port = Number(TTS_PORT);
const host = TTS_HOST;

// Synthesized audio is cached on disk under the current working directory.
// path.resolve() anchors a relative segment at process.cwd().
const cacheDir = path.resolve('.tts-cache');

// Create the cache directory up front (no-op when it already exists).
fs.mkdirSync(cacheDir, { recursive: true });

// Global middleware, registered in order: CORS headers first, then the JSON
// body parser with request bodies capped at 64kb.
const globalMiddleware = [cors(), express.json({ limit: '64kb' })];
for (const handler of globalMiddleware) {
  app.use(handler);
}

// Serve cached audio files from /audio with a one-hour max-age.
app.use('/audio', express.static(cacheDir, { maxAge: '1h' }));
|
|
|
|
// GET /health — liveness probe. Always answers `ok: true` together with the
// voice currently returned by getVoiceFromEnv().
app.get('/health', (_req, res) => {
  const voice = getVoiceFromEnv();
  res.json({ ok: true, voice });
});
|
|
|
|
/**
 * GET /voices — lists the voices reported by the TTS library whose locale
 * starts with `en-`.
 *
 * Success: a JSON array of `{ name, friendlyName, gender, locale }` objects.
 * Failure: HTTP 502 with `{ error: 'Failed to fetch voices' }`.
 */
app.get('/voices', async (_request, response) => {
  try {
    const tts = new MsEdgeTTS({});
    const voices = await tts.getVoices();

    const englishVoices = voices
      .filter((voice) => voice.Locale.startsWith('en-'))
      .map((voice) => ({
        name: voice.ShortName,
        friendlyName: voice.FriendlyName,
        gender: voice.Gender,
        locale: voice.Locale,
      }));

    response.json(englishVoices);
  } catch (error) {
    // Express versions before 5 do not forward a rejected promise from an
    // async handler to the error middleware, so the failure is handled here
    // rather than leaving the request hanging on an unhandled rejection.
    console.error('Failed to fetch voices:', error);
    response.status(502).json({ error: 'Failed to fetch voices' });
  }
});
|
|
|
|
/**
 * POST /tts — synthesizes speech for the posted text and returns its URL.
 *
 * Request body (JSON):
 *   - `text`  (required) text to speak; cleaned with sanitizeText() first.
 *   - `voice` (optional) defaults to getVoiceFromEnv().
 *   - `rate`  (optional) defaults to TTS_RATE or '-8%'.
 *   - `pitch` (optional) defaults to TTS_PITCH or '+0Hz'.
 *
 * Responses:
 *   - 200 `{ audioUrl }` pointing at the cached MP3 under /audio.
 *   - 400 `{ error: 'Missing text' }` when nothing is left after sanitizing.
 *   - 500 `{ error: 'Speech synthesis failed' }` when synthesis or caching
 *     throws.
 */
app.post('/tts', async (request, response) => {
  const text = sanitizeText(String(request.body?.text ?? ''));

  if (!text) {
    response.status(400).json({ error: 'Missing text' });
    return;
  }

  const voice = String(request.body?.voice ?? getVoiceFromEnv());
  const rate = String(request.body?.rate ?? process.env.TTS_RATE ?? '-8%');
  const pitch = String(request.body?.pitch ?? process.env.TTS_PITCH ?? '+0Hz');

  // The cache key covers every input that influences the audio, so identical
  // requests map to the same file name.
  const hash = crypto.createHash('sha256').update(`${voice}|${rate}|${pitch}|${text}`).digest('hex');
  const filename = `${hash}.mp3`;
  const filePath = path.join(cacheDir, filename);

  try {
    if (!fs.existsSync(filePath)) {
      await synthesizeToCache({ text, voice, rate, pitch, filePath });
    }
  } catch (error) {
    // Express versions before 5 do not forward rejections from async
    // handlers, so report the failure explicitly instead of leaving the
    // request without a response.
    console.error('TTS synthesis failed:', error);
    response.status(500).json({ error: 'Speech synthesis failed' });
    return;
  }

  response.json({ audioUrl: `${request.protocol}://${request.get('host')}/audio/${filename}` });
});

/**
 * Synthesizes `text` and moves the resulting audio file to `filePath`.
 *
 * The TTS client is closed in a `finally` block so it is released even when
 * synthesis or the file move fails.
 *
 * @param {{ text: string, voice: string, rate: string, pitch: string, filePath: string }} job
 * @returns {Promise<void>}
 */
async function synthesizeToCache({ text, voice, rate, pitch, filePath }) {
  const tts = new MsEdgeTTS({});

  try {
    await tts.setMetadata(voice, OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3);
    const { audioFilePath, metadataFilePath } = await tts.toFile(cacheDir, escapeXml(text), { rate, pitch });

    await fs.promises.rename(audioFilePath, filePath);

    if (metadataFilePath) {
      // `force: true` makes removal a no-op when the file does not exist.
      await fs.promises.rm(metadataFilePath, { force: true });
    }
  } finally {
    tts.close();
  }
}
|
|
|
|
// Start accepting connections and print the startup banner once bound.
app.listen(port, host, function onListening() {
  const banner = [
    `TTS server listening on http://${host}:${port}`,
    `Voice: ${getVoiceFromEnv()}`,
  ];

  for (const line of banner) {
    console.log(line);
  }
});
|
|
|
|
/**
 * Returns the default voice name.
 *
 * Reads the TTS_VOICE environment variable on every call. An unset, empty or
 * whitespace-only value falls back to 'en-US-JennyNeural', so a blank
 * `TTS_VOICE=` entry never yields an empty voice name.
 *
 * @returns {string} The configured voice name, trimmed, or the default.
 */
function getVoiceFromEnv() {
  const configured = process.env.TTS_VOICE?.trim();
  return configured ? configured : 'en-US-JennyNeural';
}
|
|
|
|
/**
 * Strips markup that should not be read aloud and caps the text length.
 *
 * Applied in order:
 *   1. `<think>…</think>` sections are removed (case-insensitive).
 *   2. Fenced code blocks are replaced by a space.
 *   3. Inline code and `**bold**` markers are unwrapped, keeping their text.
 *   4. Remaining `* _ # > ~` characters are removed.
 *   5. http/https URLs are removed.
 *   6. Whitespace runs collapse to one space and the ends are trimmed.
 *   7. The result is truncated to `maxLength` UTF-16 code units.
 *
 * @param {string} value - Raw text; null/undefined is treated as ''.
 * @param {number} [maxLength=1800] - Non-negative cap on the result length.
 * @returns {string} The cleaned text, possibly empty.
 */
function sanitizeText(value, maxLength = 1800) {
  const cleaned = String(value ?? '')
    .replace(/<think>[\s\S]*?<\/think>/gi, '')
    .replace(/```[\s\S]*?```/g, ' ')
    .replace(/`([^`]+)`/g, '$1')
    .replace(/\*\*([^*]+)\*\*/g, '$1')
    .replace(/[*_#>~]/g, '')
    .replace(/https?:\/\/\S+/g, '')
    .replace(/\s+/g, ' ')
    .trim();

  if (cleaned.length <= maxLength) {
    return cleaned;
  }

  // Never cut between the two halves of a surrogate pair: if the last kept
  // code unit is a high surrogate, drop it too.
  let end = maxLength;
  const lastUnit = cleaned.charCodeAt(end - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    end -= 1;
  }

  // Truncation can land just after an inner space; trim it off again.
  return cleaned.slice(0, end).trimEnd();
}
|
|
|
|
/**
 * Escapes the five XML special characters so text can be embedded in an
 * XML/SSML document.
 *
 * @param {string} value - Text to escape.
 * @returns {string} `value` with `& < > " '` replaced by their XML entities.
 */
function escapeXml(value) {
  return value
    // Ampersand must go first, otherwise the `&` introduced by the other
    // entities would be escaped a second time.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}
|