Files
english-ai-coach/tools/tts-server.mjs
2026-04-28 15:54:10 +02:00

97 lines
2.9 KiB
JavaScript

import crypto from 'node:crypto';
import fs from 'node:fs';
import path from 'node:path';
import cors from 'cors';
import express from 'express';
import { MsEdgeTTS, OUTPUT_FORMAT } from 'msedge-tts';
// Express application that exposes the TTS endpoints and serves cached audio files.
const app = express();
// Listening port: TTS_PORT when set, otherwise 3333.
// NOTE(review): a non-numeric TTS_PORT makes Number(...) yield NaN here.
const port = Number(process.env.TTS_PORT ?? 3333);
// Bind address: TTS_HOST when set, otherwise 0.0.0.0.
const host = process.env.TTS_HOST ?? '0.0.0.0';
// Cache directory for generated audio: `.tts-cache` under the current working directory.
const cacheDir = path.resolve(process.cwd(), '.tts-cache');
// Create the cache directory at startup; `recursive: true` tolerates an existing directory.
fs.mkdirSync(cacheDir, { recursive: true });
// Enable CORS using the middleware's default options.
app.use(cors());
// Parse JSON request bodies, capped at 64kb.
app.use(express.json({ limit: '64kb' }));
// Serve files from the cache directory under /audio with a one-hour max-age.
app.use('/audio', express.static(cacheDir, { maxAge: '1h' }));
// GET /health — reports that the server is up and which voice is configured by default.
app.get('/health', (_req, res) => {
  const voice = getVoiceFromEnv();
  res.json({ ok: true, voice });
});
/**
 * GET /voices — lists the voices whose locale starts with `en-`.
 *
 * Responds with an array of `{ name, friendlyName, gender, locale }` objects,
 * or with HTTP 502 and `{ error }` when the voice list cannot be fetched.
 */
app.get('/voices', async (_request, response) => {
  try {
    const tts = new MsEdgeTTS({});
    const voices = await tts.getVoices();
    response.json(
      voices
        .filter((voice) => voice.Locale.startsWith('en-'))
        .map((voice) => ({
          name: voice.ShortName,
          friendlyName: voice.FriendlyName,
          gender: voice.Gender,
          locale: voice.Locale,
        })),
    );
  } catch (error) {
    // A rejection escaping an async handler is not forwarded to error
    // middleware by Express 4, which would leave the request unanswered.
    console.error('Failed to fetch TTS voices:', error);
    response.status(502).json({ error: 'Failed to fetch voices' });
  }
});
/**
 * POST /tts — synthesizes speech for the posted text and returns a URL to the MP3.
 *
 * Request body (JSON): `{ text, voice?, rate?, pitch? }`. Missing options fall
 * back to TTS_VOICE / TTS_RATE / TTS_PITCH and then to built-in defaults.
 * Audio is cached on disk under a SHA-256 of voice, rate, pitch and text, so
 * identical requests reuse the same file.
 *
 * Responses:
 *   200 `{ audioUrl }` — URL under /audio for the generated (or cached) file.
 *   400 `{ error }`    — text is empty after sanitizing, or an option contains markup characters.
 *   502 `{ error }`    — synthesis failed.
 */
app.post('/tts', async (request, response) => {
  const text = sanitizeText(String(request.body?.text ?? ''));
  if (!text) {
    response.status(400).json({ error: 'Missing text' });
    return;
  }

  const voice = String(request.body?.voice ?? getVoiceFromEnv());
  const rate = String(request.body?.rate ?? process.env.TTS_RATE ?? '-8%');
  const pitch = String(request.body?.pitch ?? process.env.TTS_PITCH ?? '+0Hz');

  // Only `text` goes through escapeXml; voice/rate/pitch reach the TTS library
  // as-is. Presumably the library embeds them in SSML attributes (confirm
  // against the library), so refuse values carrying XML markup characters.
  const hasMarkup = [voice, rate, pitch].some((option) => /[<>"'&]/.test(option));
  if (hasMarkup) {
    response.status(400).json({ error: 'Invalid voice, rate or pitch' });
    return;
  }

  const hash = crypto.createHash('sha256').update(`${voice}|${rate}|${pitch}|${text}`).digest('hex');
  const filename = `${hash}.mp3`;
  const filePath = path.join(cacheDir, filename);

  if (!fs.existsSync(filePath)) {
    let tts;
    try {
      tts = new MsEdgeTTS({});
      await tts.setMetadata(voice, OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3);
      const { audioFilePath, metadataFilePath } = await tts.toFile(cacheDir, escapeXml(text), { rate, pitch });
      // Move the library's output to its content-addressed cache name.
      fs.renameSync(audioFilePath, filePath);
      if (metadataFilePath && fs.existsSync(metadataFilePath)) {
        fs.unlinkSync(metadataFilePath);
      }
    } catch (error) {
      // Answer the request instead of letting the rejection escape the handler.
      console.error('TTS synthesis failed:', error);
      response.status(502).json({ error: 'TTS synthesis failed' });
      return;
    } finally {
      // Release the client on success and on failure; a failing close must
      // not mask the outcome of the synthesis itself.
      try {
        tts?.close();
      } catch (closeError) {
        console.warn('Failed to close TTS client:', closeError);
      }
    }
  }

  response.json({ audioUrl: `${request.protocol}://${request.get('host')}/audio/${filename}` });
});
// Start the HTTP server and log the bound address and the default voice once it is listening.
app.listen(port, host, () => {
  const address = `http://${host}:${port}`;
  const defaultVoice = getVoiceFromEnv();
  console.log(`TTS server listening on ${address}`);
  console.log(`Voice: ${defaultVoice}`);
});
/**
 * Resolves the default voice name.
 *
 * @returns {string} The value of TTS_VOICE when it is defined, otherwise 'en-US-JennyNeural'.
 */
function getVoiceFromEnv() {
  const { TTS_VOICE: configuredVoice } = process.env;
  return configuredVoice ?? 'en-US-JennyNeural';
}
/**
 * Strips markup from text so that only speakable content remains.
 *
 * Removes `<think>…</think>` blocks, fenced code blocks and URLs, unwraps
 * inline code and `**bold**`, drops stray `* _ # > ~` characters, collapses
 * whitespace, and limits the result to `maxLength` UTF-16 code units.
 *
 * @param {string} value - Raw text; null/undefined are treated as empty.
 * @param {number} [maxLength=1800] - Non-negative upper bound on the result length.
 * @returns {string} Cleaned text with no leading or trailing whitespace.
 */
function sanitizeText(value, maxLength = 1800) {
  const cleaned = String(value ?? '')
    .replace(/<think>[\s\S]*?<\/think>/gi, '')
    .replace(/```[\s\S]*?```/g, ' ')
    .replace(/`([^`]+)`/g, '$1')
    .replace(/\*\*([^*]+)\*\*/g, '$1')
    .replace(/[*_#>~]/g, '')
    .replace(/https?:\/\/\S+/g, '')
    .replace(/\s+/g, ' ')
    .trim();

  if (cleaned.length <= maxLength) {
    return cleaned;
  }

  let clipped = cleaned.slice(0, maxLength);
  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair; drop a dangling high surrogate.
  const lastUnit = clipped.charCodeAt(clipped.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    clipped = clipped.slice(0, -1);
  }
  // The cut can also expose an interior space as the last character.
  return clipped.trimEnd();
}
/**
 * Escapes the five XML special characters (& < > " ') as named entities.
 *
 * @param {string} value - Text to escape.
 * @returns {string} The text with each special character replaced by its entity.
 */
function escapeXml(value) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&apos;',
  };
  // A single pass never re-escapes the ampersands it has just produced.
  return value.replace(/[&<>"']/g, (character) => entities[character]);
}