diff --git a/PLAN.md b/PLAN.md
index 3368bec..ad4e0fd 100644
--- a/PLAN.md
+++ b/PLAN.md
@@ -68,7 +68,8 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
 ## Upcoming Next Actions
 - [x] Backend endpoint for audio upload `/api/media/audio` (accept WebM/PCM) — implemented with MinIO via AWS SDK v3
 - [x] S3-compatible adapter using MinIO (`S3_ENDPOINT`, `S3_ACCESS_KEY`, `S3_SECRET_KEY`)
-- [ ] Add STT trigger in UI: send blob to backend, call OpenAI STT, render transcript
+- [ ] Backend STT endpoint `/api/stt` (download from MinIO, call OpenAI STT, return transcript)
+- [ ] Add STT trigger in UI: call `/api/stt` with `{ bucket, key }` and render transcript
 
 ## MinIO Integration Checklist
 - [ ] Deploy MinIO on VPS (console `:9001`, API `:9000`).
diff --git a/apps/admin/src/features/recorder/Recorder.tsx b/apps/admin/src/features/recorder/Recorder.tsx
index c81088d..10090b6 100644
--- a/apps/admin/src/features/recorder/Recorder.tsx
+++ b/apps/admin/src/features/recorder/Recorder.tsx
@@ -8,6 +8,8 @@ export default function Recorder() {
   const [audioUrl, setAudioUrl] = useState(null);
   const [audioBlob, setAudioBlob] = useState(null);
   const [uploadKey, setUploadKey] = useState(null);
+  const [uploadBucket, setUploadBucket] = useState(null);
+  const [transcript, setTranscript] = useState('');
   const [error, setError] = useState('');
 
   const requestStream = async (): Promise => {
@@ -59,6 +61,8 @@ export default function Recorder() {
     try {
       setError('');
       setUploadKey(null);
+      setUploadBucket(null);
+      setTranscript('');
       if (!audioBlob) {
         setError('No audio to upload');
         return;
@@ -75,11 +79,36 @@ export default function Recorder() {
       }
       const data = await res.json();
       setUploadKey(data.key || 'uploaded');
+      setUploadBucket(data.bucket || null);
     } catch (e: any) {
       setError(e?.message || 'Upload failed');
     }
   };
 
+  const transcribe = async () => {
+    try {
+      setError('');
+      setTranscript('');
+      if (!uploadKey) {
+        setError('Upload audio before transcribing');
+        return;
+      }
+      const res = await fetch('/api/stt', {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ bucket: uploadBucket ?? undefined, key: uploadKey }),
+      });
+      if (!res.ok) {
+        const txt = await res.text();
+        throw new Error(`STT failed: ${res.status} ${txt}`);
+      }
+      const data = await res.json();
+      setTranscript(data.transcript || '');
+    } catch (e: any) {
+      setError(e?.message || 'Transcription failed');
+    }
+  };
+
   useEffect(() => {
     return () => {
       if (audioUrl) URL.revokeObjectURL(audioUrl);
@@ -93,6 +122,7 @@ export default function Recorder() {
+      {error && {error}}
       {audioUrl && (
@@ -105,6 +135,12 @@ export default function Recorder() {
           Uploaded as key: {uploadKey}
       )}
+      {transcript && (
+
+          Transcript
+          {transcript}
+
+      )}
   );
 }
diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts
index d0ea598..b447d64 100644
--- a/apps/api/src/index.ts
+++ b/apps/api/src/index.ts
@@ -5,6 +5,7 @@ import express from 'express';
 import cors from 'cors';
 import authRouter from './auth';
 import mediaRouter from './media';
+import sttRouter from './stt';
 
 const app = express();
 console.log('ENV ADMIN_PASSWORD loaded:', Boolean(process.env.ADMIN_PASSWORD));
@@ -19,6 +20,7 @@ app.use(express.json());
 // API routes
 app.use('/api/auth', authRouter);
 app.use('/api/media', mediaRouter);
+app.use('/api/stt', sttRouter);
 app.get('/api/health', (_req, res) => {
   res.json({ ok: true });
 });
diff --git a/apps/api/src/stt.ts b/apps/api/src/stt.ts
new file mode 100644
index 0000000..fa20819
--- /dev/null
+++ b/apps/api/src/stt.ts
@@ -0,0 +1,107 @@
+import express from 'express';
+import { fetch, FormData } from 'undici';
+import { downloadObject } from './storage/s3';
+
+const router = express.Router();
+
+const OPENAI_TRANSCRIPTIONS_URL = 'https://api.openai.com/v1/audio/transcriptions';
+const DEFAULT_MIME_TYPE = 'audio/webm';
+const DEFAULT_EXTENSION = 'webm';
+
+// Filename extensions for the audio MIME types we may find on a stored object.
+// A Map (not an object literal) so lookups never hit Object.prototype keys.
+const EXTENSION_BY_MIME_TYPE = new Map<string, string>([
+  ['audio/webm', 'webm'],
+  ['audio/ogg', 'ogg'],
+  ['audio/wav', 'wav'],
+  ['audio/x-wav', 'wav'],
+  ['audio/mpeg', 'mp3'],
+  ['audio/mp4', 'm4a'],
+  ['audio/flac', 'flac'],
+]);
+
+/** Returns `value` if it is a non-empty string, otherwise `undefined`. */
+function nonEmptyString(value: unknown): string | undefined {
+  return typeof value === 'string' && value.length > 0 ? value : undefined;
+}
+
+/**
+ * Builds the multipart filename for an object with the given content type.
+ * Parameters such as `;codecs=opus` are ignored; unknown types fall back to
+ * `audio.webm`.
+ */
+function filenameFor(mimeType: string): string {
+  const essence = (mimeType.split(';')[0] ?? '').trim().toLowerCase();
+  return `audio.${EXTENSION_BY_MIME_TYPE.get(essence) ?? DEFAULT_EXTENSION}`;
+}
+
+/** Extracts the `text` field of an OpenAI transcription response, or `''`. */
+function transcriptFrom(payload: unknown): string {
+  if (typeof payload === 'object' && payload !== null && 'text' in payload) {
+    return typeof payload.text === 'string' ? payload.text : '';
+  }
+  return '';
+}
+
+/**
+ * POST /api/stt — transcribe an audio object that is already in object storage.
+ *
+ * Body: `{ bucket?: string; key: string }`; `bucket` falls back to `S3_BUCKET`.
+ * Responses:
+ * - 200 `{ success: true, transcript }`
+ * - 400 when `key` is missing or no bucket can be resolved
+ * - 500 when `OPENAI_API_KEY` is unset, OpenAI rejects the request
+ *   (`{ error: 'STT failed', details }`), or anything else throws
+ */
+router.post('/', async (req, res) => {
+  try {
+    // The body is untrusted JSON and may be missing entirely, so validate its
+    // shape at runtime instead of asserting a type and destructuring.
+    const body: Record<string, unknown> =
+      typeof req.body === 'object' && req.body !== null ? req.body : {};
+    const bucket = nonEmptyString(body.bucket) ?? nonEmptyString(process.env.S3_BUCKET);
+    const key = nonEmptyString(body.key);
+    if (!bucket || !key) {
+      return res.status(400).json({ error: 'bucket (or env S3_BUCKET) and key are required' });
+    }
+    const apiKey = process.env.OPENAI_API_KEY;
+    if (!apiKey) {
+      return res.status(500).json({ error: 'OPENAI_API_KEY not configured' });
+    }
+
+    const { buffer, contentType } = await downloadObject({ bucket, key });
+    const mimeType = contentType || DEFAULT_MIME_TYPE;
+
+    // Build multipart form for OpenAI Whisper
+    const fd = new FormData();
+    // Copying into a fresh Uint8Array yields a BlobPart holding exactly the
+    // object's bytes, without ArrayBuffer slicing or type assertions.
+    const blob = new Blob([new Uint8Array(buffer)], { type: mimeType });
+    // OpenAI expects a filename; keep its extension in step with the MIME type
+    fd.append('file', blob, filenameFor(mimeType));
+    fd.append('model', 'whisper-1');
+    // Optional: language hints, prompt, temperature etc.
+
+    const resp = await fetch(OPENAI_TRANSCRIPTIONS_URL, {
+      method: 'POST',
+      headers: {
+        Authorization: `Bearer ${apiKey}`,
+      },
+      body: fd,
+    });
+
+    if (!resp.ok) {
+      const text = await resp.text();
+      console.error('OpenAI STT error:', resp.status, text);
+      return res.status(500).json({ error: 'STT failed', details: text });
+    }
+
+    // OpenAI returns { text: "..." }
+    return res.json({ success: true, transcript: transcriptFrom(await resp.json()) });
+  } catch (err: unknown) {
+    console.error('STT error:', err);
+    return res.status(500).json({ error: 'STT error' });
+  }
+});
+
+export default router;