feat(stt): add /api/stt endpoint using OpenAI; mount route; add Transcribe button in Recorder UI; update plan
This commit is contained in:
parent
4ad9c311a2
commit
498b49c474
3
PLAN.md
3
PLAN.md
@ -68,7 +68,8 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
|
||||
## Upcoming Next Actions
|
||||
- [x] Backend endpoint for audio upload `/api/media/audio` (accept WebM/PCM) — implemented with MinIO via AWS SDK v3
|
||||
- [x] S3-compatible adapter using MinIO (`S3_ENDPOINT`, `S3_ACCESS_KEY`, `S3_SECRET_KEY`)
|
||||
- [ ] Add STT trigger in UI: send blob to backend, call OpenAI STT, render transcript
|
||||
- [ ] Backend STT endpoint `/api/stt` (download from MinIO, call OpenAI STT, return transcript)
|
||||
- [ ] Add STT trigger in UI: call `/api/stt` with `{ bucket, key }` and render transcript
|
||||
|
||||
## MinIO Integration Checklist
|
||||
- [ ] Deploy MinIO on VPS (console `:9001`, API `:9000`).
|
||||
|
||||
@ -8,6 +8,8 @@ export default function Recorder() {
|
||||
const [audioUrl, setAudioUrl] = useState<string | null>(null);
|
||||
const [audioBlob, setAudioBlob] = useState<Blob | null>(null);
|
||||
const [uploadKey, setUploadKey] = useState<string | null>(null);
|
||||
const [uploadBucket, setUploadBucket] = useState<string | null>(null);
|
||||
const [transcript, setTranscript] = useState<string>('');
|
||||
const [error, setError] = useState<string>('');
|
||||
|
||||
const requestStream = async (): Promise<MediaStream | null> => {
|
||||
@ -59,6 +61,8 @@ export default function Recorder() {
|
||||
try {
|
||||
setError('');
|
||||
setUploadKey(null);
|
||||
setUploadBucket(null);
|
||||
setTranscript('');
|
||||
if (!audioBlob) {
|
||||
setError('No audio to upload');
|
||||
return;
|
||||
@ -75,11 +79,36 @@ export default function Recorder() {
|
||||
}
|
||||
const data = await res.json();
|
||||
setUploadKey(data.key || 'uploaded');
|
||||
setUploadBucket(data.bucket || null);
|
||||
} catch (e: any) {
|
||||
setError(e?.message || 'Upload failed');
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Ask the backend to transcribe the previously uploaded recording.
 *
 * Posts `{ bucket, key }` as JSON to `/api/stt` and stores the returned
 * transcript in component state. Failures are never thrown to the caller;
 * they are surfaced through the `error` state instead. Does nothing except
 * report an error when no upload key is available yet.
 */
const transcribe = async () => {
  // Reset the output of any previous attempt before starting a new one.
  setError('');
  setTranscript('');

  if (!uploadKey) {
    setError('Upload audio before transcribing');
    return;
  }

  try {
    const res = await fetch('/api/stt', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      // JSON.stringify drops `undefined` properties, so an unknown bucket is
      // simply omitted from the request body.
      body: JSON.stringify({ bucket: uploadBucket ?? undefined, key: uploadKey }),
    });

    if (!res.ok) {
      const txt = await res.text();
      throw new Error(`STT failed: ${res.status} ${txt}`);
    }

    // The response is external input: only accept a string transcript so a
    // malformed payload can never end up in string-typed state and be rendered.
    const data: unknown = await res.json();
    const candidate =
      typeof data === 'object' && data !== null
        ? (data as { transcript?: unknown }).transcript
        : undefined;
    setTranscript(typeof candidate === 'string' ? candidate : '');
  } catch (e: unknown) {
    const message = e instanceof Error ? e.message : '';
    setError(message || 'Transcription failed');
  }
};
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (audioUrl) URL.revokeObjectURL(audioUrl);
|
||||
@ -93,6 +122,7 @@ export default function Recorder() {
|
||||
<Button variant="contained" disabled={recording} onClick={startRecording}>Start</Button>
|
||||
<Button variant="outlined" disabled={!recording} onClick={stopRecording}>Stop</Button>
|
||||
<Button variant="text" disabled={!audioBlob} onClick={uploadAudio}>Upload</Button>
|
||||
<Button variant="text" disabled={!uploadKey} onClick={transcribe}>Transcribe</Button>
|
||||
</Stack>
|
||||
{error && <Typography color="error" sx={{ mb: 2 }}>{error}</Typography>}
|
||||
{audioUrl && (
|
||||
@ -105,6 +135,12 @@ export default function Recorder() {
|
||||
Uploaded as key: {uploadKey}
|
||||
</Typography>
|
||||
)}
|
||||
{transcript && (
|
||||
<Box sx={{ mt: 2 }}>
|
||||
<Typography variant="subtitle1">Transcript</Typography>
|
||||
<Typography variant="body2" sx={{ whiteSpace: 'pre-wrap' }}>{transcript}</Typography>
|
||||
</Box>
|
||||
)}
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
@ -5,6 +5,7 @@ import express from 'express';
|
||||
import cors from 'cors';
|
||||
import authRouter from './auth';
|
||||
import mediaRouter from './media';
|
||||
import sttRouter from './stt';
|
||||
|
||||
const app = express();
|
||||
console.log('ENV ADMIN_PASSWORD loaded:', Boolean(process.env.ADMIN_PASSWORD));
|
||||
@ -19,6 +20,7 @@ app.use(express.json());
|
||||
// API routes
|
||||
app.use('/api/auth', authRouter);
|
||||
app.use('/api/media', mediaRouter);
|
||||
app.use('/api/stt', sttRouter);
|
||||
app.get('/api/health', (_req, res) => {
|
||||
res.json({ ok: true });
|
||||
});
|
||||
|
||||
53
apps/api/src/stt.ts
Normal file
53
apps/api/src/stt.ts
Normal file
@ -0,0 +1,53 @@
|
||||
import express from 'express';
|
||||
import { fetch, FormData } from 'undici';
|
||||
import { downloadObject } from './storage/s3';
|
||||
|
||||
const router = express.Router();

/**
 * Filename extension to advertise to OpenAI for each stored content type.
 * Anything not listed here falls back to `webm`, the previous fixed value.
 */
const EXTENSION_BY_CONTENT_TYPE = new Map<string, string>([
  ['audio/webm', 'webm'],
  ['video/webm', 'webm'],
  ['audio/wav', 'wav'],
  ['audio/wave', 'wav'],
  ['audio/x-wav', 'wav'],
  ['audio/mpeg', 'mp3'],
  ['audio/mp3', 'mp3'],
  ['audio/mp4', 'mp4'],
  ['audio/m4a', 'm4a'],
  ['audio/x-m4a', 'm4a'],
  ['audio/ogg', 'ogg'],
  ['audio/flac', 'flac'],
]);

/**
 * Pick the multipart filename for the audio sent to OpenAI.
 *
 * The name's extension should agree with the payload; a name that contradicts
 * the actual container can cause the upload to be rejected. Parameters such as
 * `;codecs=opus` are ignored when matching the content type.
 *
 * @param contentType Content type reported by storage, if any.
 * @returns `audio.<ext>`, defaulting to `audio.webm` for unknown types.
 */
function uploadFilename(contentType: string | null | undefined): string {
  const mime = (contentType ?? '').split(';')[0]?.trim().toLowerCase() ?? '';
  return `audio.${EXTENSION_BY_CONTENT_TYPE.get(mime) ?? 'webm'}`;
}

/** True when a JSON field is absent, null, or a string. */
function isOptionalString(value: unknown): value is string | null | undefined {
  return value == null || typeof value === 'string';
}

/**
 * POST / (mounted by the app under `/api/stt`).
 *
 * Request body: `{ bucket?: string, key: string }`. When `bucket` is omitted
 * the `S3_BUCKET` environment variable is used instead.
 *
 * Downloads the object from storage, forwards it to OpenAI's transcription
 * endpoint with the `whisper-1` model and responds with
 * `{ success: true, transcript }`.
 *
 * Responses:
 * - 400 when `bucket`/`key` are missing or not strings.
 * - 500 when `OPENAI_API_KEY` is not configured, when OpenAI answers with a
 *   non-2xx status (`details` carries the upstream body), or on any
 *   unexpected error.
 */
router.post('/', async (req, res) => {
  try {
    // req.body is untrusted and may be absent entirely if no body parser
    // handled the request, so validate it at runtime instead of casting.
    const raw: unknown = req.body;
    const fields = (typeof raw === 'object' && raw !== null ? raw : {}) as {
      bucket?: unknown;
      key?: unknown;
    };
    const { bucket: bodyBucket, key } = fields;

    if (!isOptionalString(bodyBucket) || !isOptionalString(key)) {
      return res.status(400).json({ error: 'bucket and key must be strings' });
    }

    // NOTE(review): the bucket is caller-controlled, so a client can ask for
    // any bucket the server's storage credentials can read. Consider
    // restricting it to an allow-list if that is not intended.
    const bucket = bodyBucket || process.env.S3_BUCKET;
    if (!bucket || !key) {
      return res.status(400).json({ error: 'bucket (or env S3_BUCKET) and key are required' });
    }
    if (!process.env.OPENAI_API_KEY) {
      return res.status(500).json({ error: 'OPENAI_API_KEY not configured' });
    }

    const { buffer, contentType } = await downloadObject({ bucket, key });

    // Build multipart form for OpenAI Whisper.
    const fd = new FormData();
    // Copy exactly this Buffer's bytes into a standalone ArrayBuffer: the
    // backing store of a Node Buffer can be larger than the Buffer's own view.
    const ab = buffer.buffer.slice(
      buffer.byteOffset,
      buffer.byteOffset + buffer.byteLength,
    ) as ArrayBuffer;
    const blob = new Blob([ab], { type: contentType || 'audio/webm' });
    // OpenAI expects a filename; keep its extension in line with the content type.
    fd.append('file', blob, uploadFilename(contentType));
    fd.append('model', 'whisper-1');
    // Optional: language hints, prompt, temperature etc.

    const resp = await fetch('https://api.openai.com/v1/audio/transcriptions', {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
      },
      body: fd,
    });

    if (!resp.ok) {
      const text = await resp.text();
      console.error('OpenAI STT error:', resp.status, text);
      return res.status(500).json({ error: 'STT failed', details: text });
    }

    // OpenAI returns { text: "..." }; treat anything else as an empty transcript.
    const data: unknown = await resp.json();
    const text =
      typeof data === 'object' && data !== null ? (data as { text?: unknown }).text : undefined;
    return res.json({ success: true, transcript: typeof text === 'string' ? text : '' });
  } catch (err: unknown) {
    console.error('STT error:', err);
    return res.status(500).json({ error: 'STT error' });
  }
});

export default router;
|
||||
Loading…
Reference in New Issue
Block a user