Compare commits

...

7 Commits

7 changed files with 72 additions and 17 deletions

20
PLAN.md
View File

@ -16,16 +16,16 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
- [x] Build password gate (frontend form + backend verification).
- [x] Connect FE<->BE via Vite proxy and enable CORS.
- [x] Load .env in API with explicit path.
- [ ] Bootstrap base admin layout with navigation placeholders.
- [x] Bootstrap base admin layout with navigation placeholders.
- [ ] Document manual test checklist for auth flow.
- **M2 · Voice Capture Pipeline** (Scope: Goal 2)
- [ ] Add browser audio recorder UI & permissions handling.
- [ ] Stream/upload audio blobs to backend endpoint.
- [ ] Persist raw audio (S3/local) with metadata.
- [x] Add browser audio recorder UI & permissions handling.
- [x] Stream/upload audio blobs to backend endpoint.
- [x] Persist raw audio (S3/local) with metadata.
- **M3 · Speech-to-Text Integration** (Scope: Goal 3)
- [ ] Invoke OpenAI STT API server-side.
- [ ] Surface transcript in rich editor state with status feedback.
- [ ] Log conversion lifecycle for debug.
- [x] Invoke OpenAI STT API server-side.
- [x] Surface transcript in rich editor state with status feedback.
- [x] Log conversion lifecycle for debug.
- **M4 · Rich Editor Enhancements** (Scope: Goal 4)
- [ ] Integrate a block-based rich-text editor (e.g., TipTap) with custom nodes.
- [ ] Implement file/image upload widget wired to storage.
@ -44,6 +44,7 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
- [ ] Loading/error states across workflows.
- [ ] Responsive layout tuning & accessibility audit.
- [ ] Smoke test scripts for manual verification.
- [x] Recorder playback compatibility (MediaRecorder MIME type selection, WebM/MP4).
## Environment & Tooling TODOs
- **Core tooling**
@ -51,7 +52,7 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
- [ ] ESLint + Prettier shared config.
- [ ] Optional commit hooks (lint-staged, Husky).
- **Secrets**
- [ ] `.env.example` for common keys (ADMIN_PASSWORD_HASH, OPENAI_API_KEY, GHOST_ADMIN_API_KEY, S3 credentials).
- [x] `.env.example` for common keys (ADMIN_PASSWORD_HASH, OPENAI_API_KEY, GHOST_ADMIN_API_KEY, S3 credentials).
- [ ] Instructions for local secret population.
## Tooling Decisions
@ -64,6 +65,7 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
- [x] Persist auth state (cookie/localStorage flag after success)
- [x] Add simple health route `/api/health` and error handler
- [x] Begin audio capture UI (mic permission + basic recorder)
- [x] Add concise request logging (morgan) and S3 operation logs for visibility
## Upcoming Next Actions
- [x] Backend endpoint for audio upload `/api/media/audio` (accept WebM/PCM) — implemented with MinIO via AWS SDK v3
@ -74,7 +76,7 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
## Next Priorities
- [x] Save transcript into an editor document (draft state) and display in editor.
- [x] Add simple document persistence API (filesystem) at `/api/drafts` (list/get/save).
- [ ] Wire editor to use `/api/drafts` (load/save) instead of only localStorage.
- [x] Wire editor to use `/api/drafts` (load/save) instead of only localStorage.
- [ ] List uploaded media items and allow re-use/deletion.
## Verification Steps

View File

@ -4,6 +4,7 @@ import { Box, Button, Stack, Typography } from '@mui/material';
export default function Recorder({ onTranscript }: { onTranscript?: (t: string) => void }) {
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
const chunksRef = useRef<Blob[]>([]);
const mimeRef = useRef<string>('audio/webm');
const [recording, setRecording] = useState(false);
const [audioUrl, setAudioUrl] = useState<string | null>(null);
const [audioBlob, setAudioBlob] = useState<Blob | null>(null);
@ -11,6 +12,8 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
const [uploadBucket, setUploadBucket] = useState<string | null>(null);
const [transcript, setTranscript] = useState<string>('');
const [error, setError] = useState<string>('');
const [isUploading, setIsUploading] = useState(false);
const [isTranscribing, setIsTranscribing] = useState(false);
const requestStream = async (): Promise<MediaStream | null> => {
try {
@ -27,7 +30,24 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
const stream = await requestStream();
if (!stream) return;
const mr = new MediaRecorder(stream);
// Pick a supported mimeType (Safari prefers audio/mp4, Chrome supports audio/webm)
const candidates = [
'audio/webm;codecs=opus',
'audio/webm',
'audio/mp4;codecs=opus',
'audio/mp4'
];
let selected: string | undefined;
for (const c of candidates) {
// @ts-ignore
if ((window as any).MediaRecorder && MediaRecorder.isTypeSupported && MediaRecorder.isTypeSupported(c)) {
selected = c;
break;
}
}
mimeRef.current = selected || 'audio/webm';
const mr = selected ? new MediaRecorder(stream, { mimeType: selected }) : new MediaRecorder(stream);
mediaRecorderRef.current = mr;
chunksRef.current = [];
@ -37,7 +57,7 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
}
};
mr.onstop = () => {
const blob = new Blob(chunksRef.current, { type: 'audio/webm' });
const blob = new Blob(chunksRef.current, { type: mimeRef.current });
const url = URL.createObjectURL(blob);
setAudioUrl((prev) => {
if (prev) URL.revokeObjectURL(prev);
@ -63,12 +83,14 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
setUploadKey(null);
setUploadBucket(null);
setTranscript('');
setIsUploading(true);
if (!audioBlob) {
setError('No audio to upload');
return;
}
const form = new FormData();
form.append('audio', audioBlob, 'recording.webm');
const ext = mimeRef.current.includes('mp4') ? 'm4a' : 'webm';
form.append('audio', audioBlob, `recording.${ext}`);
const res = await fetch('/api/media/audio', {
method: 'POST',
body: form,
@ -82,6 +104,8 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
setUploadBucket(data.bucket || null);
} catch (e: any) {
setError(e?.message || 'Upload failed');
} finally {
setIsUploading(false);
}
};
@ -89,6 +113,7 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
try {
setError('');
setTranscript('');
setIsTranscribing(true);
if (!uploadKey) {
setError('Upload audio before transcribing');
return;
@ -108,6 +133,8 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
if (onTranscript) onTranscript(t);
} catch (e: any) {
setError(e?.message || 'Transcription failed');
} finally {
setIsTranscribing(false);
}
};
@ -121,12 +148,17 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
<Box>
<Typography variant="h6" sx={{ mb: 1 }}>Audio Recorder</Typography>
<Stack direction="row" spacing={2} sx={{ mb: 2 }}>
<Button variant="contained" disabled={recording} onClick={startRecording}>Start</Button>
<Button variant="outlined" disabled={!recording} onClick={stopRecording}>Stop</Button>
<Button variant="text" disabled={!audioBlob} onClick={uploadAudio}>Upload</Button>
<Button variant="text" disabled={!uploadKey} onClick={transcribe}>Transcribe</Button>
<Button variant="contained" disabled={recording || isUploading || isTranscribing} onClick={startRecording}>Start</Button>
<Button variant="outlined" disabled={!recording || isUploading || isTranscribing} onClick={stopRecording}>Stop</Button>
<Button variant="text" disabled={!audioBlob || isUploading || isTranscribing} onClick={uploadAudio}>Upload</Button>
<Button variant="text" disabled={!uploadKey || isUploading || isTranscribing} onClick={transcribe}>Transcribe</Button>
</Stack>
{error && <Typography color="error" sx={{ mb: 2 }}>{error}</Typography>}
{(isUploading || isTranscribing) && (
<Typography variant="body2" sx={{ mb: 1 }}>
{isUploading ? 'Uploading…' : 'Transcribing…'}
</Typography>
)}
{audioUrl && (
<Box>
<audio controls src={audioUrl} />

View File

@ -70,6 +70,7 @@
"devDependencies": {
"@types/cors": "^2.8.19",
"@types/express": "^5.0.3",
"@types/morgan": "^1.9.10",
"@types/multer": "^2.0.0",
"@types/node": "^24.6.0",
"after": "0.8.2",

View File

@ -3,6 +3,7 @@ import dotenv from 'dotenv';
dotenv.config({ path: path.resolve(__dirname, '../../../.env') });
import express from 'express';
import cors from 'cors';
import morgan from 'morgan';
import authRouter from './auth';
import mediaRouter from './media';
import sttRouter from './stt';
@ -16,6 +17,7 @@ app.use(cors({
origin: 'http://localhost:5173',
credentials: true
}));
app.use(morgan('dev'));
app.use(express.json());
// API routes

View File

@ -11,6 +11,7 @@ router.post('/audio', upload.single('audio'), async (
res: express.Response
) => {
try {
console.log('[API] POST /api/media/audio');
const { S3_ENDPOINT, S3_ACCESS_KEY, S3_SECRET_KEY } = process.env;
if (!S3_ENDPOINT || !S3_ACCESS_KEY || !S3_SECRET_KEY) {
console.error('Upload failed: missing S3 config (S3_ENDPOINT/S3_ACCESS_KEY/S3_SECRET_KEY)');
@ -23,6 +24,7 @@ router.post('/audio', upload.single('audio'), async (
const mime = req.file.mimetype || 'application/octet-stream';
const ext = mime === 'audio/webm' ? 'webm' : mime.split('/')[1] || 'bin';
const key = `audio/${new Date().toISOString().slice(0,10)}/${crypto.randomUUID()}.${ext}`;
console.log('[API] Uploading file', { mime, size: req.file.size, bucket, key });
const out = await uploadBuffer({
bucket,
@ -31,6 +33,7 @@ router.post('/audio', upload.single('audio'), async (
contentType: mime,
});
console.log('[API] Upload success', out);
return res.status(200).json({ success: true, ...out });
} catch (err) {
console.error('Upload failed:', err);

View File

@ -25,6 +25,12 @@ export async function uploadBuffer(params: {
contentType?: string;
}) {
const s3 = getS3Client();
console.log('[S3] Upload start', {
bucket: params.bucket,
key: params.key,
bytes: params.body?.length ?? 0,
contentType: params.contentType || 'application/octet-stream',
});
const cmd = new PutObjectCommand({
Bucket: params.bucket,
Key: params.key,
@ -32,11 +38,13 @@ export async function uploadBuffer(params: {
ContentType: params.contentType || 'application/octet-stream',
});
await s3.send(cmd);
console.log('[S3] Upload done', { bucket: params.bucket, key: params.key });
return { bucket: params.bucket, key: params.key };
}
export async function downloadObject(params: { bucket: string; key: string }): Promise<{ buffer: Buffer; contentType: string }> {
const s3 = getS3Client();
console.log('[S3] Download start', { bucket: params.bucket, key: params.key });
const cmd = new GetObjectCommand({ Bucket: params.bucket, Key: params.key });
const res = await s3.send(cmd);
const contentType = res.ContentType || 'application/octet-stream';
@ -47,5 +55,7 @@ export async function downloadObject(params: { bucket: string; key: string }): P
body.on('end', resolve);
body.on('error', reject);
});
return { buffer: Buffer.concat(chunks), contentType };
const buffer = Buffer.concat(chunks);
console.log('[S3] Download done', { bucket: params.bucket, key: params.key, bytes: buffer.length, contentType });
return { buffer, contentType };
}

View File

@ -0,0 +1,5 @@
{
"id": "31ba935b-4424-4226-9f8b-803d401022a2",
"content": "asödknasdkjasdlkasdasdasdasdsd",
"updatedAt": "2025-10-24T01:11:46.059Z"
}