Compare commits
7 Commits
1bade47194
...
54fb0226bf
| Author | SHA1 | Date | |
|---|---|---|---|
| 54fb0226bf | |||
| dde15813be | |||
| d0398408ac | |||
| d07646f2e1 | |||
| 8e76c349eb | |||
| 18dfa5766c | |||
| 1545310945 |
20
PLAN.md
20
PLAN.md
@ -16,16 +16,16 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
|
||||
- [x] Build password gate (frontend form + backend verification).
|
||||
- [x] Connect FE<->BE via Vite proxy and enable CORS.
|
||||
- [x] Load .env in API with explicit path.
|
||||
- [ ] Bootstrap base admin layout with navigation placeholders.
|
||||
- [x] Bootstrap base admin layout with navigation placeholders.
|
||||
- [ ] Document manual test checklist for auth flow.
|
||||
- **M2 · Voice Capture Pipeline** (Scope: Goal 2)
|
||||
- [ ] Add browser audio recorder UI & permissions handling.
|
||||
- [ ] Stream/upload audio blobs to backend endpoint.
|
||||
- [ ] Persist raw audio (S3/local) with metadata.
|
||||
- [x] Add browser audio recorder UI & permissions handling.
|
||||
- [x] Stream/upload audio blobs to backend endpoint.
|
||||
- [x] Persist raw audio (S3/local) with metadata.
|
||||
- **M3 · Speech-to-Text Integration** (Scope: Goal 3)
|
||||
- [ ] Invoke OpenAI STT API server-side.
|
||||
- [ ] Surface transcript in rich editor state with status feedback.
|
||||
- [ ] Log conversion lifecycle for debug.
|
||||
- [x] Invoke OpenAI STT API server-side.
|
||||
- [x] Surface transcript in rich editor state with status feedback.
|
||||
- [x] Log conversion lifecycle for debug.
|
||||
- **M4 · Rich Editor Enhancements** (Scope: Goal 4)
|
||||
- [ ] Integrate block-based editor (e.g., TipTap/Rich text) with custom nodes.
|
||||
- [ ] Implement file/image upload widget wired to storage.
|
||||
@ -44,6 +44,7 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
|
||||
- [ ] Loading/error states across workflows.
|
||||
- [ ] Responsive layout tuning & accessibility audit.
|
||||
- [ ] Smoke test scripts for manual verification.
|
||||
- [x] Recorder playback compatibility (MediaRecorder mime selection, webm/mp4).
|
||||
|
||||
## Environment & Tooling TODOs
|
||||
- **Core tooling**
|
||||
@ -51,7 +52,7 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
|
||||
- [ ] ESLint + Prettier shared config.
|
||||
- [ ] Commit hooks (lint-staged, Husky) — optional.
|
||||
- **Secrets**
|
||||
- [ ] `.env.example` for common keys (ADMIN_PASSWORD_HASH, OPENAI_API_KEY, GHOST_ADMIN_API_KEY, S3 credentials).
|
||||
- [x] `.env.example` for common keys (ADMIN_PASSWORD_HASH, OPENAI_API_KEY, GHOST_ADMIN_API_KEY, S3 credentials).
|
||||
- [ ] Instructions for populating secrets locally.
|
||||
|
||||
## Tooling Decisions
|
||||
@ -64,6 +65,7 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
|
||||
- [x] Persist auth state (cookie/localStorage flag after success)
|
||||
- [x] Add simple health route `/api/health` and error handler
|
||||
- [x] Begin audio capture UI (mic permission + basic recorder)
|
||||
- [x] Add concise request logging (morgan) and S3 op logs for visibility
|
||||
|
||||
## Upcoming Next Actions
|
||||
- [x] Backend endpoint for audio upload `/api/media/audio` (accept WebM/PCM) — implemented with MinIO via AWS SDK v3
|
||||
@ -74,7 +76,7 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
|
||||
## Next Priorities
|
||||
- [x] Save transcript into an editor document (draft state) and display in editor.
|
||||
- [x] Add simple document persistence API (filesystem) at `/api/drafts` (list/get/save).
|
||||
- [ ] Wire editor to use `/api/drafts` (load/save) instead of only localStorage.
|
||||
- [x] Wire editor to use `/api/drafts` (load/save) instead of only localStorage.
|
||||
- [ ] List uploaded media items and allow re-use/deletion.
|
||||
|
||||
## Verification Steps
|
||||
|
||||
@ -4,6 +4,7 @@ import { Box, Button, Stack, Typography } from '@mui/material';
|
||||
export default function Recorder({ onTranscript }: { onTranscript?: (t: string) => void }) {
|
||||
const mediaRecorderRef = useRef<MediaRecorder | null>(null);
|
||||
const chunksRef = useRef<Blob[]>([]);
|
||||
const mimeRef = useRef<string>('audio/webm');
|
||||
const [recording, setRecording] = useState(false);
|
||||
const [audioUrl, setAudioUrl] = useState<string | null>(null);
|
||||
const [audioBlob, setAudioBlob] = useState<Blob | null>(null);
|
||||
@ -11,6 +12,8 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
|
||||
const [uploadBucket, setUploadBucket] = useState<string | null>(null);
|
||||
const [transcript, setTranscript] = useState<string>('');
|
||||
const [error, setError] = useState<string>('');
|
||||
const [isUploading, setIsUploading] = useState(false);
|
||||
const [isTranscribing, setIsTranscribing] = useState(false);
|
||||
|
||||
const requestStream = async (): Promise<MediaStream | null> => {
|
||||
try {
|
||||
@ -27,7 +30,24 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
|
||||
const stream = await requestStream();
|
||||
if (!stream) return;
|
||||
|
||||
const mr = new MediaRecorder(stream);
|
||||
// Pick a supported mimeType (Safari prefers audio/mp4, Chrome supports audio/webm)
|
||||
const candidates = [
|
||||
'audio/webm;codecs=opus',
|
||||
'audio/webm',
|
||||
'audio/mp4;codecs=opus',
|
||||
'audio/mp4'
|
||||
];
|
||||
let selected: string | undefined;
|
||||
for (const c of candidates) {
|
||||
// @ts-ignore
|
||||
if ((window as any).MediaRecorder && MediaRecorder.isTypeSupported && MediaRecorder.isTypeSupported(c)) {
|
||||
selected = c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mimeRef.current = selected || 'audio/webm';
|
||||
|
||||
const mr = selected ? new MediaRecorder(stream, { mimeType: selected }) : new MediaRecorder(stream);
|
||||
mediaRecorderRef.current = mr;
|
||||
chunksRef.current = [];
|
||||
|
||||
@ -37,7 +57,7 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
|
||||
}
|
||||
};
|
||||
mr.onstop = () => {
|
||||
const blob = new Blob(chunksRef.current, { type: 'audio/webm' });
|
||||
const blob = new Blob(chunksRef.current, { type: mimeRef.current });
|
||||
const url = URL.createObjectURL(blob);
|
||||
setAudioUrl((prev) => {
|
||||
if (prev) URL.revokeObjectURL(prev);
|
||||
@ -63,12 +83,14 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
|
||||
setUploadKey(null);
|
||||
setUploadBucket(null);
|
||||
setTranscript('');
|
||||
setIsUploading(true);
|
||||
if (!audioBlob) {
|
||||
setError('No audio to upload');
|
||||
return;
|
||||
}
|
||||
const form = new FormData();
|
||||
form.append('audio', audioBlob, 'recording.webm');
|
||||
const ext = mimeRef.current.includes('mp4') ? 'm4a' : 'webm';
|
||||
form.append('audio', audioBlob, `recording.${ext}`);
|
||||
const res = await fetch('/api/media/audio', {
|
||||
method: 'POST',
|
||||
body: form,
|
||||
@ -82,6 +104,8 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
|
||||
setUploadBucket(data.bucket || null);
|
||||
} catch (e: any) {
|
||||
setError(e?.message || 'Upload failed');
|
||||
} finally {
|
||||
setIsUploading(false);
|
||||
}
|
||||
};
|
||||
|
||||
@ -89,6 +113,7 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
|
||||
try {
|
||||
setError('');
|
||||
setTranscript('');
|
||||
setIsTranscribing(true);
|
||||
if (!uploadKey) {
|
||||
setError('Upload audio before transcribing');
|
||||
return;
|
||||
@ -108,6 +133,8 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
|
||||
if (onTranscript) onTranscript(t);
|
||||
} catch (e: any) {
|
||||
setError(e?.message || 'Transcription failed');
|
||||
} finally {
|
||||
setIsTranscribing(false);
|
||||
}
|
||||
};
|
||||
|
||||
@ -121,12 +148,17 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
|
||||
<Box>
|
||||
<Typography variant="h6" sx={{ mb: 1 }}>Audio Recorder</Typography>
|
||||
<Stack direction="row" spacing={2} sx={{ mb: 2 }}>
|
||||
<Button variant="contained" disabled={recording} onClick={startRecording}>Start</Button>
|
||||
<Button variant="outlined" disabled={!recording} onClick={stopRecording}>Stop</Button>
|
||||
<Button variant="text" disabled={!audioBlob} onClick={uploadAudio}>Upload</Button>
|
||||
<Button variant="text" disabled={!uploadKey} onClick={transcribe}>Transcribe</Button>
|
||||
<Button variant="contained" disabled={recording || isUploading || isTranscribing} onClick={startRecording}>Start</Button>
|
||||
<Button variant="outlined" disabled={!recording || isUploading || isTranscribing} onClick={stopRecording}>Stop</Button>
|
||||
<Button variant="text" disabled={!audioBlob || isUploading || isTranscribing} onClick={uploadAudio}>Upload</Button>
|
||||
<Button variant="text" disabled={!uploadKey || isUploading || isTranscribing} onClick={transcribe}>Transcribe</Button>
|
||||
</Stack>
|
||||
{error && <Typography color="error" sx={{ mb: 2 }}>{error}</Typography>}
|
||||
{(isUploading || isTranscribing) && (
|
||||
<Typography variant="body2" sx={{ mb: 1 }}>
|
||||
{isUploading ? 'Uploading…' : 'Transcribing…'}
|
||||
</Typography>
|
||||
)}
|
||||
{audioUrl && (
|
||||
<Box>
|
||||
<audio controls src={audioUrl} />
|
||||
|
||||
@ -70,6 +70,7 @@
|
||||
"devDependencies": {
|
||||
"@types/cors": "^2.8.19",
|
||||
"@types/express": "^5.0.3",
|
||||
"@types/morgan": "^1.9.10",
|
||||
"@types/multer": "^2.0.0",
|
||||
"@types/node": "^24.6.0",
|
||||
"after": "0.8.2",
|
||||
|
||||
@ -3,6 +3,7 @@ import dotenv from 'dotenv';
|
||||
dotenv.config({ path: path.resolve(__dirname, '../../../.env') });
|
||||
import express from 'express';
|
||||
import cors from 'cors';
|
||||
import morgan from 'morgan';
|
||||
import authRouter from './auth';
|
||||
import mediaRouter from './media';
|
||||
import sttRouter from './stt';
|
||||
@ -16,6 +17,7 @@ app.use(cors({
|
||||
origin: 'http://localhost:5173',
|
||||
credentials: true
|
||||
}));
|
||||
app.use(morgan('dev'));
|
||||
app.use(express.json());
|
||||
|
||||
// API routes
|
||||
|
||||
@ -11,6 +11,7 @@ router.post('/audio', upload.single('audio'), async (
|
||||
res: express.Response
|
||||
) => {
|
||||
try {
|
||||
console.log('[API] POST /api/media/audio');
|
||||
const { S3_ENDPOINT, S3_ACCESS_KEY, S3_SECRET_KEY } = process.env;
|
||||
if (!S3_ENDPOINT || !S3_ACCESS_KEY || !S3_SECRET_KEY) {
|
||||
console.error('Upload failed: missing S3 config (S3_ENDPOINT/S3_ACCESS_KEY/S3_SECRET_KEY)');
|
||||
@ -23,6 +24,7 @@ router.post('/audio', upload.single('audio'), async (
|
||||
const mime = req.file.mimetype || 'application/octet-stream';
|
||||
const ext = mime === 'audio/webm' ? 'webm' : mime.split('/')[1] || 'bin';
|
||||
const key = `audio/${new Date().toISOString().slice(0,10)}/${crypto.randomUUID()}.${ext}`;
|
||||
console.log('[API] Uploading file', { mime, size: req.file.size, bucket, key });
|
||||
|
||||
const out = await uploadBuffer({
|
||||
bucket,
|
||||
@ -31,6 +33,7 @@ router.post('/audio', upload.single('audio'), async (
|
||||
contentType: mime,
|
||||
});
|
||||
|
||||
console.log('[API] Upload success', out);
|
||||
return res.status(200).json({ success: true, ...out });
|
||||
} catch (err) {
|
||||
console.error('Upload failed:', err);
|
||||
|
||||
@ -25,6 +25,12 @@ export async function uploadBuffer(params: {
|
||||
contentType?: string;
|
||||
}) {
|
||||
const s3 = getS3Client();
|
||||
console.log('[S3] Upload start', {
|
||||
bucket: params.bucket,
|
||||
key: params.key,
|
||||
bytes: params.body?.length ?? 0,
|
||||
contentType: params.contentType || 'application/octet-stream',
|
||||
});
|
||||
const cmd = new PutObjectCommand({
|
||||
Bucket: params.bucket,
|
||||
Key: params.key,
|
||||
@ -32,11 +38,13 @@ export async function uploadBuffer(params: {
|
||||
ContentType: params.contentType || 'application/octet-stream',
|
||||
});
|
||||
await s3.send(cmd);
|
||||
console.log('[S3] Upload done', { bucket: params.bucket, key: params.key });
|
||||
return { bucket: params.bucket, key: params.key };
|
||||
}
|
||||
|
||||
export async function downloadObject(params: { bucket: string; key: string }): Promise<{ buffer: Buffer; contentType: string }> {
|
||||
const s3 = getS3Client();
|
||||
console.log('[S3] Download start', { bucket: params.bucket, key: params.key });
|
||||
const cmd = new GetObjectCommand({ Bucket: params.bucket, Key: params.key });
|
||||
const res = await s3.send(cmd);
|
||||
const contentType = res.ContentType || 'application/octet-stream';
|
||||
@ -47,5 +55,7 @@ export async function downloadObject(params: { bucket: string; key: string }): P
|
||||
body.on('end', resolve);
|
||||
body.on('error', reject);
|
||||
});
|
||||
return { buffer: Buffer.concat(chunks), contentType };
|
||||
const buffer = Buffer.concat(chunks);
|
||||
console.log('[S3] Download done', { bucket: params.bucket, key: params.key, bytes: buffer.length, contentType });
|
||||
return { buffer, contentType };
|
||||
}
|
||||
|
||||
5
data/drafts/31ba935b-4424-4226-9f8b-803d401022a2.json
Normal file
5
data/drafts/31ba935b-4424-4226-9f8b-803d401022a2.json
Normal file
@ -0,0 +1,5 @@
|
||||
{
|
||||
"id": "31ba935b-4424-4226-9f8b-803d401022a2",
|
||||
"content": "asödknasdkjasdlkasdasdasdasdsd",
|
||||
"updatedAt": "2025-10-24T01:11:46.059Z"
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user