feat: add multi-clip recording and reordering support in audio recorder

This commit is contained in:
Ender 2025-10-24 14:15:37 +02:00
parent cd799a2024
commit 3f2d3f0e8f
3 changed files with 118 additions and 74 deletions

16
PLAN.md
View File

@ -42,6 +42,10 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
- [ ] Frontend: Buttons — "Save as Draft" and "Publish" (calls `/api/ghost/post`)
- [ ] Show status toast and link to view post
- [ ] ENV: `GHOST_ADMIN_API_URL`, `GHOST_ADMIN_API_KEY`, `GHOST_PUBLIC_URL`
- [ ] Media handling on publish:
- If `PUBLIC_MEDIA_BASE_URL` is set, copy each referenced media from `S3_BUCKET/<key>` to `PUBLIC_MEDIA_BUCKET/<key>` and rewrite HTML/`feature_image` to `PUBLIC_MEDIA_BASE_URL/<key>`.
- If `PUBLIC_MEDIA_BASE_URL` is not set, fall back to presigned URLs (SigV4, max 7 days) for private buckets.
- Ensure destination bucket/prefix is publicly readable for anonymous GET (prefer prefix-only like `images/*`).
- **M7 · Media Management** (Scope: Goal 7)
- [x] Centralize media library view with reuse.
- [ ] Background cleanup/retention policies.
@ -63,6 +67,9 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
- **Secrets**
- [x] `.env.example` for common keys (ADMIN_PASSWORD_HASH, OPENAI_API_KEY, GHOST_ADMIN_API_KEY, S3 credentials).
- [ ] Instructions for local secret population.
- [ ] Public media env:
- `PUBLIC_MEDIA_BUCKET` — bucket to store publicly-readable media copies (e.g., `public-media`).
- `PUBLIC_MEDIA_BASE_URL` — public HTTP base mapping directly to keys in `PUBLIC_MEDIA_BUCKET`.
## Tooling Decisions
- **Dependency manager**: Adopt PNPM with workspace support for mono-repo friendliness and fast installs.
@ -93,6 +100,12 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
- [ ] Start Admin: `pnpm run dev -C apps/admin`
- [ ] Record → Stop → Upload → Transcribe; see transcript populate Draft.
- [ ] Save Draft (local) and verify persistence on reload.
- [ ] Public media:
- Set `.env`: `PUBLIC_MEDIA_BUCKET` and `PUBLIC_MEDIA_BASE_URL`.
- Ensure destination bucket/prefix is public (MinIO Console or `mc anonymous set public myminio/<bucket>/images`).
- Create a draft with an image and click Publish (draft or published).
- Check API logs for `[S3] Copy start`/`Copy done` and `[Ghost] Sample replacements` with `PUBLIC_MEDIA_BASE_URL/<key>`.
- `curl -I <rewritten-url>` should return HTTP 200 and `Content-Type: image/*`.
## MinIO Integration Checklist
- [ ] Deploy MinIO on VPS (console `:9001`, API `:9000`).
@ -104,6 +117,9 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
- `S3_ACCESS_KEY=...`
- `S3_SECRET_KEY=...`
- [ ] Optional: Set bucket policy to allow public reads for media.
- [ ] Public media setup (if using a dedicated bucket):
- Create bucket `public-media` (or chosen name) and make `images/*` prefix public (anonymous `s3:GetObject`).
- Set `.env`: `PUBLIC_MEDIA_BUCKET=public-media`, `PUBLIC_MEDIA_BASE_URL=https://<public-host>/public-media` (or whatever path your gateway serves for that bucket).
## Scaffolding Plan (Draft)
- **Frontend (`apps/admin`)**

View File

@ -6,14 +6,20 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
const chunksRef = useRef<Blob[]>([]);
const mimeRef = useRef<string>('audio/webm');
const [recording, setRecording] = useState(false);
const [audioUrl, setAudioUrl] = useState<string | null>(null);
const [audioBlob, setAudioBlob] = useState<Blob | null>(null);
const [uploadKey, setUploadKey] = useState<string | null>(null);
const [uploadBucket, setUploadBucket] = useState<string | null>(null);
const [transcript, setTranscript] = useState<string>('');
const [error, setError] = useState<string>('');
const [isUploading, setIsUploading] = useState(false);
const [isTranscribing, setIsTranscribing] = useState(false);
// One recorded audio clip together with its own upload / transcription state.
type Clip = {
// Identifier generated when the recording stops; used as the React list key.
id: string;
// Object URL created from `blob`; used as the <audio> source and revoked when the clip is removed.
url: string;
// The recorded audio data; sent as the `audio` form field on upload.
blob: Blob;
// MIME type captured from `mimeRef` when the clip was created; picks the upload file extension (m4a vs webm).
mime: string;
// Storage key returned by POST /api/media/audio (falls back to 'uploaded'); must be set before transcribing.
uploadedKey?: string;
// Bucket returned by the upload response, or null when the response carries none; forwarded to /api/stt.
uploadedBucket?: string | null;
// Transcript text returned by /api/stt for this clip.
transcript?: string;
// True while this clip's upload request is in flight.
isUploading?: boolean;
// True while this clip's transcription request is in flight.
isTranscribing?: boolean;
// Message from the last failed upload/transcription; reset to '' when a new attempt starts.
error?: string;
};
// Recorded clips in display order; this order is also the order transcripts are joined in.
const [clips, setClips] = useState<Clip[]>([]);
const requestStream = async (): Promise<MediaStream | null> => {
try {
@ -59,12 +65,8 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
mr.onstop = () => {
const blob = new Blob(chunksRef.current, { type: mimeRef.current });
const url = URL.createObjectURL(blob);
setAudioUrl((prev) => {
if (prev) URL.revokeObjectURL(prev);
return url;
});
setAudioBlob(blob);
// stop all tracks to release mic
const id = (globalThis.crypto && 'randomUUID' in crypto) ? crypto.randomUUID() : `${Date.now()}_${Math.random().toString(36).slice(2)}`;
setClips((prev) => [...prev, { id, url, blob, mime: mimeRef.current }]);
stream.getTracks().forEach(t => t.stop());
};
@ -77,51 +79,40 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
setRecording(false);
};
const uploadAudio = async () => {
const uploadClip = async (idx: number) => {
const c = clips[idx];
if (!c) return;
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, isUploading: true, error: '' } : x));
try {
setError('');
setUploadKey(null);
setUploadBucket(null);
setTranscript('');
setIsUploading(true);
if (!audioBlob) {
setError('No audio to upload');
return;
}
const form = new FormData();
const ext = mimeRef.current.includes('mp4') ? 'm4a' : 'webm';
form.append('audio', audioBlob, `recording.${ext}`);
const res = await fetch('/api/media/audio', {
method: 'POST',
body: form,
});
const ext = c.mime.includes('mp4') ? 'm4a' : 'webm';
form.append('audio', c.blob, `recording.${ext}`);
const res = await fetch('/api/media/audio', { method: 'POST', body: form });
if (!res.ok) {
const txt = await res.text();
throw new Error(`Upload failed: ${res.status} ${txt}`);
}
const data = await res.json();
setUploadKey(data.key || 'uploaded');
setUploadBucket(data.bucket || null);
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, uploadedKey: data.key || 'uploaded', uploadedBucket: data.bucket || null } : x));
} catch (e: any) {
setError(e?.message || 'Upload failed');
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, error: e?.message || 'Upload failed' } : x));
} finally {
setIsUploading(false);
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, isUploading: false } : x));
}
};
const transcribe = async () => {
const transcribeClip = async (idx: number) => {
const c = clips[idx];
if (!c) return;
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, isTranscribing: true, error: '' } : x));
try {
setError('');
setTranscript('');
setIsTranscribing(true);
if (!uploadKey) {
setError('Upload audio before transcribing');
return;
if (!c.uploadedKey) {
throw new Error('Upload before transcribing');
}
const res = await fetch('/api/stt', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ bucket: uploadBucket ?? undefined, key: uploadKey }),
body: JSON.stringify({ bucket: c.uploadedBucket ?? undefined, key: c.uploadedKey }),
});
if (!res.ok) {
const txt = await res.text();
@ -129,52 +120,89 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
}
const data = await res.json();
const t: string = data.transcript || '';
setTranscript(t);
if (onTranscript) onTranscript(t);
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, transcript: t } : x));
} catch (e: any) {
setError(e?.message || 'Transcription failed');
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, error: e?.message || 'Transcription failed' } : x));
} finally {
setIsTranscribing(false);
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, isTranscribing: false } : x));
}
};
/**
 * Move the clip at index `from` so that it ends up at index `to`.
 *
 * The update is a no-op (the previous array is returned unchanged, so no
 * re-render is triggered) when either index is not a valid position in the
 * current list or when the clip would not actually move.
 *
 * @param from Current index of the clip to move.
 * @param to   Index the clip should occupy afterwards.
 */
const moveClip = (from: number, to: number) => {
  setClips((prev) => {
    const isValidIndex = (i: number) => Number.isInteger(i) && i >= 0 && i < prev.length;
    // Validate BOTH ends: an out-of-range `from` would make splice() return
    // nothing and insert `undefined` into the list, and a negative `from`
    // would silently pick a clip counted from the end.
    if (!isValidIndex(from) || !isValidIndex(to) || from === to) return prev;
    // Work on a copy so the previous state array is never mutated.
    const arr = prev.slice();
    const [item] = arr.splice(from, 1);
    arr.splice(to, 0, item);
    return arr;
  });
};
/**
 * Drop the clip at position `idx` from the list and release the object URL
 * that backed its audio preview.
 *
 * @param idx Index of the clip to remove.
 */
const removeClip = (idx: number) => {
  setClips((current) => {
    // Copy first so the previous state array is left untouched.
    const remaining = [...current];
    const removed = remaining.splice(idx, 1)[0];
    // Nothing is revoked when no clip was removed or it has no URL.
    if (removed?.url) {
      URL.revokeObjectURL(removed.url);
    }
    return remaining;
  });
};
/**
 * Collect the transcripts of all clips, in list order, and hand them to the
 * `onTranscript` callback as one string with a blank line between clips.
 * Clips without a transcript are skipped; nothing happens when no callback
 * was supplied.
 */
const applyTranscriptsToDraft = () => {
  const pieces: string[] = [];
  for (const clip of clips) {
    if (clip.transcript) pieces.push(clip.transcript);
  }
  const combined = pieces.join('\n\n');
  if (onTranscript) {
    onTranscript(combined);
  }
};
useEffect(() => {
return () => {
if (audioUrl) URL.revokeObjectURL(audioUrl);
clips.forEach(c => c.url && URL.revokeObjectURL(c.url));
};
}, [audioUrl]);
}, [clips]);
return (
<Box>
<Typography variant="h6" sx={{ mb: 1 }}>Audio Recorder</Typography>
<Stack direction="row" spacing={2} sx={{ mb: 2 }}>
<Button variant="contained" disabled={recording || isUploading || isTranscribing} onClick={startRecording}>Start</Button>
<Button variant="outlined" disabled={!recording || isUploading || isTranscribing} onClick={stopRecording}>Stop</Button>
<Button variant="text" disabled={!audioBlob || isUploading || isTranscribing} onClick={uploadAudio}>Upload</Button>
<Button variant="text" disabled={!uploadKey || isUploading || isTranscribing} onClick={transcribe}>Transcribe</Button>
<Stack direction="row" spacing={2} sx={{ mb: 2, flexWrap: 'wrap' }}>
<Button variant="contained" disabled={recording} onClick={startRecording}>Start</Button>
<Button variant="outlined" disabled={!recording} onClick={stopRecording}>Stop</Button>
<Button variant="text" disabled={clips.every(c => !c.transcript)} onClick={applyTranscriptsToDraft}>Apply transcripts to draft</Button>
<Typography variant="body2" sx={{ alignSelf: 'center' }}>{recording ? 'Recording…' : ''}</Typography>
</Stack>
{error && <Typography color="error" sx={{ mb: 2 }}>{error}</Typography>}
{(isUploading || isTranscribing) && (
<Typography variant="body2" sx={{ mb: 1 }}>
{isUploading ? 'Uploading…' : 'Transcribing…'}
</Typography>
{clips.length === 0 && (
<Typography variant="body2">No recordings yet.</Typography>
)}
{audioUrl && (
<Box>
<audio controls src={audioUrl} />
<Stack spacing={2} sx={{ mt: 1 }}>
{clips.map((c, idx) => (
<Box key={c.id} sx={{ border: '1px solid #ddd', borderRadius: 2, p: 1 }}>
<Stack direction="row" spacing={1} sx={{ justifyContent: 'space-between', alignItems: 'center', mb: 1 }}>
<Typography variant="subtitle2">Clip {idx + 1}</Typography>
<Stack direction="row" spacing={1}>
<Button size="small" variant="outlined" disabled={idx === 0} onClick={() => moveClip(idx, idx - 1)}>Up</Button>
<Button size="small" variant="outlined" disabled={idx === clips.length - 1} onClick={() => moveClip(idx, idx + 1)}>Down</Button>
<Button size="small" variant="outlined" color="error" onClick={() => removeClip(idx)}>Remove</Button>
</Stack>
</Stack>
<audio controls src={c.url} />
<Stack direction="row" spacing={1} sx={{ mt: 1, flexWrap: 'wrap' }}>
<Button size="small" variant="text" disabled={!!c.isUploading} onClick={() => uploadClip(idx)}>
{c.isUploading ? 'Uploading…' : (c.uploadedKey ? 'Re-upload' : 'Upload')}
</Button>
<Button size="small" variant="text" disabled={!c.uploadedKey || !!c.isTranscribing} onClick={() => transcribeClip(idx)}>
{c.isTranscribing ? 'Transcribing…' : (c.transcript ? 'Retranscribe' : 'Transcribe')}
</Button>
{c.uploadedKey && (
<Typography variant="caption" sx={{ alignSelf: 'center' }}>key: {c.uploadedKey}</Typography>
)}
</Stack>
{c.error && <Typography color="error" variant="body2" sx={{ mt: 1 }}>{c.error}</Typography>}
{c.transcript && (
<Box sx={{ mt: 1 }}>
<Typography variant="body2" sx={{ whiteSpace: 'pre-wrap' }}>{c.transcript}</Typography>
</Box>
)}
{uploadKey && (
<Typography variant="body2" sx={{ mt: 1 }}>
Uploaded as key: {uploadKey}
</Typography>
)}
{transcript && (
<Box sx={{ mt: 2 }}>
<Typography variant="subtitle1">Transcript</Typography>
<Typography variant="body2" sx={{ whiteSpace: 'pre-wrap' }}>{transcript}</Typography>
</Box>
)}
))}
</Stack>
</Box>
);
}

View File

@ -1,5 +1,5 @@
{
"id": "31ba935b-4424-4226-9f8b-803d401022a2",
"content": "<pre><code>enasdasdasd</code></pre><p>zdfsdfsadsdfsdfsdf</p><p></p><p></p><p>sdfsdfs</p><img src=\"/api/media/obj?bucket=voxblog&amp;key=images%2F2025-10-24%2F15962af6-52ae-4c16-918d-86b9e6488bfa.png\" alt=\"Vector-2.png\"><p></p><p></p><ul><li><p>df</p></li></ul><p></p><p></p><p></p><p></p><p></p>",
"updatedAt": "2025-10-24T09:28:18.204Z"
"content": "<pre><code>enasdasdasd</code></pre><p>zdfsdfsadsdfsdfsdf</p><p></p><p></p><p><a target=\"_blank\" rel=\"noopener noreferrer nofollow\" href=\"abc\">asdasd</a></p><img src=\"/api/media/obj?bucket=voxblog&amp;key=images%2F2025-10-24%2F15962af6-52ae-4c16-918d-86b9e6488bfa.png\" alt=\"Vector-2.png\"><p></p><p></p><ul><li><p>df</p></li></ul><p></p><p></p><p></p><p></p><p></p>",
"updatedAt": "2025-10-24T12:11:46.031Z"
}