feat: add multi-clip recording and reordering support in audio recorder
This commit is contained in:
parent
cd799a2024
commit
3f2d3f0e8f
16
PLAN.md
16
PLAN.md
@ -42,6 +42,10 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
|
||||
- [ ] Frontend: Buttons — "Save as Draft" and "Publish" (calls `/api/ghost/post`)
|
||||
- [ ] Show status toast and link to view post
|
||||
- [ ] ENV: `GHOST_ADMIN_API_URL`, `GHOST_ADMIN_API_KEY`, `GHOST_PUBLIC_URL`
|
||||
- [ ] Media handling on publish:
|
||||
- If `PUBLIC_MEDIA_BASE_URL` is set, copy each referenced media from `S3_BUCKET/<key>` to `PUBLIC_MEDIA_BUCKET/<key>` and rewrite HTML/`feature_image` to `PUBLIC_MEDIA_BASE_URL/<key>`.
|
||||
- If `PUBLIC_MEDIA_BASE_URL` is not set, fall back to presigned URLs (SigV4, max 7 days) for private buckets.
|
||||
- Ensure destination bucket/prefix is publicly readable for anonymous GET (prefer prefix-only like `images/*`).
|
||||
- **M7 · Media Management** (Scope: Goal 7)
|
||||
- [x] Centralize media library view with reuse.
|
||||
- [ ] Background cleanup/retention policies.
|
||||
@ -63,6 +67,9 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
|
||||
- **Secrets**
|
||||
- [x] `.env.example` for common keys (ADMIN_PASSWORD_HASH, OPENAI_API_KEY, GHOST_ADMIN_API_KEY, S3 credentials).
|
||||
- [ ] Instructions for local secret population.
|
||||
- [ ] Public media env:
|
||||
- `PUBLIC_MEDIA_BUCKET` — bucket to store publicly-readable media copies (e.g., `public-media`).
|
||||
- `PUBLIC_MEDIA_BASE_URL` — public HTTP base mapping directly to keys in `PUBLIC_MEDIA_BUCKET`.
|
||||
|
||||
## Tooling Decisions
|
||||
- **Dependency manager**: Adopt PNPM with workspace support for mono-repo friendliness and fast installs.
|
||||
@ -93,6 +100,12 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
|
||||
- [ ] Start Admin: `pnpm run dev -C apps/admin`
|
||||
- [ ] Record → Stop → Upload → Transcribe; see transcript populate Draft.
|
||||
- [ ] Save Draft (local) and verify persistence on reload.
|
||||
- [ ] Public media:
|
||||
- Set `.env`: `PUBLIC_MEDIA_BUCKET` and `PUBLIC_MEDIA_BASE_URL`.
|
||||
- Ensure destination bucket/prefix is public (MinIO Console or `mc anonymous set public myminio/<bucket>/images`).
|
||||
- Create a draft with an image and click Publish (draft or published).
|
||||
- Check API logs for `[S3] Copy start`/`Copy done` and `[Ghost] Sample replacements` with `PUBLIC_MEDIA_BASE_URL/<key>`.
|
||||
- `curl -I <rewritten-url>` should return HTTP status 200 and `Content-Type: image/*`.
|
||||
|
||||
## MinIO Integration Checklist
|
||||
- [ ] Deploy MinIO on VPS (console `:9001`, API `:9000`).
|
||||
@ -104,6 +117,9 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit
|
||||
- `S3_ACCESS_KEY=...`
|
||||
- `S3_SECRET_KEY=...`
|
||||
- [ ] Optional: Set bucket policy to allow public reads for media.
|
||||
- [ ] Public media setup (if using a dedicated bucket):
|
||||
- Create bucket `public-media` (or chosen name) and make `images/*` prefix public (anonymous `s3:GetObject`).
|
||||
- Set `.env`: `PUBLIC_MEDIA_BUCKET=public-media`, `PUBLIC_MEDIA_BASE_URL=https://<public-host>/public-media` (or path your gateway serves for that bucket).
|
||||
|
||||
## Scaffolding Plan (Draft)
|
||||
- **Frontend (`apps/admin`)**
|
||||
|
||||
@ -6,14 +6,20 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
|
||||
const chunksRef = useRef<Blob[]>([]);
|
||||
const mimeRef = useRef<string>('audio/webm');
|
||||
const [recording, setRecording] = useState(false);
|
||||
const [audioUrl, setAudioUrl] = useState<string | null>(null);
|
||||
const [audioBlob, setAudioBlob] = useState<Blob | null>(null);
|
||||
const [uploadKey, setUploadKey] = useState<string | null>(null);
|
||||
const [uploadBucket, setUploadBucket] = useState<string | null>(null);
|
||||
const [transcript, setTranscript] = useState<string>('');
|
||||
const [error, setError] = useState<string>('');
|
||||
const [isUploading, setIsUploading] = useState(false);
|
||||
const [isTranscribing, setIsTranscribing] = useState(false);
|
||||
/** One recorded audio clip together with its upload and transcription state. */
interface Clip {
  /** Unique identifier of the clip; also used as the React list key. */
  id: string;
  /** Object URL created for `blob`, used as the `<audio>` source. */
  url: string;
  /** Recorded audio data. */
  blob: Blob;
  /** MIME type the clip was recorded with. */
  mime: string;
  /** Key returned by the upload endpoint; unset until the clip was uploaded. */
  uploadedKey?: string;
  /** Bucket returned by the upload endpoint, or `null` when none was reported. */
  uploadedBucket?: string | null;
  /** Transcript returned for this clip, once it has been transcribed. */
  transcript?: string;
  /** True while an upload request for this clip is in flight. */
  isUploading?: boolean;
  /** True while a transcription request for this clip is in flight. */
  isTranscribing?: boolean;
  /** Message of the last failed upload/transcription; reset to '' on a new attempt. */
  error?: string;
}
|
||||
const [clips, setClips] = useState<Clip[]>([]);
|
||||
|
||||
const requestStream = async (): Promise<MediaStream | null> => {
|
||||
try {
|
||||
@ -59,12 +65,8 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
|
||||
mr.onstop = () => {
|
||||
const blob = new Blob(chunksRef.current, { type: mimeRef.current });
|
||||
const url = URL.createObjectURL(blob);
|
||||
setAudioUrl((prev) => {
|
||||
if (prev) URL.revokeObjectURL(prev);
|
||||
return url;
|
||||
});
|
||||
setAudioBlob(blob);
|
||||
// stop all tracks to release mic
|
||||
const id = (globalThis.crypto && 'randomUUID' in crypto) ? crypto.randomUUID() : `${Date.now()}_${Math.random().toString(36).slice(2)}`;
|
||||
setClips((prev) => [...prev, { id, url, blob, mime: mimeRef.current }]);
|
||||
stream.getTracks().forEach(t => t.stop());
|
||||
};
|
||||
|
||||
@ -77,51 +79,40 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
|
||||
setRecording(false);
|
||||
};
|
||||
|
||||
const uploadAudio = async () => {
|
||||
const uploadClip = async (idx: number) => {
|
||||
const c = clips[idx];
|
||||
if (!c) return;
|
||||
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, isUploading: true, error: '' } : x));
|
||||
try {
|
||||
setError('');
|
||||
setUploadKey(null);
|
||||
setUploadBucket(null);
|
||||
setTranscript('');
|
||||
setIsUploading(true);
|
||||
if (!audioBlob) {
|
||||
setError('No audio to upload');
|
||||
return;
|
||||
}
|
||||
const form = new FormData();
|
||||
const ext = mimeRef.current.includes('mp4') ? 'm4a' : 'webm';
|
||||
form.append('audio', audioBlob, `recording.${ext}`);
|
||||
const res = await fetch('/api/media/audio', {
|
||||
method: 'POST',
|
||||
body: form,
|
||||
});
|
||||
const ext = c.mime.includes('mp4') ? 'm4a' : 'webm';
|
||||
form.append('audio', c.blob, `recording.${ext}`);
|
||||
const res = await fetch('/api/media/audio', { method: 'POST', body: form });
|
||||
if (!res.ok) {
|
||||
const txt = await res.text();
|
||||
throw new Error(`Upload failed: ${res.status} ${txt}`);
|
||||
}
|
||||
const data = await res.json();
|
||||
setUploadKey(data.key || 'uploaded');
|
||||
setUploadBucket(data.bucket || null);
|
||||
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, uploadedKey: data.key || 'uploaded', uploadedBucket: data.bucket || null } : x));
|
||||
} catch (e: any) {
|
||||
setError(e?.message || 'Upload failed');
|
||||
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, error: e?.message || 'Upload failed' } : x));
|
||||
} finally {
|
||||
setIsUploading(false);
|
||||
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, isUploading: false } : x));
|
||||
}
|
||||
};
|
||||
|
||||
const transcribe = async () => {
|
||||
const transcribeClip = async (idx: number) => {
|
||||
const c = clips[idx];
|
||||
if (!c) return;
|
||||
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, isTranscribing: true, error: '' } : x));
|
||||
try {
|
||||
setError('');
|
||||
setTranscript('');
|
||||
setIsTranscribing(true);
|
||||
if (!uploadKey) {
|
||||
setError('Upload audio before transcribing');
|
||||
return;
|
||||
if (!c.uploadedKey) {
|
||||
throw new Error('Upload before transcribing');
|
||||
}
|
||||
const res = await fetch('/api/stt', {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({ bucket: uploadBucket ?? undefined, key: uploadKey }),
|
||||
body: JSON.stringify({ bucket: c.uploadedBucket ?? undefined, key: c.uploadedKey }),
|
||||
});
|
||||
if (!res.ok) {
|
||||
const txt = await res.text();
|
||||
@ -129,52 +120,89 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string)
|
||||
}
|
||||
const data = await res.json();
|
||||
const t: string = data.transcript || '';
|
||||
setTranscript(t);
|
||||
if (onTranscript) onTranscript(t);
|
||||
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, transcript: t } : x));
|
||||
} catch (e: any) {
|
||||
setError(e?.message || 'Transcription failed');
|
||||
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, error: e?.message || 'Transcription failed' } : x));
|
||||
} finally {
|
||||
setIsTranscribing(false);
|
||||
setClips((prev) => prev.map((x, i) => i === idx ? { ...x, isTranscribing: false } : x));
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Moves the clip at index `from` to index `to`, shifting the clips in between.
 *
 * The list is left untouched (the previous array is returned as-is, so no
 * re-render is triggered) when either index is not an integer inside the
 * current list or when both indices are equal.
 *
 * @param from - Current index of the clip to move.
 * @param to - Index the clip should end up at.
 */
const moveClip = (from: number, to: number) => {
  setClips((prev) => {
    const inRange = (i: number) => Number.isInteger(i) && i >= 0 && i < prev.length;
    // An out-of-range `from` would make splice() remove nothing, and the
    // insert below would then put `undefined` into the list — so both
    // indices are validated, not just `to`.
    if (!inRange(from) || !inRange(to) || from === to) return prev;
    const next = prev.slice();
    const [moved] = next.splice(from, 1);
    if (moved === undefined) return prev;
    next.splice(to, 0, moved);
    return next;
  });
};
|
||||
|
||||
/**
 * Removes the clip shown at index `idx` and releases its object URL.
 *
 * The clip is looked up in the `clips` array of the render that created the
 * handler and is then removed by `id`, so the clip the user clicked on is the
 * one removed even if the list changed in the meantime. Does nothing when
 * `idx` does not refer to a clip.
 *
 * @param idx - Index of the clip in the currently rendered list.
 */
const removeClip = (idx: number) => {
  const target = clips[idx];
  if (!target) return;
  // Keep the updater pure: React may invoke updater functions more than once,
  // so the object URL is revoked out here rather than inside it.
  setClips((prev) => prev.filter((clip) => clip.id !== target.id));
  if (target.url) URL.revokeObjectURL(target.url);
};
|
||||
|
||||
/**
 * Joins the transcripts of all clips, in their current order, and hands the
 * result to `onTranscript`. Clips without a transcript are skipped; the parts
 * are separated by a blank line. Does nothing when no callback was provided.
 */
const applyTranscriptsToDraft = () => {
  if (!onTranscript) return;
  const parts: string[] = [];
  for (const clip of clips) {
    if (clip.transcript) parts.push(clip.transcript);
  }
  onTranscript(parts.join('\n\n'));
};
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
if (audioUrl) URL.revokeObjectURL(audioUrl);
|
||||
clips.forEach(c => c.url && URL.revokeObjectURL(c.url));
|
||||
};
|
||||
}, [audioUrl]);
|
||||
}, [clips]);
|
||||
|
||||
return (
|
||||
<Box>
|
||||
<Typography variant="h6" sx={{ mb: 1 }}>Audio Recorder</Typography>
|
||||
<Stack direction="row" spacing={2} sx={{ mb: 2 }}>
|
||||
<Button variant="contained" disabled={recording || isUploading || isTranscribing} onClick={startRecording}>Start</Button>
|
||||
<Button variant="outlined" disabled={!recording || isUploading || isTranscribing} onClick={stopRecording}>Stop</Button>
|
||||
<Button variant="text" disabled={!audioBlob || isUploading || isTranscribing} onClick={uploadAudio}>Upload</Button>
|
||||
<Button variant="text" disabled={!uploadKey || isUploading || isTranscribing} onClick={transcribe}>Transcribe</Button>
|
||||
<Stack direction="row" spacing={2} sx={{ mb: 2, flexWrap: 'wrap' }}>
|
||||
<Button variant="contained" disabled={recording} onClick={startRecording}>Start</Button>
|
||||
<Button variant="outlined" disabled={!recording} onClick={stopRecording}>Stop</Button>
|
||||
<Button variant="text" disabled={clips.every(c => !c.transcript)} onClick={applyTranscriptsToDraft}>Apply transcripts to draft</Button>
|
||||
<Typography variant="body2" sx={{ alignSelf: 'center' }}>{recording ? 'Recording…' : ''}</Typography>
|
||||
</Stack>
|
||||
{error && <Typography color="error" sx={{ mb: 2 }}>{error}</Typography>}
|
||||
{(isUploading || isTranscribing) && (
|
||||
<Typography variant="body2" sx={{ mb: 1 }}>
|
||||
{isUploading ? 'Uploading…' : 'Transcribing…'}
|
||||
</Typography>
|
||||
{clips.length === 0 && (
|
||||
<Typography variant="body2">No recordings yet.</Typography>
|
||||
)}
|
||||
{audioUrl && (
|
||||
<Box>
|
||||
<audio controls src={audioUrl} />
|
||||
<Stack spacing={2} sx={{ mt: 1 }}>
|
||||
{clips.map((c, idx) => (
|
||||
<Box key={c.id} sx={{ border: '1px solid #ddd', borderRadius: 2, p: 1 }}>
|
||||
<Stack direction="row" spacing={1} sx={{ justifyContent: 'space-between', alignItems: 'center', mb: 1 }}>
|
||||
<Typography variant="subtitle2">Clip {idx + 1}</Typography>
|
||||
<Stack direction="row" spacing={1}>
|
||||
<Button size="small" variant="outlined" disabled={idx === 0} onClick={() => moveClip(idx, idx - 1)}>Up</Button>
|
||||
<Button size="small" variant="outlined" disabled={idx === clips.length - 1} onClick={() => moveClip(idx, idx + 1)}>Down</Button>
|
||||
<Button size="small" variant="outlined" color="error" onClick={() => removeClip(idx)}>Remove</Button>
|
||||
</Stack>
|
||||
</Stack>
|
||||
<audio controls src={c.url} />
|
||||
<Stack direction="row" spacing={1} sx={{ mt: 1, flexWrap: 'wrap' }}>
|
||||
<Button size="small" variant="text" disabled={!!c.isUploading} onClick={() => uploadClip(idx)}>
|
||||
{c.isUploading ? 'Uploading…' : (c.uploadedKey ? 'Re-upload' : 'Upload')}
|
||||
</Button>
|
||||
<Button size="small" variant="text" disabled={!c.uploadedKey || !!c.isTranscribing} onClick={() => transcribeClip(idx)}>
|
||||
{c.isTranscribing ? 'Transcribing…' : (c.transcript ? 'Retranscribe' : 'Transcribe')}
|
||||
</Button>
|
||||
{c.uploadedKey && (
|
||||
<Typography variant="caption" sx={{ alignSelf: 'center' }}>key: {c.uploadedKey}</Typography>
|
||||
)}
|
||||
</Stack>
|
||||
{c.error && <Typography color="error" variant="body2" sx={{ mt: 1 }}>{c.error}</Typography>}
|
||||
{c.transcript && (
|
||||
<Box sx={{ mt: 1 }}>
|
||||
<Typography variant="body2" sx={{ whiteSpace: 'pre-wrap' }}>{c.transcript}</Typography>
|
||||
</Box>
|
||||
)}
|
||||
{uploadKey && (
|
||||
<Typography variant="body2" sx={{ mt: 1 }}>
|
||||
Uploaded as key: {uploadKey}
|
||||
</Typography>
|
||||
)}
|
||||
{transcript && (
|
||||
<Box sx={{ mt: 2 }}>
|
||||
<Typography variant="subtitle1">Transcript</Typography>
|
||||
<Typography variant="body2" sx={{ whiteSpace: 'pre-wrap' }}>{transcript}</Typography>
|
||||
</Box>
|
||||
)}
|
||||
))}
|
||||
</Stack>
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
{
|
||||
"id": "31ba935b-4424-4226-9f8b-803d401022a2",
|
||||
"content": "<pre><code>enasdasdasd</code></pre><p>zdfsdfsadsdfsdfsdf</p><p></p><p></p><p>sdfsdfs</p><img src=\"/api/media/obj?bucket=voxblog&key=images%2F2025-10-24%2F15962af6-52ae-4c16-918d-86b9e6488bfa.png\" alt=\"Vector-2.png\"><p></p><p></p><ul><li><p>df</p></li></ul><p></p><p></p><p></p><p></p><p></p>",
|
||||
"updatedAt": "2025-10-24T09:28:18.204Z"
|
||||
"content": "<pre><code>enasdasdasd</code></pre><p>zdfsdfsadsdfsdfsdf</p><p></p><p></p><p><a target=\"_blank\" rel=\"noopener noreferrer nofollow\" href=\"abc\">asdasd</a></p><img src=\"/api/media/obj?bucket=voxblog&key=images%2F2025-10-24%2F15962af6-52ae-4c16-918d-86b9e6488bfa.png\" alt=\"Vector-2.png\"><p></p><p></p><ul><li><p>df</p></li></ul><p></p><p></p><p></p><p></p><p></p>",
|
||||
"updatedAt": "2025-10-24T12:11:46.031Z"
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user