feat: add multi-clip recording and reordering support in audio recorder
This commit is contained in:
		
							parent
							
								
									cd799a2024
								
							
						
					
					
						commit
						3f2d3f0e8f
					
				
							
								
								
									
										16
									
								
								PLAN.md
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								PLAN.md
									
									
									
									
									
								
							| @ -42,6 +42,10 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit | ||||
|   - [ ] Frontend: Buttons — "Save as Draft" and "Publish" (calls `/api/ghost/post`) | ||||
|   - [ ] Show status toast and link to view post | ||||
|   - [ ] ENV: `GHOST_ADMIN_API_URL`, `GHOST_ADMIN_API_KEY`, `GHOST_PUBLIC_URL` | ||||
|   - [ ] Media handling on publish: | ||||
|     - If `PUBLIC_MEDIA_BASE_URL` is set, copy each referenced media object from `S3_BUCKET/<key>` to `PUBLIC_MEDIA_BUCKET/<key>` and rewrite its URL in the post HTML and in `feature_image` to `PUBLIC_MEDIA_BASE_URL/<key>`. | ||||
|     - If `PUBLIC_MEDIA_BASE_URL` is not set, fall back to presigned URLs (SigV4, max 7 days) for private buckets. | ||||
|     - Ensure destination bucket/prefix is publicly readable for anonymous GET (prefer prefix-only like `images/*`). | ||||
| - **M7 · Media Management** (Scope: Goal 7) | ||||
|   - [x] Centralize media library view with reuse. | ||||
|   - [ ] Background cleanup/retention policies. | ||||
| @ -63,6 +67,9 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit | ||||
| - **Secrets** | ||||
|   - [x] `.env.example` for common keys (ADMIN_PASSWORD_HASH, OPENAI_API_KEY, GHOST_ADMIN_API_KEY, S3 credentials). | ||||
|   - [ ] Instructions for local secret population. | ||||
|   - [ ] Public media env: | ||||
|     - `PUBLIC_MEDIA_BUCKET` — bucket to store publicly-readable media copies (e.g., `public-media`). | ||||
|     - `PUBLIC_MEDIA_BASE_URL` — public HTTP base URL that maps directly to keys in `PUBLIC_MEDIA_BUCKET` (an object is served at `PUBLIC_MEDIA_BASE_URL/<key>`). | ||||
| 
 | ||||
| ## Tooling Decisions | ||||
| - **Dependency manager**: Adopt PNPM with workspace support for mono-repo friendliness and fast installs. | ||||
| @ -93,6 +100,12 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit | ||||
| - [ ] Start Admin: `pnpm run dev -C apps/admin` | ||||
| - [ ] Record → Stop → Upload → Transcribe; see transcript populate Draft. | ||||
| - [ ] Save Draft (local) and verify persistence on reload. | ||||
|  - [ ] Public media: | ||||
|    - Set `.env`: `PUBLIC_MEDIA_BUCKET` and `PUBLIC_MEDIA_BASE_URL`. | ||||
|    - Ensure destination bucket/prefix is public (MinIO Console or `mc anonymous set public myminio/<bucket>/images`). | ||||
|    - Create a draft containing an image, then publish it to Ghost (as either a draft or a published post). | ||||
|    - Check API logs for `[S3] Copy start`/`Copy done` and `[Ghost] Sample replacements` with `PUBLIC_MEDIA_BASE_URL/<key>`. | ||||
|    - `curl -I <rewritten-url>` should return HTTP 200 with a `Content-Type` of `image/*`. | ||||
| 
 | ||||
| ## MinIO Integration Checklist | ||||
| - [ ] Deploy MinIO on VPS (console `:9001`, API `:9000`). | ||||
| @ -104,6 +117,9 @@ Voice-first authoring tool for single-user Ghost blog. Capture audio, refine wit | ||||
|   - `S3_ACCESS_KEY=...` | ||||
|   - `S3_SECRET_KEY=...` | ||||
| - [ ] Optional: Set bucket policy to allow public reads for media. | ||||
|  - [ ] Public media setup (if using a dedicated bucket): | ||||
|    - Create bucket `public-media` (or chosen name) and make `images/*` prefix public (anonymous `s3:GetObject`). | ||||
|    - Set `.env`: `PUBLIC_MEDIA_BUCKET=public-media`, `PUBLIC_MEDIA_BASE_URL=https://<public-host>/public-media` (or path your gateway serves for that bucket). | ||||
| 
 | ||||
| ## Scaffolding Plan (Draft) | ||||
| - **Frontend (`apps/admin`)** | ||||
|  | ||||
| @ -6,14 +6,20 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string) | ||||
|   const chunksRef = useRef<Blob[]>([]); | ||||
|   const mimeRef = useRef<string>('audio/webm'); | ||||
|   const [recording, setRecording] = useState(false); | ||||
|   const [audioUrl, setAudioUrl] = useState<string | null>(null); | ||||
|   const [audioBlob, setAudioBlob] = useState<Blob | null>(null); | ||||
|   const [uploadKey, setUploadKey] = useState<string | null>(null); | ||||
|   const [uploadBucket, setUploadBucket] = useState<string | null>(null); | ||||
|   const [transcript, setTranscript] = useState<string>(''); | ||||
|   const [error, setError] = useState<string>(''); | ||||
|   const [isUploading, setIsUploading] = useState(false); | ||||
|   const [isTranscribing, setIsTranscribing] = useState(false); | ||||
|   type Clip = { /* One recorded audio clip together with its own upload/transcription state. */ | ||||
|     id: string; /* Unique identifier assigned when the recording stops; used as the React list key. */ | ||||
|     url: string; /* Object URL created from `blob`; used as the <audio> source and revoked on removal. */ | ||||
|     blob: Blob; /* The recorded audio data that gets uploaded. */ | ||||
|     mime: string; /* MIME type captured at recording time; decides the upload file extension. */ | ||||
|     uploadedKey?: string; /* Key reported by the upload response; must be set before transcribing. */ | ||||
|     uploadedBucket?: string | null; /* Bucket reported by the upload response, or null when none was returned. */ | ||||
|     transcript?: string; /* Transcript text returned by the speech-to-text request. */ | ||||
|     isUploading?: boolean; /* True while this clip's upload request is in flight. */ | ||||
|     isTranscribing?: boolean; /* True while this clip's transcription request is in flight. */ | ||||
|     error?: string; /* Message from the last failed upload/transcription; reset to '' when a new attempt starts. */ | ||||
|   }; | ||||
|   const [clips, setClips] = useState<Clip[]>([]); | ||||
| 
 | ||||
|   const requestStream = async (): Promise<MediaStream | null> => { | ||||
|     try { | ||||
| @ -59,12 +65,8 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string) | ||||
|     mr.onstop = () => { | ||||
|       const blob = new Blob(chunksRef.current, { type: mimeRef.current }); | ||||
|       const url = URL.createObjectURL(blob); | ||||
|       setAudioUrl((prev) => { | ||||
|         if (prev) URL.revokeObjectURL(prev); | ||||
|         return url; | ||||
|       }); | ||||
|       setAudioBlob(blob); | ||||
|       // stop all tracks to release mic
 | ||||
|       const id = (globalThis.crypto && 'randomUUID' in crypto) ? crypto.randomUUID() : `${Date.now()}_${Math.random().toString(36).slice(2)}`; | ||||
|       setClips((prev) => [...prev, { id, url, blob, mime: mimeRef.current }]); | ||||
|       stream.getTracks().forEach(t => t.stop()); | ||||
|     }; | ||||
| 
 | ||||
| @ -77,51 +79,40 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string) | ||||
|     setRecording(false); | ||||
|   }; | ||||
| 
 | ||||
|   const uploadAudio = async () => { | ||||
|   const uploadClip = async (idx: number) => { | ||||
|     const c = clips[idx]; | ||||
|     if (!c) return; | ||||
|     setClips((prev) => prev.map((x, i) => i === idx ? { ...x, isUploading: true, error: '' } : x)); | ||||
|     try { | ||||
|       setError(''); | ||||
|       setUploadKey(null); | ||||
|       setUploadBucket(null); | ||||
|       setTranscript(''); | ||||
|       setIsUploading(true); | ||||
|       if (!audioBlob) { | ||||
|         setError('No audio to upload'); | ||||
|         return; | ||||
|       } | ||||
|       const form = new FormData(); | ||||
|       const ext = mimeRef.current.includes('mp4') ? 'm4a' : 'webm'; | ||||
|       form.append('audio', audioBlob, `recording.${ext}`); | ||||
|       const res = await fetch('/api/media/audio', { | ||||
|         method: 'POST', | ||||
|         body: form, | ||||
|       }); | ||||
|       const ext = c.mime.includes('mp4') ? 'm4a' : 'webm'; | ||||
|       form.append('audio', c.blob, `recording.${ext}`); | ||||
|       const res = await fetch('/api/media/audio', { method: 'POST', body: form }); | ||||
|       if (!res.ok) { | ||||
|         const txt = await res.text(); | ||||
|         throw new Error(`Upload failed: ${res.status} ${txt}`); | ||||
|       } | ||||
|       const data = await res.json(); | ||||
|       setUploadKey(data.key || 'uploaded'); | ||||
|       setUploadBucket(data.bucket || null); | ||||
|       setClips((prev) => prev.map((x, i) => i === idx ? { ...x, uploadedKey: data.key || 'uploaded', uploadedBucket: data.bucket || null } : x)); | ||||
|     } catch (e: any) { | ||||
|       setError(e?.message || 'Upload failed'); | ||||
|       setClips((prev) => prev.map((x, i) => i === idx ? { ...x, error: e?.message || 'Upload failed' } : x)); | ||||
|     } finally { | ||||
|       setIsUploading(false); | ||||
|       setClips((prev) => prev.map((x, i) => i === idx ? { ...x, isUploading: false } : x)); | ||||
|     } | ||||
|   }; | ||||
| 
 | ||||
|   const transcribe = async () => { | ||||
|   const transcribeClip = async (idx: number) => { | ||||
|     const c = clips[idx]; | ||||
|     if (!c) return; | ||||
|     setClips((prev) => prev.map((x, i) => i === idx ? { ...x, isTranscribing: true, error: '' } : x)); | ||||
|     try { | ||||
|       setError(''); | ||||
|       setTranscript(''); | ||||
|       setIsTranscribing(true); | ||||
|       if (!uploadKey) { | ||||
|         setError('Upload audio before transcribing'); | ||||
|         return; | ||||
|       if (!c.uploadedKey) { | ||||
|         throw new Error('Upload before transcribing'); | ||||
|       } | ||||
|       const res = await fetch('/api/stt', { | ||||
|         method: 'POST', | ||||
|         headers: { 'Content-Type': 'application/json' }, | ||||
|         body: JSON.stringify({ bucket: uploadBucket ?? undefined, key: uploadKey }), | ||||
|         body: JSON.stringify({ bucket: c.uploadedBucket ?? undefined, key: c.uploadedKey }), | ||||
|       }); | ||||
|       if (!res.ok) { | ||||
|         const txt = await res.text(); | ||||
| @ -129,52 +120,89 @@ export default function Recorder({ onTranscript }: { onTranscript?: (t: string) | ||||
|       } | ||||
|       const data = await res.json(); | ||||
|       const t: string = data.transcript || ''; | ||||
|       setTranscript(t); | ||||
|       if (onTranscript) onTranscript(t); | ||||
|       setClips((prev) => prev.map((x, i) => i === idx ? { ...x, transcript: t } : x)); | ||||
|     } catch (e: any) { | ||||
|       setError(e?.message || 'Transcription failed'); | ||||
|       setClips((prev) => prev.map((x, i) => i === idx ? { ...x, error: e?.message || 'Transcription failed' } : x)); | ||||
|     } finally { | ||||
|       setIsTranscribing(false); | ||||
|       setClips((prev) => prev.map((x, i) => i === idx ? { ...x, isTranscribing: false } : x)); | ||||
|     } | ||||
|   }; | ||||
| 
 | ||||
|   // Reorders the clip list by moving the clip at index `from` to index `to`. | ||||
|   // The list is left untouched when either index is not a valid position or | ||||
|   // when both indices are equal. Validating `from` matters: with an | ||||
|   // out-of-range `from`, splice() removes nothing and `undefined` would be | ||||
|   // inserted into the list; a negative `from` would move a clip counted from | ||||
|   // the end instead. | ||||
|   const moveClip = (from: number, to: number) => { | ||||
|     setClips((prev) => { | ||||
|       const isValidIndex = (i: number) => Number.isInteger(i) && i >= 0 && i < prev.length; | ||||
|       if (!isValidIndex(from) || !isValidIndex(to) || from === to) return prev; | ||||
|       // Work on a copy so the previous state array is never mutated. | ||||
|       const arr = prev.slice(); | ||||
|       const [item] = arr.splice(from, 1); | ||||
|       arr.splice(to, 0, item); | ||||
|       return arr; | ||||
|     }); | ||||
|   }; | ||||
| 
 | ||||
|   // Drops the clip at position `idx` from the list and revokes the object | ||||
|   // URL of the removed clip, if there was one. | ||||
|   const removeClip = (idx: number) => { | ||||
|     setClips((current) => { | ||||
|       const remaining = [...current]; | ||||
|       const removed = remaining.splice(idx, 1)[0]; | ||||
|       if (removed && removed.url) { | ||||
|         URL.revokeObjectURL(removed.url); | ||||
|       } | ||||
|       return remaining; | ||||
|     }); | ||||
|   }; | ||||
| 
 | ||||
|   // Joins every non-empty clip transcript, in list order, separated by a | ||||
|   // blank line, and passes the result to `onTranscript` when it is provided. | ||||
|   const applyTranscriptsToDraft = () => { | ||||
|     const parts: string[] = []; | ||||
|     for (const clip of clips) { | ||||
|       if (clip.transcript) parts.push(clip.transcript); | ||||
|     } | ||||
|     const text = parts.join('\n\n'); | ||||
|     if (onTranscript) { | ||||
|       onTranscript(text); | ||||
|     } | ||||
|   }; | ||||
| 
 | ||||
|   useEffect(() => { | ||||
|     return () => { | ||||
|       if (audioUrl) URL.revokeObjectURL(audioUrl); | ||||
|       clips.forEach(c => c.url && URL.revokeObjectURL(c.url)); | ||||
|     }; | ||||
|   }, [audioUrl]); | ||||
|   }, [clips]); | ||||
| 
 | ||||
|   return ( | ||||
|     <Box> | ||||
|       <Typography variant="h6" sx={{ mb: 1 }}>Audio Recorder</Typography> | ||||
|       <Stack direction="row" spacing={2} sx={{ mb: 2 }}> | ||||
|         <Button variant="contained" disabled={recording || isUploading || isTranscribing} onClick={startRecording}>Start</Button> | ||||
|         <Button variant="outlined" disabled={!recording || isUploading || isTranscribing} onClick={stopRecording}>Stop</Button> | ||||
|         <Button variant="text" disabled={!audioBlob || isUploading || isTranscribing} onClick={uploadAudio}>Upload</Button> | ||||
|         <Button variant="text" disabled={!uploadKey || isUploading || isTranscribing} onClick={transcribe}>Transcribe</Button> | ||||
|       <Stack direction="row" spacing={2} sx={{ mb: 2, flexWrap: 'wrap' }}> | ||||
|         <Button variant="contained" disabled={recording} onClick={startRecording}>Start</Button> | ||||
|         <Button variant="outlined" disabled={!recording} onClick={stopRecording}>Stop</Button> | ||||
|         <Button variant="text" disabled={clips.every(c => !c.transcript)} onClick={applyTranscriptsToDraft}>Apply transcripts to draft</Button> | ||||
|         <Typography variant="body2" sx={{ alignSelf: 'center' }}>{recording ? 'Recording…' : ''}</Typography> | ||||
|       </Stack> | ||||
|       {error && <Typography color="error" sx={{ mb: 2 }}>{error}</Typography>} | ||||
|       {(isUploading || isTranscribing) && ( | ||||
|         <Typography variant="body2" sx={{ mb: 1 }}> | ||||
|           {isUploading ? 'Uploading…' : 'Transcribing…'} | ||||
|         </Typography> | ||||
|       )} | ||||
|       {audioUrl && ( | ||||
|         <Box> | ||||
|           <audio controls src={audioUrl} /> | ||||
|         </Box> | ||||
|       )} | ||||
|       {uploadKey && ( | ||||
|         <Typography variant="body2" sx={{ mt: 1 }}> | ||||
|           Uploaded as key: {uploadKey} | ||||
|         </Typography> | ||||
|       )} | ||||
|       {transcript && ( | ||||
|         <Box sx={{ mt: 2 }}> | ||||
|           <Typography variant="subtitle1">Transcript</Typography> | ||||
|           <Typography variant="body2" sx={{ whiteSpace: 'pre-wrap' }}>{transcript}</Typography> | ||||
|         </Box> | ||||
|       {clips.length === 0 && ( | ||||
|         <Typography variant="body2">No recordings yet.</Typography> | ||||
|       )} | ||||
|       <Stack spacing={2} sx={{ mt: 1 }}> | ||||
|         {clips.map((c, idx) => ( | ||||
|           <Box key={c.id} sx={{ border: '1px solid #ddd', borderRadius: 2, p: 1 }}> | ||||
|             <Stack direction="row" spacing={1} sx={{ justifyContent: 'space-between', alignItems: 'center', mb: 1 }}> | ||||
|               <Typography variant="subtitle2">Clip {idx + 1}</Typography> | ||||
|               <Stack direction="row" spacing={1}> | ||||
|                 <Button size="small" variant="outlined" disabled={idx === 0} onClick={() => moveClip(idx, idx - 1)}>Up</Button> | ||||
|                 <Button size="small" variant="outlined" disabled={idx === clips.length - 1} onClick={() => moveClip(idx, idx + 1)}>Down</Button> | ||||
|                 <Button size="small" variant="outlined" color="error" onClick={() => removeClip(idx)}>Remove</Button> | ||||
|               </Stack> | ||||
|             </Stack> | ||||
|             <audio controls src={c.url} /> | ||||
|             <Stack direction="row" spacing={1} sx={{ mt: 1, flexWrap: 'wrap' }}> | ||||
|               <Button size="small" variant="text" disabled={!!c.isUploading} onClick={() => uploadClip(idx)}> | ||||
|                 {c.isUploading ? 'Uploading…' : (c.uploadedKey ? 'Re-upload' : 'Upload')} | ||||
|               </Button> | ||||
|               <Button size="small" variant="text" disabled={!c.uploadedKey || !!c.isTranscribing} onClick={() => transcribeClip(idx)}> | ||||
|                 {c.isTranscribing ? 'Transcribing…' : (c.transcript ? 'Retranscribe' : 'Transcribe')} | ||||
|               </Button> | ||||
|               {c.uploadedKey && ( | ||||
|                 <Typography variant="caption" sx={{ alignSelf: 'center' }}>key: {c.uploadedKey}</Typography> | ||||
|               )} | ||||
|             </Stack> | ||||
|             {c.error && <Typography color="error" variant="body2" sx={{ mt: 1 }}>{c.error}</Typography>} | ||||
|             {c.transcript && ( | ||||
|               <Box sx={{ mt: 1 }}> | ||||
|                 <Typography variant="body2" sx={{ whiteSpace: 'pre-wrap' }}>{c.transcript}</Typography> | ||||
|               </Box> | ||||
|             )} | ||||
|           </Box> | ||||
|         ))} | ||||
|       </Stack> | ||||
|     </Box> | ||||
|   ); | ||||
| } | ||||
|  | ||||
| @ -1,5 +1,5 @@ | ||||
| { | ||||
|   "id": "31ba935b-4424-4226-9f8b-803d401022a2", | ||||
|   "content": "<pre><code>enasdasdasd</code></pre><p>zdfsdfsadsdfsdfsdf</p><p></p><p></p><p>sdfsdfs</p><img src=\"/api/media/obj?bucket=voxblog&key=images%2F2025-10-24%2F15962af6-52ae-4c16-918d-86b9e6488bfa.png\" alt=\"Vector-2.png\"><p></p><p></p><ul><li><p>df</p></li></ul><p></p><p></p><p></p><p></p><p></p>", | ||||
|   "updatedAt": "2025-10-24T09:28:18.204Z" | ||||
|   "content": "<pre><code>enasdasdasd</code></pre><p>zdfsdfsadsdfsdfsdf</p><p></p><p></p><p><a target=\"_blank\" rel=\"noopener noreferrer nofollow\" href=\"abc\">asdasd</a></p><img src=\"/api/media/obj?bucket=voxblog&key=images%2F2025-10-24%2F15962af6-52ae-4c16-918d-86b9e6488bfa.png\" alt=\"Vector-2.png\"><p></p><p></p><ul><li><p>df</p></li></ul><p></p><p></p><p></p><p></p><p></p>", | ||||
|   "updatedAt": "2025-10-24T12:11:46.031Z" | ||||
| } | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user