53 lines
1.4 KiB
JavaScript
53 lines
1.4 KiB
JavaScript
/**
 * Parse WebVTT caption text into a flat list of cues.
 *
 * Every line containing a `start --> end` timing opens a cue; the non-blank
 * lines that follow it (up to the next blank line or timing line) are joined
 * with single spaces to form the cue text. Anything else — the `WEBVTT`
 * header, NOTE/STYLE blocks, cue identifiers — is skipped. Cues whose text is
 * empty are dropped.
 *
 * When the whole cue text is one voice span (`<v Name>text</v>`; the closing
 * tag may be omitted and the tag may carry classes such as `<v.loud Name>`),
 * the name is returned as `speaker` and the tag is stripped from `text`.
 * Otherwise `speaker` is `null` and `text` is the raw cue text.
 *
 * @param {string} vtt - Full WebVTT document (LF or CRLF line endings).
 * @param {object} [options]
 * @param {boolean} [options.mergeConsecutive=false] - Collapse runs of
 *   adjacent cues that share the same speaker (including `null`) into one cue
 *   spanning from the first cue's start to the last cue's end.
 * @returns {Array<{speaker: (string|null), text: string, start: string, end: string}>}
 *   Cues in document order. `start`/`end` are timestamp strings that always
 *   include the hours field (`hh:mm:ss.mmm`).
 */
function parseVtt(vtt, options = {}) {
  const { mergeConsecutive = false } = options;

  // WebVTT timestamps may omit the hours field (`mm:ss.mmm`) and may use more
  // than two hour digits, so the hours group is optional and open-ended.
  // The pattern is unanchored so trailing cue settings (`align:start`) are fine.
  const timingPattern =
    /((?:\d{2,}:)?\d\d:\d\d\.\d{3})\s+-->\s+((?:\d{2,}:)?\d\d:\d\d\.\d{3})/;

  // A voice span covering the entire cue text. Optional `.class` suffixes on
  // the tag and an optional closing `</v>` are both accepted.
  const voicePattern = /^<v(?:\.[^\s>]+)?\s+([^>]+)>([\s\S]*?)(?:<\/v>)?$/;

  // Keep the output format uniform: add the hours field when it was omitted.
  const withHours = (stamp) =>
    stamp.split(':').length === 2 ? `00:${stamp}` : stamp;

  const lines = vtt.split(/\r?\n/);
  const cues = [];
  let i = 0;

  while (i < lines.length) {
    const timing = lines[i].match(timingPattern);
    i++;
    // Not a timing line: header, NOTE/STYLE block, cue identifier or blank.
    if (!timing) continue;

    // Collect the cue payload: everything up to a blank line or the next
    // line that looks like a timing line.
    const textLines = [];
    while (
      i < lines.length &&
      lines[i].trim() !== '' &&
      !lines[i].includes('-->')
    ) {
      textLines.push(lines[i]);
      i++;
    }

    const raw = textLines.join(' ').trim();
    if (!raw) continue;

    const voice = raw.match(voicePattern);
    cues.push({
      speaker: voice ? voice[1].trim() : null,
      text: voice ? voice[2].trim() : raw,
      start: withHours(timing[1]),
      end: withHours(timing[2]),
    });
  }

  if (!mergeConsecutive) return cues;

  const merged = [];
  for (const cue of cues) {
    const last = merged[merged.length - 1];
    if (last && last.speaker === cue.speaker) {
      // Same speaker as the previous entry: extend it instead of adding one.
      last.text = `${last.text} ${cue.text}`;
      last.end = cue.end;
    } else {
      // Copy so that extending a merged entry never mutates the parsed cue.
      merged.push({ ...cue });
    }
  }
  return merged;
}
|
|
|
|
/**
 * Render cues as a plain-text transcript, one `speaker: text` line per cue.
 *
 * @param {Array<{speaker: (string|null), text: string}>} cues - Cues to render.
 * @returns {string} Lines joined with `\n`; an empty string for no cues.
 */
function formatTranscript(cues) {
  return cues
    // `||` rather than `??` on purpose: an empty-string speaker is also
    // rendered as `[unknown]`.
    .map(({ speaker, text }) => `${speaker || '[unknown]'}: ${text}`)
    .join('\n');
}
|
|
|
|
// Public API (CommonJS): the VTT parser and the transcript formatter.
module.exports = { parseVtt, formatTranscript };
|