// scripts/lib/vtt-parser.js

/** WebVTT line terminators: CRLF, a lone CR, or a lone LF. */
const LINE_BREAK = /\r\n|\r|\n/;

/**
 * Cue timing line, `start --> end`.
 *
 * The hours component is optional and may have more than two digits, so both
 * `mm:ss.ttt` and `hh:mm:ss.ttt` are accepted. The lookbehind keeps the match
 * from starting in the middle of a longer digit/colon run, which would
 * otherwise silently drop the leading part of a malformed timestamp.
 * Anything after the end timestamp (cue settings) is ignored.
 */
const CUE_TIMING =
  /(?<![\d:])((?:\d{2,}:)?\d{2}:\d{2}\.\d{3})\s+-->\s+((?:\d{2,}:)?\d{2}:\d{2}\.\d{3})/;

/**
 * Voice span that covers the whole cue payload: `<v Speaker>text</v>`.
 *
 * An optional class annotation (`<v.loud Speaker>`) is skipped, and the
 * closing `</v>` is optional.
 */
const VOICE_SPAN = /^<v(?:\.[^\s>]*)?\s+([^>]+)>([\s\S]*?)(?:<\/v>)?$/;

/**
 * Builds one cue object from a cue payload that has already been joined into
 * a single line.
 *
 * @param {string} payload - Trimmed, non-empty cue text.
 * @param {string} start - Start timestamp exactly as written in the source.
 * @param {string} end - End timestamp exactly as written in the source.
 * @returns {{speaker: (string|null), text: string, start: string, end: string}}
 *   `speaker` is null when the payload is not wrapped in a voice span.
 */
function toCue(payload, start, end) {
  const voice = VOICE_SPAN.exec(payload);
  if (!voice) {
    return { speaker: null, text: payload, start, end };
  }
  return { speaker: voice[1].trim(), text: voice[2].trim(), start, end };
}

/**
 * Collapses runs of adjacent cues that share the same speaker (including runs
 * of speaker-less cues) into a single cue: texts are joined with a space, the
 * first cue's start and the last cue's end are kept.
 *
 * @param {Array<object>} cues - Cues in document order; not mutated.
 * @returns {Array<object>} New array of shallow-copied, merged cues.
 */
function mergeBySpeaker(cues) {
  const merged = [];
  for (const cue of cues) {
    const last = merged[merged.length - 1];
    if (last !== undefined && last.speaker === cue.speaker) {
      last.text = `${last.text} ${cue.text}`;
      last.end = cue.end;
    } else {
      // Copy so that extending `last` above never touches the caller's cue.
      merged.push({ ...cue });
    }
  }
  return merged;
}

/**
 * Parses WebVTT text into a flat list of cues.
 *
 * Every line that is not a cue timing line (the `WEBVTT` header, cue
 * identifiers, NOTE text, blank lines) is skipped. The payload of a cue is
 * every following line up to the next blank line or the next line containing
 * `-->`; payload lines are joined with a single space. Cues with an empty
 * payload are dropped.
 *
 * @param {string} vtt - WebVTT document text.
 * @param {object} [options]
 * @param {boolean} [options.mergeConsecutive=false] - Merge adjacent cues
 *   that have the same speaker into one cue.
 * @returns {Array<{speaker: (string|null), text: string, start: string, end: string}>}
 * @throws {TypeError} If `vtt` is not a string.
 */
function parseVtt(vtt, options = {}) {
  if (typeof vtt !== 'string') {
    throw new TypeError('parseVtt expects the WebVTT source as a string');
  }
  // `??` also covers an explicit `null`, which the default parameter does not.
  const { mergeConsecutive = false } = options ?? {};

  const lines = vtt.split(LINE_BREAK);
  const cues = [];
  let i = 0;

  while (i < lines.length) {
    const timing = CUE_TIMING.exec(lines[i]);
    i += 1;
    if (!timing) {
      continue;
    }
    const [, start, end] = timing;

    const payload = [];
    while (i < lines.length && lines[i].trim() !== '' && !lines[i].includes('-->')) {
      payload.push(lines[i]);
      i += 1;
    }

    const raw = payload.join(' ').trim();
    if (raw !== '') {
      cues.push(toCue(raw, start, end));
    }
  }

  return mergeConsecutive ? mergeBySpeaker(cues) : cues;
}

/**
 * Renders cues as a plain-text transcript, one `speaker: text` line per cue.
 * Cues with a missing or empty speaker are labelled `[unknown]`.
 *
 * @param {Array<{speaker: (string|null), text: string}>} cues
 * @returns {string} Lines joined with `\n`, without a trailing newline.
 */
function formatTranscript(cues) {
  return cues
    .map(({ speaker, text }) => `${speaker || '[unknown]'}: ${text}`)
    .join('\n');
}

// Guarded so that loading this file where `module` is undefined (for example
// as an ES module) does not throw; under CommonJS the exports are unchanged.
if (typeof module !== 'undefined') {
  module.exports = { parseVtt, formatTranscript };
}