Categories We Write About

Write a tool to convert subtitles between formats

Here’s a simple Python tool that converts subtitles between two popular formats: SRT (SubRip Subtitle) and VTT (WebVTT). It can be extended for other formats as well.

python
import argparse
import re
import sys


class SubtitleConverter:
    """Convert a subtitle file between SRT (SubRip) and WebVTT formats.

    Cues are held in memory as dicts with the string keys ``index``,
    ``start``, ``end`` and ``text``. Timestamps in those dicts always use
    the notation of the format they were read from or are about to be
    written to (``,`` before milliseconds for SRT, ``.`` for VTT).

    Args:
        input_file: Path of the subtitle file to read.
        output_file: Path of the subtitle file to create/overwrite.
        input_format: ``'srt'`` or ``'vtt'`` (case-insensitive).
        output_format: ``'srt'`` or ``'vtt'`` (case-insensitive).
    """

    # A WebVTT cue timing line. The hours field is optional in WebVTT, and
    # anything after the end timestamp (cue settings such as "align:start")
    # is deliberately left outside the captured groups.
    _VTT_TIMING = re.compile(
        r'^((?:\d+:)?\d{2}:\d{2}\.\d{3})[ \t]+-->[ \t]+((?:\d+:)?\d{2}:\d{2}\.\d{3})'
    )

    def __init__(self, input_file, output_file, input_format, output_format):
        self.input_file = input_file
        self.output_file = output_file
        self.input_format = input_format.lower()
        self.output_format = output_format.lower()

    def convert(self):
        """Read the input file and write it out in the target format.

        Raises:
            ValueError: If the (input_format, output_format) pair is not
                srt->vtt or vtt->srt, or if the input is malformed.
            OSError: If a file cannot be opened.
        """
        if self.input_format == 'srt' and self.output_format == 'vtt':
            subs = self.read_srt()
            self.write_vtt(subs)
        elif self.input_format == 'vtt' and self.output_format == 'srt':
            subs = self.read_vtt()
            self.write_srt(subs)
        else:
            raise ValueError(f"Conversion from {self.input_format} to {self.output_format} not supported.")

    def read_srt(self):
        """Parse ``self.input_file`` as SRT and return a list of cue dicts.

        Blocks with fewer than three lines (index, timecode, text) are
        skipped.

        Raises:
            ValueError: If a block's second line has no ``-->`` separator.
        """
        subs = []
        # utf-8-sig transparently drops a leading BOM if one is present.
        with open(self.input_file, 'r', encoding='utf-8-sig') as f:
            content = f.read().strip()

        # Cues are separated by one or more blank (whitespace-only) lines.
        blocks = re.split(r'\n\s*\n', content)
        for block in blocks:
            lines = block.strip().split('\n')
            if len(lines) < 3:
                continue
            index = lines[0].strip()
            timecode = lines[1].strip()
            text = '\n'.join(lines[2:])
            start, sep, end = timecode.partition('-->')
            if not sep:
                raise ValueError(f"Invalid SRT timecode line: {timecode!r}")
            subs.append({
                'index': index,
                'start': start.strip(),
                'end': end.strip(),
                'text': text,
            })
        return subs

    def write_vtt(self, subs):
        """Write cue dicts to ``self.output_file`` as WebVTT.

        Cue indices are not written; only timing and text are emitted.
        """
        with open(self.output_file, 'w', encoding='utf-8') as f:
            f.write("WEBVTT\n\n")
            for sub in subs:
                # VTT uses '.' instead of ',' before the milliseconds.
                start = sub['start'].replace(',', '.')
                end = sub['end'].replace(',', '.')
                f.write(f"{start} --> {end}\n{sub['text']}\n\n")

    def read_vtt(self):
        """Parse ``self.input_file`` as WebVTT and return a list of cue dicts.

        Only cue timing lines and the text lines that follow them (up to the
        next blank line) are used; the ``WEBVTT`` header and any other line
        that is not a timing line are ignored. Cues are numbered from 1 and
        the returned timestamps are already in SRT notation
        (``hh:mm:ss,mmm``).
        """
        subs = []
        with open(self.input_file, 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()

        index = 1
        i = 0
        while i < len(lines):
            match = self._VTT_TIMING.match(lines[i].strip())
            if match:
                start, end = match.groups()
                text_lines = []
                i += 1
                # Cue text runs until the next blank line or end of file.
                while i < len(lines) and lines[i].strip() != '':
                    text_lines.append(lines[i].strip())
                    i += 1
                subs.append({
                    'index': str(index),
                    'start': self._vtt_to_srt_timestamp(start),
                    'end': self._vtt_to_srt_timestamp(end),
                    'text': '\n'.join(text_lines),
                })
                index += 1
            i += 1
        return subs

    @staticmethod
    def _vtt_to_srt_timestamp(timestamp):
        """Return a VTT timestamp in SRT notation (hours present, ',' before ms)."""
        if timestamp.count(':') == 1:
            # WebVTT allows "mm:ss.ttt"; SRT always carries an hours field.
            timestamp = '00:' + timestamp
        return timestamp.replace('.', ',')

    def write_srt(self, subs):
        """Write cue dicts to ``self.output_file`` as SRT."""
        with open(self.output_file, 'w', encoding='utf-8') as f:
            for sub in subs:
                f.write(f"{sub['index']}\n")
                f.write(f"{sub['start']} --> {sub['end']}\n")
                f.write(f"{sub['text']}\n\n")


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Convert subtitles between SRT and VTT formats.")
    parser.add_argument("input_file", help="Input subtitle file")
    parser.add_argument("output_file", help="Output subtitle file")
    parser.add_argument("input_format", choices=['srt', 'vtt'], help="Input subtitle format")
    parser.add_argument("output_format", choices=['srt', 'vtt'], help="Output subtitle format")
    args = parser.parse_args(argv)

    converter = SubtitleConverter(args.input_file, args.output_file, args.input_format, args.output_format)
    try:
        converter.convert()
    except Exception as e:
        # Top-level boundary: report on stderr and signal failure to the shell.
        print(f"Error: {e}", file=sys.stderr)
        return 1
    print(f"Converted {args.input_file} from {args.input_format.upper()} to {args.output_format.upper()} as {args.output_file}")
    return 0


if __name__ == "__main__":
    sys.exit(main())

How it works

  • Reads the input file in either SRT or VTT format.

  • Parses timestamps and text blocks.

  • Converts the timestamp format (e.g., commas to dots for VTT).

  • Writes the output file in the desired format.

Usage example

bash
python subtitle_converter.py input.srt output.vtt srt vtt
python subtitle_converter.py input.vtt output.srt vtt srt

Let me know if you want me to add support for other formats or features!

Share This Page:

Enter your email below to join The Palos Publishing Company Email List

We respect your email privacy

Comments

Leave a Reply

Your email address will not be published. Required fields are marked *

Categories We Write About