Here’s a simple Python tool that converts subtitles between two popular formats: SRT (SubRip Subtitle) and VTT (WebVTT). It can be extended for other formats as well.
import argparse
import re
import sys
class SubtitleConverter:
    """Convert a subtitle file between SRT (SubRip) and VTT (WebVTT).

    Cues are passed between the readers and writers as a list of dicts
    with the keys ``index``, ``start``, ``end`` and ``text``.  ``start``
    and ``end`` are always stored in SRT notation (``HH:MM:SS,mmm``);
    ``write_vtt`` converts them to the dotted VTT notation on output.

    Args:
        input_file: Path of the subtitle file to read.
        output_file: Path of the subtitle file to write.
        input_format: ``'srt'`` or ``'vtt'`` (case-insensitive).
        output_format: ``'srt'`` or ``'vtt'`` (case-insensitive).
    """

    # One or more blank (or whitespace-only) lines separate SRT cue blocks.
    _BLOCK_SEPARATOR = re.compile(r'\n\s*\n')
    # SRT timing line; anything after the end timestamp is ignored.
    _SRT_TIMING = re.compile(r'(\S+)\s+-->\s+(\S+)')
    # VTT timing line.  The hours field is optional in WebVTT, and cue
    # settings (e.g. "align:start") may follow the end timestamp.
    _VTT_TIMING = re.compile(
        r'((?:\d{2,}:)?\d{2}:\d{2}\.\d{3})[ \t]+-->[ \t]+'
        r'((?:\d{2,}:)?\d{2}:\d{2}\.\d{3})'
    )

    def __init__(self, input_file, output_file, input_format, output_format):
        self.input_file = input_file
        self.output_file = output_file
        self.input_format = input_format.lower()
        self.output_format = output_format.lower()

    def convert(self):
        """Read the input file and write it in the output format.

        Raises:
            ValueError: If the format pair is not srt->vtt or vtt->srt, or
                if an SRT cue has a malformed timing line.
            OSError: If a file cannot be opened.
        """
        if self.input_format == 'srt' and self.output_format == 'vtt':
            subs = self.read_srt()
            self.write_vtt(subs)
        elif self.input_format == 'vtt' and self.output_format == 'srt':
            subs = self.read_vtt()
            self.write_srt(subs)
        else:
            raise ValueError(f"Conversion from {self.input_format} to {self.output_format} not supported.")

    def read_srt(self):
        """Parse the input file as SRT and return the list of cue dicts.

        Blocks with fewer than three lines (index, timing, text) are
        skipped.

        Raises:
            ValueError: If a block's second line is not a timing line.
        """
        subs = []
        # utf-8-sig transparently drops a leading BOM, which would
        # otherwise become part of the first cue's index.
        with open(self.input_file, 'r', encoding='utf-8-sig') as f:
            content = f.read().strip()
        for block in self._BLOCK_SEPARATOR.split(content):
            lines = block.strip().split('\n')
            if len(lines) < 3:
                continue
            timecode = lines[1].strip()
            match = self._SRT_TIMING.match(timecode)
            if match is None:
                raise ValueError(f"Malformed SRT timing line: {timecode!r}")
            start, end = match.groups()
            subs.append({
                'index': lines[0].strip(),
                'start': start,
                'end': end,
                'text': '\n'.join(lines[2:]),
            })
        return subs

    def write_vtt(self, subs):
        """Write *subs* to the output file in WebVTT format."""
        with open(self.output_file, 'w', encoding='utf-8') as f:
            f.write("WEBVTT\n\n")
            for sub in subs:
                # VTT uses '.' instead of ',' before the milliseconds.
                start = sub['start'].replace(',', '.')
                end = sub['end'].replace(',', '.')
                f.write(f"{start} --> {end}\n{sub['text']}\n\n")

    def read_vtt(self):
        """Parse the input file as WebVTT and return the list of cue dicts.

        Only timing lines and the text lines that follow them (up to the
        next blank line) are used; the header, cue identifiers and any
        other line that is not a timing line are skipped.  Cues are
        numbered sequentially from 1.
        """
        subs = []
        with open(self.input_file, 'r', encoding='utf-8-sig') as f:
            lines = [line.strip() for line in f]
        remaining = iter(lines)
        for line in remaining:
            match = self._VTT_TIMING.match(line)
            if match is None:
                continue
            # Consume the cue's text from the same iterator, up to the
            # blank line (or end of file) that terminates the cue.
            text_lines = []
            for text_line in remaining:
                if not text_line:
                    break
                text_lines.append(text_line)
            subs.append({
                'index': str(len(subs) + 1),
                'start': self._vtt_to_srt_time(match.group(1)),
                'end': self._vtt_to_srt_time(match.group(2)),
                'text': '\n'.join(text_lines),
            })
        return subs

    def write_srt(self, subs):
        """Write *subs* to the output file in SRT format."""
        with open(self.output_file, 'w', encoding='utf-8') as f:
            for sub in subs:
                f.write(f"{sub['index']}\n")
                f.write(f"{sub['start']} --> {sub['end']}\n")
                f.write(f"{sub['text']}\n\n")

    @staticmethod
    def _vtt_to_srt_time(timestamp):
        """Return a VTT timestamp in SRT notation (``HH:MM:SS,mmm``)."""
        # SRT always carries an hours field; WebVTT may omit it.
        if timestamp.count(':') == 1:
            timestamp = '00:' + timestamp
        return timestamp.replace('.', ',')
# Command-line entry point:
#   subtitle_converter.py INPUT OUTPUT INPUT_FORMAT OUTPUT_FORMAT
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert subtitles between SRT and VTT formats.")
    parser.add_argument("input_file", help="Input subtitle file")
    parser.add_argument("output_file", help="Output subtitle file")
    parser.add_argument("input_format", choices=['srt', 'vtt'], help="Input subtitle format")
    parser.add_argument("output_format", choices=['srt', 'vtt'], help="Output subtitle format")
    args = parser.parse_args()

    converter = SubtitleConverter(args.input_file, args.output_file, args.input_format, args.output_format)
    try:
        converter.convert()
    except (OSError, ValueError) as e:
        # Report failures on stderr and exit non-zero so shell scripts and
        # pipelines can detect that the conversion did not happen.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
    print(f"Converted {args.input_file} from {args.input_format.upper()} to {args.output_format.upper()} as {args.output_file}")
How it works
- Reads the input file in either SRT or VTT format.
- Parses timestamps and text blocks.
- Converts the timestamp format (e.g., commas to dots for VTT).
- Writes the output file in the desired format.
Usage example
```bash
python subtitle_converter.py input.srt output.vtt srt vtt
python subtitle_converter.py input.vtt output.srt vtt srt
```
Let me know if you want me to add support for other formats or features!
Leave a Reply