1
0
Fork 0
mirror of https://github.com/MatomoCamp/recording-subtitles.git synced 2024-09-19 16:03:52 +02:00
recording-subtitles/simple_fixes.py

41 lines
1 KiB
Python
Raw Normal View History

2022-10-21 15:36:19 +02:00
"""
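Fix a few common misspellings in the subtitle (.srt) files found below the
current directory.

The single-pass multi-string replacement is based on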
https://stackoverflow.com/a/15448887
"""
import re
from pathlib import Path
# Root of the search; relative, so it resolves against the working directory
# the script is started from.
current_dir = Path(".")

# Misspelling -> correction. Keys are matched literally and case-sensitively,
# anywhere in the text (not only at word boundaries).
replacements = {
    "Mitoma": "Matomo",
    "Matobo": "Matomo",
    "Matoma": "Matomo",
    "matoma": "matomo",
    "Matamow": "Matomo",
    "Matahomo": "Matomo",
    "Matamua": "Matomo",
    "Mitsuama": "Matomo",
    "Biwik": "Piwik",
    "Matomo Camp": "MatomoCamp",
    "Matomo camp": "MatomoCamp",
    "matamocamp": "matomocamp",
    "matumocamp": "matomocamp",
    "motomocamp": "matomocamp",
    "MatomoCam ": "MatomoCamp ",
    "MatomoCampp": "MatomoCamp",
    "Big Blue Button": "BigBlueButton",
    "Lucas": "Lukas",
}

# Longest keys first: regex alternation picks the first alternative that
# matches at a position, so a longer key must come before any shorter key
# it starts with.
search_strings = [re.escape(k) for k in sorted(replacements, key=len, reverse=True)]
# NOTE: every key is escaped, so the pattern contains no "." wildcard and
# re.DOTALL does not change what is matched.
pattern = re.compile("|".join(search_strings), flags=re.DOTALL)

# Rewrite every .srt file below current_dir (recursively) in place.
for srt_file in current_dir.glob("**/*.srt"):
    # Explicit encoding so the result does not depend on the platform's
    # locale default (assumes the subtitle files are UTF-8 -- TODO confirm).
    text = srt_file.read_text(encoding="utf-8")
    # One pass over the text: each match is replaced by its dictionary value;
    # replaced text is not scanned again. n is the number of substitutions.
    fixed_text, n = pattern.subn(lambda match: replacements[match.group(0)], text)
    if n:
        print(f"replaced {n} words in {srt_file}")
        # Only touch files that actually changed.
        srt_file.write_text(fixed_text, encoding="utf-8")