Initial commit of scripts for importing & exporting translations from Weblate

2024-03-19 01:08:21 -07:00
parent d48534f1a5
commit 56c847e791
6 changed files with 290 additions and 94 deletions
--- a/localization/stringscan.py
+++ b/localization/stringscan.py
@@ -1,11 +1,14 @@
 import ast
 import glob
 import os
+import pathlib
+
+import polib

 # Walk the directory and open all .py files using glob
-strings = set()
+strings = dict()
+root_path = pathlib.Path('../io_scene_psk_psa').resolve()
 for file in glob.glob('../io_scene_psk_psa/**/*.py', recursive=True):
-    print(file)
    with open(os.path.join(file), 'r') as f:
        if file.endswith('i18n.py'):
            # TODO: Don't parse the i18n files.
@@ -15,37 +18,39 @@ for file in glob.glob('../io_scene_psk_psa/**/*.py', recursive=True):
            a = ast.parse(f.read())
            for node in ast.walk(a):
                if isinstance(node, ast.Constant) and isinstance(node.value, str):
-                    strings.add(node.s)
+                    source_path = pathlib.Path(file).resolve()  # Keep `a` bound to the parsed AST.
+                    filepath = source_path.relative_to(root_path)
+                    print(filepath)
+                    if node.value not in strings:  # Only the first occurrence of each string is recorded.
+                        strings[node.value] = filepath, node.lineno, node.col_offset
        except UnicodeDecodeError as e:
            print(f'Error reading file {file}: {e}')

-# Remove all strings that are empty or contain only whitespace.
-strings = set(filter(lambda x: x.strip(), strings))
+string_keys = set(strings.keys())
+
+# Remove all keys from the dictionary that are empty or contain only whitespace.
+string_keys = set(filter(lambda x: x.strip(), string_keys))

 # Remove all strings that have no alphabetic characters.
-strings = set(filter(lambda x: any(c.isalpha() for c in x), strings))
+string_keys = set(filter(lambda x: any(c.isalpha() for c in x), string_keys))

 # Remove any strings that have '@return: ' in them.
-strings = set(filter(lambda x: '@return: ' not in x, strings))
+string_keys = set(filter(lambda x: '@return: ' not in x, string_keys))

 # Remove any strings that are entirely lowercase and have no whitespace.
-strings = set(filter(lambda x: not x.islower() or ' ' in x, strings))
+string_keys = set(filter(lambda x: not x.islower() or ' ' in x, string_keys))

 # Remove any strings that are in SCREAMING_SNAKE_CASE.
-strings = set(filter(lambda x: not x.isupper(), strings))
+string_keys = set(filter(lambda x: not x.isupper(), string_keys))

 # Remove any strings that have underscores.
-strings = set(filter(lambda x: '_' not in x, strings))
+string_keys = set(filter(lambda x: '_' not in x, string_keys))

 # Remove any string that starts with a newline.
-strings = set(filter(lambda x: not x.startswith('\n'), strings))
+string_keys = set(filter(lambda x: not x.startswith('\n'), string_keys))

 # Remove any string that looks like a regular expression.
-strings = set(filter(lambda x: not any(c in x for c in '^'), strings))
-
-# Convert the set to a list and sort it.
-strings = list(strings)
-strings.sort()
+string_keys = set(filter(lambda x: not any(c in x for c in '^'), string_keys))

 def write_multiline_string(f, string):
    f.write(f'msgid ""\n')
@@ -84,7 +89,6 @@ exclude_strings = {
    'Select',
    'RemoveTracks'
    'Source',
-    'Stash',
    'Move Up',
    'Move Down',
    'Unassigned',
@@ -92,41 +96,38 @@ exclude_strings = {
    'Suffix',
    'Timeline Markers',
    'Pose Markers',
-    'Actions'
+    'Actions',
+    'sRGBA',
 }

 # Remove any strings that are in the exclude_strings set.
-strings = set(filter(lambda x: x not in exclude_strings, strings))
+string_keys = set(filter(lambda x: x not in exclude_strings, string_keys))

-with open('./artifacts/io_scene_psk_psa.en.po', 'w') as f:
-    # Write the header (language, mime-version, content-type & content-transfer-encoding).
-    f.write('msgid ""\n'
-            'msgstr ""\n'
-            '"Language: en\\n"\n'
-            '"MIME-Version: 1.0\\n"\n'
-            '"Content-Type: text/plain\\n"\n'
-            '"Content-Transfer-Encoding: 8bit; charset=UTF-8\\n"\n\n'
-            )
-    for string in strings:
-        if is_multi_line := '\n' in string:
-            f.write(f'msgid ""\n')
-            # Split the string into lines and write each line as a separate msgid.
-            for line in string.split('\n'):
-                f.write(f'"{line}"\n')
-            f.write(f'msgstr ""\n')
-            # Split the string into lines and write each line as a separate msgid.
-            for line in string.split('\n'):
-                f.write(f'"{line}"\n')
-        else:
-            f.write(f'msgid "{string}"\n')
-            f.write(f'msgstr "{string}"\n')
-        f.write('\n')
+# Make a new PO file and write the strings to it.
+pofile = polib.POFile()
+
+# Header fields belong in `metadata`; polib writes `header` out as comment lines.
+pofile.metadata = {
+    'Language': 'en',
+    'MIME-Version': '1.0',
+    'Content-Type': 'text/plain; charset=UTF-8',
+    'Content-Transfer-Encoding': '8bit',
+}
+
+# Order the surviving keys alphabetically so the PO output is stable.
+string_keys = sorted(string_keys)
+
+for string_key in string_keys:
+    # Each value is the (path, line, column) of the first occurrence; the column is unused.
+    source_file, line_number, _ = strings[string_key]
+    entry = polib.POEntry(
+        msgid=string_key,
+        msgstr=string_key,
+        comment=f'{source_file}:{line_number}',
+    )
+    pofile.append(entry)
+
+pofile.save('../extern/io_scene_psk_psa-translations/io_scene_psk_psa.en.po')

 # Print the # of strings.
-print(f'Found {len(strings)} strings.')
-
-# Zip the file.
-import zipfile
-
-with zipfile.ZipFile('./artifacts/io_scene_psk_psa.po.zip', 'w') as z:
-    z.write('./artifacts/io_scene_psk_psa.en.po')
+print(f'Found {len(string_keys)} strings.')