- #!/usr/bin/python3
- # -*- coding: UTF-8 -*-
- import io
- import os
- import struct
- import json
def decode_string(byte_array):
    """Decode an obfuscated PTD string.

    Every byte is stored shifted up by 0x26; after undoing the shift the
    payload is a NUL-terminated UTF-16LE string.  Accepts any iterable of
    ints (bytes, list, ...) and returns the decoded text without the
    terminator.  Undecodable code units become U+FFFD ('replace').
    """
    shifted = bytes((value - 0x26) % 0x100 for value in byte_array)
    assert shifted[-2:] == b"\x00\x00"  # UTF-16 NUL terminator must be present
    return shifted[:-2].decode("utf-16-le", "replace")
def hash_format(f):
    """Read a little-endian u32 from stream *f* and return it as 8 lowercase hex digits."""
    (value,) = struct.unpack("<I", f.read(4))
    return format(value, "08x")
def export_ptd(f: io.BufferedReader):
    """Parse one PTD binary text container and return its contents as a dict.

    All integers are little-endian u32; strings are UTF-16LE with every byte
    shifted up by 0x26 (undone by decode_string).  The layout asserts raise
    AssertionError on any file that deviates from the expected format.

    Returns a dict with the raw header fields plus:
      "hash_table": {hash hex string -> decoded entry name}
      "texts":      {entry name -> decoded text}, reordered to follow the
                    GroupID order when the file carries group ids.
    """
    assert f.read(4) == b"PTD\0"  # magic
    _1, _2, hash_count, hash_data_pos, _4, string_data_pos = struct.unpack("<6I", f.read(24))
    # Header constants observed in every known file.  NOTE(review): _2 == 0x26
    # equals the byte shift used by decode_string -- presumably related, confirm.
    assert _1 == 0x02
    assert _2 == 0x26
    assert _4 == 0x01
    data = {
        "_1": _1,
        "_2": _2,
        "hash_count": hash_count,
        "hash_data_pos": hash_data_pos,
        "_4": _4,
        "string_data_pos": string_data_pos,
        "hash_table": {},
        "texts": {}
    }
    # Hash table: hash_count fixed 0x10-byte records, followed by the name
    # strings packed back-to-back starting right after the record array.
    hash_name_pos = hash_data_pos + hash_count * 0x10
    for i in range(hash_count):
        f.seek(hash_data_pos + i * 0x10)
        hash = hash_format(f)
        # name_pos is read but never used: names are consumed sequentially,
        # with the running cursor kept in hash_name_pos instead.
        name_pos, char_length, byte_length = struct.unpack("<3I", f.read(12))
        assert byte_length == char_length * 2  # UTF-16: two bytes per character
        f.seek(hash_name_pos)
        name = decode_string(f.read(byte_length))
        hash_name_pos = f.tell()
        data["hash_table"][hash] = name
    assert string_data_pos == f.tell()  # name block must end where the header said
    # assert "3b8ba7c7" in data["hash_table"]
    # assert data["hash_table"]["3b8ba7c7"]["name"] == "Text"
    # File info record.  The text_pos read here is never used; the variable is
    # recomputed below from the CharName record array.
    hash = hash_format(f)
    has_groupid, __2, __3, text_pos = struct.unpack("<4I", f.read(16))
    assert has_groupid in [0, 1]
    assert __2 == 0x14
    assert __3 == 0x02
    # GroupID section.  NOTE(review): "7805ac12" looks like the hash of the
    # "GroupID" entry name -- confirm against hash_table of a real file.
    if "7805ac12" in data["hash_table"]:
        hash = hash_format(f)
        assert hash == "7805ac12"
        groupid_count, __2 = struct.unpack("<2I", f.read(8))
        assert __2 == 0x0c
        # One hash per text entry, mapped through hash_table to entry names;
        # used only to reorder data["texts"] at the end.
        groupids = list(map(lambda x: data["hash_table"][hash_format(f)], range(groupid_count)))
        # data["groupids"] = groupids
    else:
        # No GroupID section implies the file carries no texts at all.
        assert has_groupid == 0
        pass

    # Text section ("3b8ba7c7" is the hash of the name "Text", per the
    # commented-out assert above).
    hash = hash_format(f)
    assert hash == "3b8ba7c7"
    text_count, __2 = struct.unpack("<2I", f.read(8))
    if has_groupid == 0:
        assert text_count == 0
    else:
        assert text_count > 0
        assert groupid_count == text_count
    assert __2 == 0x18
    # CharName section ("3cc2b4f9"): same 0x10-byte record layout as the hash
    # table, with the text strings packed sequentially after the records.
    hash = hash_format(f)
    assert hash == "3cc2b4f9"
    __1, __2 = struct.unpack("<2I", f.read(8))
    assert __1 == text_count
    text_data_pos = f.tell()
    text_pos = text_data_pos + text_count * 0x10
    for i in range(text_count):
        f.seek(text_data_pos + i * 0x10)
        hash = hash_format(f)
        # name_pos unused here as well; texts are consumed sequentially.
        name_pos, char_length, byte_length = struct.unpack("<3I", f.read(12))
        assert byte_length == char_length * 2
        f.seek(text_pos)
        text = decode_string(f.read(byte_length))
        text_pos = f.tell()
        data["texts"][data["hash_table"][hash]] = text

    if has_groupid == 1:
        # Reorder the texts to follow the order given by the GroupID section.
        data["texts"] = {
            k: data["texts"][k] for k in sorted(data["texts"].keys(), key=groupids.index)
        }
    return data
if __name__ == "__main__":
    # One aggregate .txt output per supported language, keyed by language tag.
    writers = {lang: open(f"{lang}.txt", "w", -1, "utf8") for lang in ["CNzh", "TWzh", "USen", "JPja"]}
    try:
        for root, dirs, files in os.walk("Text_000"):
            for file in files:
                # Filter BEFORE opening: the original opened every file and
                # only then skipped non-.bin entries.
                if not file.endswith(".bin"):
                    continue
                bin_path = os.path.join(root, file)
                with open(bin_path, "rb") as f:
                    data = export_ptd(f)
                # Dump the full parse next to the source .bin.  Use a distinct
                # name so the binary handle `f` is not shadowed.
                with open(os.path.join(root, file.replace(".bin", ".json")), "w", -1, "utf8") as out:
                    json.dump(data, out, ensure_ascii=False, indent=2)
                # Last two path components, e.g. "subdir/Story_CNzh"; the
                # trailing "_" token is the language tag.
                file_path = "/".join(bin_path.replace("\\", "/").split("/")[-2:])
                *name_parts, lang = file_path.split(".")[0].split("_")
                file_name = "_".join(name_parts)
                if lang in writers:
                    writers[lang].write(f"{file_name}\n")
                    for name, text in data["texts"].items():
                        # Escape tabs/newlines so each entry stays on one line.
                        text = text.replace("\t", "\\t").replace("\r\n", "\\n").replace("\n", "\\n")
                        writers[lang].write(f"{name}\t{text}\n")
    finally:
        # Close the per-language writers even if a file fails to parse
        # (the original leaked them on any exception).
        for writer in writers.values():
            writer.close()