vibe-coding-cn/libs/external/l10n-tool/bulk_translate.py

141 lines
4.6 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
批量翻译 i18n/zh 下的所有文本文件到其他语言目录。
- 保持目录结构,遇到代码块自动跳过翻译。
- 目标语言集合:自动读取 i18n 下的子目录,排除 zh。
用法:
python bulk_translate.py --src-root ../../i18n/zh --dst-root ../../i18n --src-lang zh-CN
可选:
--langs en es ... # 指定目标语言;默认自动扫描
--overwrite # 允许覆盖已有文件
"""
import argparse
import os
import sys
from pathlib import Path
from typing import Iterable, List
try:
from deep_translator import GoogleTranslator
except ImportError:
sys.stderr.write("[错误] 缺少 deep-translator请先 pip install -r requirements.txt\n")
sys.exit(1)
def translate_blocks(text: str, translator: GoogleTranslator) -> str:
    """Translate the prose of a Markdown-style document, leaving code untouched.

    The text is split into lines. Consecutive non-blank lines outside fenced
    code blocks are grouped into paragraphs and sent to the translator one
    paragraph at a time. Blank lines, fence lines and everything between an
    opening and closing fence are copied through unchanged.

    Fences are tracked by their marker: a block opened by a run of three or
    more backticks or tildes is closed only by a bare run of the same
    character that is at least as long. A line that merely starts with inline
    code (for example three backticks followed by more backticks on the same
    line) is treated as prose, not as a fence.

    Args:
        text: Full content of the source document.
        translator: Any object exposing ``translate(str) -> str``.

    Returns:
        The translated document, with lines joined by ``"\\n"`` and no
        trailing newline.
    """
    lines = text.splitlines()
    translated: List[str] = []
    buffer: List[str] = []
    # Marker that opened the current code block; empty string outside code.
    open_fence = ""

    def flush_buffer() -> None:
        """Translate the buffered paragraph and append it to the output."""
        if not buffer:
            return
        chunk = "\n".join(buffer)
        try:
            result = translator.translate(chunk)
            if result is None:
                raise RuntimeError("翻译返回空结果")
            translated.extend(result.split("\n"))
        except Exception:
            # Deliberate best-effort: if the whole paragraph fails (size
            # limit, network error, empty result), retry line by line so a
            # single bad line does not lose the entire paragraph.
            for line in buffer:
                try:
                    res_line = translator.translate(line) or line
                except Exception:
                    # Keep the original text for later manual review.
                    res_line = line
                translated.append(res_line)
        buffer.clear()

    for line in lines:
        stripped = line.strip()
        run = _fence_run(line)

        if open_fence:
            # Inside a code block: copy verbatim, and close only on a bare
            # fence of the same character that is at least as long.
            if (
                run
                and run[0] == open_fence[0]
                and len(run) >= len(open_fence)
                and stripped == run
            ):
                open_fence = ""
            translated.append(line)
            continue

        if run:
            info = stripped[len(run):]
            # A backtick fence cannot carry backticks in its info string;
            # such a line is inline code and must still be translated.
            if not (run[0] == "`" and "`" in info):
                flush_buffer()
                open_fence = run
                translated.append(line)
                continue

        if not stripped:
            # Blank line ends the current paragraph and is preserved as-is.
            flush_buffer()
            translated.append(line)
            continue

        buffer.append(line)

    flush_buffer()
    return "\n".join(translated)


def _fence_run(line: str) -> str:
    """Return the fence marker that starts ``line``, or ``""`` if none.

    A marker is a run of at least three backticks or at least three tildes
    found at the start of the line once surrounding whitespace is ignored.
    """
    stripped = line.strip()
    for fence_char in ("`", "~"):
        run_length = len(stripped) - len(stripped.lstrip(fence_char))
        if run_length >= 3:
            return stripped[:run_length]
    return ""
def iter_source_files(src_root: Path) -> Iterable[Path]:
    """Lazily yield every regular file located anywhere beneath ``src_root``.

    Directories themselves are not yielded; only entries for which
    ``Path.is_file()`` is true are produced, in the order ``rglob`` finds them.
    """
    yield from filter(Path.is_file, src_root.rglob('*'))
def main() -> int:
    """Translate every file under the source root into each target language.

    Command-line options:
        --src-root   Root directory of the source-language files.
        --dst-root   Directory holding one sub-directory per language.
        --src-lang   Source language code. Aliases such as ``zh`` are
                     normalised through the same table as target codes.
        --langs      Explicit target languages. When omitted, every
                     sub-directory of ``--dst-root`` other than the source
                     is used.
        --overwrite  Re-translate files that already exist at the destination.

    Returns:
        0 when every file was translated or skipped cleanly; 1 on a setup
        error (missing directory, no target language) or when at least one
        language or file failed.
    """
    parser = argparse.ArgumentParser(description="批量翻译 i18n/zh -> 其他语言")
    parser.add_argument('--src-root', default='../../i18n/zh', help='源语言根目录')
    parser.add_argument('--dst-root', default='../../i18n', help='目标语言根目录集合')
    parser.add_argument('--src-lang', default='zh-CN', help='源语言代码')
    parser.add_argument('--langs', nargs='*', help='指定目标语言,不含源语言')
    parser.add_argument('--overwrite', action='store_true', help='允许覆盖已有文件')
    args = parser.parse_args()

    src_root = Path(args.src_root).resolve()
    dst_root = Path(args.dst_root).resolve()
    if not src_root.is_dir():
        sys.stderr.write(f"[错误] 源目录不存在: {src_root}\n")
        return 1

    code_overrides = {
        'zh': 'zh-CN',
        'zh-CN': 'zh-CN',
        'he': 'iw',  # Google still uses the legacy code for Hebrew
    }

    def map_code(lang: str) -> str:
        """Map a directory/CLI language name to the code Google expects."""
        return code_overrides.get(lang, lang)

    # Normalise the source code too, so "--src-lang zh" and "zh-CN" behave
    # identically both for the exclusion filter and for the translator.
    src_code = map_code(args.src_lang)

    if args.langs:
        target_langs = [lang for lang in args.langs if map_code(lang) != src_code]
    else:
        if not dst_root.is_dir():
            sys.stderr.write(f"[错误] 目标目录不存在: {dst_root}\n")
            return 1
        # Exclude the source both by language code and by path, so the source
        # tree can never be selected as a translation target.
        target_langs = sorted(
            p.name
            for p in dst_root.iterdir()
            if p.is_dir() and p.resolve() != src_root and map_code(p.name) != src_code
        )

    if not target_langs:
        sys.stderr.write("[错误] 无目标语言目录\n")
        return 1

    failures = 0
    for lang in target_langs:
        try:
            translator = GoogleTranslator(source=src_code, target=map_code(lang))
        except Exception as exc:
            # An unsupported language name must not abort the other languages.
            sys.stderr.write(f"[失败] 无法初始化翻译器 {lang}: {exc}\n")
            failures += 1
            continue

        print(f"==== 开始翻译 -> {lang} ====")
        for src_file in iter_source_files(src_root):
            rel_path = src_file.relative_to(src_root)
            dst_file = dst_root / lang / rel_path
            if dst_file.exists() and not args.overwrite:
                print(f"跳过已存在 {dst_file}")
                continue

            try:
                content = src_file.read_text(encoding='utf-8')
            except UnicodeDecodeError:
                # Binary or non-UTF-8 files are not translatable text.
                print(f"[跳过] 非 UTF-8 文本 {src_file}")
                continue
            except OSError as exc:
                print(f"[失败] {src_file} -> {dst_file}: {exc}")
                failures += 1
                continue

            try:
                translated = translate_blocks(content, translator)
            except Exception as exc:
                print(f"[失败] {src_file} -> {dst_file}: {exc}")
                failures += 1
                continue

            dst_file.parent.mkdir(parents=True, exist_ok=True)
            dst_file.write_text(translated + '\n', encoding='utf-8')
            print(f"[OK] {src_file} -> {dst_file}")

    if failures:
        sys.stderr.write(f"[警告] 翻译结束, 失败 {failures} 项\n")
        return 1
    print("全部翻译完成")
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == '__main__':
    raise SystemExit(main())