| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387 |
- #!/usr/bin/env tsx
- /**
- * Translation String Extraction Script
- *
- * This script scans TSX files for hardcoded Chinese strings and extracts them
- * into translation JSON files with semantic keys.
- *
- * Usage:
- * tsx scripts/extract-translations.ts [options]
- *
- * Options:
- * --dry-run Preview extraction without modifying files
- * --verbose Show detailed extraction process
- * --target Target directory to scan (default: src/app/[locale])
- */
- import * as fs from "node:fs";
- import * as path from "node:path";
- // Translation key naming convention: namespace.section.key
- // Example: dashboard.stats.totalRequests, settings.providers.addButton
- interface ExtractedString {
- original: string;
- key: string;
- file: string;
- line: number;
- context: string;
- namespace: string;
- needsReview: boolean;
- }
- interface ExtractionReport {
- totalFiles: number;
- totalStrings: number;
- extractedStrings: ExtractedString[];
- namespaceStats: Record<string, number>;
- }
- // Chinese character detection regex
- const _CHINESE_REGEX = /[\u4e00-\u9fa5]+/g;
- // Namespace mapping based on file path
- const NAMESPACE_MAP: Record<string, string> = {
- "/login/": "auth",
- "/dashboard/": "dashboard",
- "/settings/": "settings",
- "/usage-doc/": "usage",
- "page.tsx": "common",
- };
- // Common Chinese phrases mapping
- const PHRASE_MAP: Record<string, string> = {
- 仪表盘: "dashboard",
- 设置: "settings",
- 用户: "users",
- 供应商: "providers",
- 模型: "models",
- 请求: "requests",
- 成本: "cost",
- 统计: "stats",
- 日志: "logs",
- 配额: "quotas",
- 会话: "sessions",
- 密钥: "keys",
- 价格: "prices",
- 配置: "config",
- 数据: "data",
- 通知: "notifications",
- 版本: "versions",
- 敏感词: "sensitiveWords",
- 登录: "login",
- 退出: "logout",
- 保存: "save",
- 取消: "cancel",
- 删除: "delete",
- 编辑: "edit",
- 添加: "add",
- 刷新: "refresh",
- 搜索: "search",
- 导出: "export",
- 导入: "import",
- 确认: "confirm",
- 提交: "submit",
- 重置: "reset",
- 查看: "view",
- 复制: "copy",
- 下载: "download",
- 上传: "upload",
- 启用: "enabled",
- 禁用: "disabled",
- 成功: "success",
- 失败: "failed",
- 错误: "error",
- 警告: "warning",
- 信息: "info",
- 加载中: "loading",
- 标题: "title",
- 描述: "description",
- 名称: "name",
- 状态: "status",
- 时间: "time",
- 操作: "actions",
- 详情: "details",
- 列表: "list",
- 表单: "form",
- 按钮: "button",
- 输入: "input",
- 选择: "select",
- 选项: "options",
- 全部: "all",
- 无: "none",
- 是: "yes",
- 否: "no",
- 开: "on",
- 关: "off",
- };
- /**
- * Determine namespace from file path
- */
- function getNamespace(filePath: string): string {
- for (const [pattern, namespace] of Object.entries(NAMESPACE_MAP)) {
- if (filePath.includes(pattern)) {
- return namespace;
- }
- }
- return "common";
- }
- /**
- * Generate semantic key from Chinese string
- */
- function generateKey(
- chineseText: string,
- namespace: string,
- context: string
- ): { key: string; needsReview: boolean } {
- // Check if it's a common phrase
- const mapped = PHRASE_MAP[chineseText];
- if (mapped) {
- return { key: `${namespace}.${mapped}`, needsReview: false };
- }
- // Try to infer section from context
- let section = "misc";
- const contextLower = context.toLowerCase();
- if (contextLower.includes("title")) section = "title";
- else if (contextLower.includes("description")) section = "description";
- else if (contextLower.includes("button") || contextLower.includes("btn")) section = "actions";
- else if (contextLower.includes("label")) section = "labels";
- else if (contextLower.includes("placeholder")) section = "placeholders";
- else if (contextLower.includes("error")) section = "errors";
- else if (contextLower.includes("toast")) section = "toasts";
- else if (contextLower.includes("dialog")) section = "dialogs";
- // Generate a safe key from Chinese text
- const safeKey = chineseText
- .substring(0, 30)
- .replace(/[^\u4e00-\u9fa5]/g, "")
- .substring(0, 10);
- // Use Pinyin-like mapping for key (simplified)
- const keyPart = Array.from(safeKey)
- .map((_char, i) => `key${i}`)
- .join("");
- return {
- key: `${namespace}.${section}.${keyPart || "text"}`,
- needsReview: true, // Manual review needed for auto-generated keys
- };
- }
- /**
- * Extract Chinese strings from a file
- */
- function extractFromFile(filePath: string): ExtractedString[] {
- const content = fs.readFileSync(filePath, "utf-8");
- const lines = content.split("\n");
- const extracted: ExtractedString[] = [];
- const namespace = getNamespace(filePath);
- // Simple regex-based extraction
- // This is a simplified version - a full AST parser would be more robust
- lines.forEach((line, lineIndex) => {
- // Skip imports, comments
- if (
- line.trim().startsWith("import ") ||
- line.trim().startsWith("//") ||
- line.trim().startsWith("/*")
- ) {
- return;
- }
- // Find all Chinese strings
- const matches = line.matchAll(/["'`]([^"'`]*[\u4e00-\u9fa5]+[^"'`]*)["'`]/g);
- for (const match of matches) {
- const original = match[1];
- if (!original || original.length < 2) continue;
- // Get surrounding context (30 chars before and after)
- const matchIndex = line.indexOf(match[0]);
- const contextStart = Math.max(0, matchIndex - 30);
- const contextEnd = Math.min(line.length, matchIndex + match[0].length + 30);
- const context = line.substring(contextStart, contextEnd).trim();
- const { key, needsReview } = generateKey(original, namespace, context);
- extracted.push({
- original,
- key,
- file: filePath,
- line: lineIndex + 1,
- context,
- namespace,
- needsReview,
- });
- }
- });
- return extracted;
- }
- /**
- * Group extracted strings by namespace
- */
- function groupByNamespace(strings: ExtractedString[]): Record<string, ExtractedString[]> {
- const grouped: Record<string, ExtractedString[]> = {};
- for (const str of strings) {
- if (!grouped[str.namespace]) {
- grouped[str.namespace] = [];
- }
- grouped[str.namespace].push(str);
- }
- return grouped;
- }
- /**
- * Generate translation JSON files
- */
- function generateTranslationFiles(strings: ExtractedString[], dryRun: boolean): void {
- const grouped = groupByNamespace(strings);
- for (const [namespace, items] of Object.entries(grouped)) {
- const translationFile = path.join(process.cwd(), `messages/zh-CN/${namespace}.json`);
- // Read existing translations
- let translations: any = {};
- if (fs.existsSync(translationFile)) {
- translations = JSON.parse(fs.readFileSync(translationFile, "utf-8"));
- }
- // Add new translations (preserving existing structure)
- for (const item of items) {
- const keyParts = item.key.replace(`${namespace}.`, "").split(".");
- let current = translations;
- for (let i = 0; i < keyParts.length - 1; i++) {
- const part = keyParts[i];
- if (!current[part] || typeof current[part] !== "object") {
- current[part] = {};
- }
- current = current[part];
- }
- const finalKey = keyParts[keyParts.length - 1];
- // Only add if key doesn't exist or is not an object (avoid overwriting nested objects)
- if (!current[finalKey]) {
- current[finalKey] = item.original;
- }
- }
- if (!dryRun) {
- fs.writeFileSync(translationFile, `${JSON.stringify(translations, null, 2)}\n`, "utf-8");
- console.log(`✓ Updated ${namespace}.json with ${items.length} strings`);
- } else {
- console.log(`[DRY RUN] Would update ${namespace}.json with ${items.length} strings`);
- }
- }
- }
- /**
- * Generate extraction report
- */
- function generateReport(strings: ExtractedString[]): ExtractionReport {
- const namespaceStats: Record<string, number> = {};
- const files = new Set<string>();
- for (const str of strings) {
- files.add(str.file);
- namespaceStats[str.namespace] = (namespaceStats[str.namespace] || 0) + 1;
- }
- return {
- totalFiles: files.size,
- totalStrings: strings.length,
- extractedStrings: strings,
- namespaceStats,
- };
- }
- /**
- * Find all page.tsx files recursively
- */
- function findPageFiles(dir: string): string[] {
- const files: string[] = [];
- const entries = fs.readdirSync(dir, { withFileTypes: true });
- for (const entry of entries) {
- const fullPath = path.join(dir, entry.name);
- if (entry.isDirectory()) {
- if (!entry.name.startsWith(".") && entry.name !== "node_modules") {
- files.push(...findPageFiles(fullPath));
- }
- } else if (entry.name === "page.tsx") {
- files.push(fullPath);
- }
- }
- return files;
- }
- /**
- * Main extraction process
- */
- async function main() {
- const args = process.argv.slice(2);
- const dryRun = args.includes("--dry-run");
- const verbose = args.includes("--verbose");
- const targetIdx = args.indexOf("--target");
- const targetDir = targetIdx >= 0 ? args[targetIdx + 1] : "src/app/[locale]";
- console.log("🔍 Translation String Extraction");
- console.log("================================\n");
- // Find all page.tsx files
- const files = findPageFiles(path.join(process.cwd(), targetDir));
- console.log(`Found ${files.length} files to scan\n`);
- // Extract strings from all files
- const allStrings: ExtractedString[] = [];
- for (const file of files) {
- const extracted = extractFromFile(file);
- allStrings.push(...extracted);
- if (verbose && extracted.length > 0) {
- console.log(`📄 ${path.relative(process.cwd(), file)}: ${extracted.length} strings`);
- }
- }
- // Generate report
- const report = generateReport(allStrings);
- console.log("\n📊 Extraction Report");
- console.log("-------------------");
- console.log(`Total files scanned: ${report.totalFiles}`);
- console.log(`Total strings found: ${report.totalStrings}`);
- console.log(`\nNamespace breakdown:`);
- for (const [ns, count] of Object.entries(report.namespaceStats)) {
- console.log(` ${ns}: ${count} strings`);
- }
- // Show strings needing review
- const needsReview = allStrings.filter((s) => s.needsReview);
- if (needsReview.length > 0) {
- console.log(`\n⚠️ ${needsReview.length} strings need manual review`);
- if (verbose) {
- console.log("\nStrings needing review:");
- needsReview.slice(0, 10).forEach((s) => {
- console.log(` ${s.key}: "${s.original}" (${path.basename(s.file)}:${s.line})`);
- });
- if (needsReview.length > 10) {
- console.log(` ... and ${needsReview.length - 10} more`);
- }
- }
- }
- // Generate translation files
- console.log("\n📝 Generating translation files...");
- generateTranslationFiles(allStrings, dryRun);
- console.log("\nExtraction complete!");
- if (dryRun) {
- console.log(" (Run without --dry-run to save changes)");
- }
- }
- main().catch(console.error);
|