audit-messages-emoji.js 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. const fs = require("node:fs");
  2. const path = require("node:path");
  /**
   * Matches one emoji "unit" per hit. The alternatives are:
   *  - a single Extended_Pictographic code point,
   *  - exactly two consecutive Regional_Indicator code points (the shape used
   *    by flag emoji), or
   *  - a keycap sequence: a digit, `#` or `*`, an optional U+FE0F, then U+20E3.
   *
   * The `u` flag enables the `\p{...}` property escapes. The `g` flag makes the
   * regex stateful through `lastIndex`, which is why the helpers that use it
   * reset `lastIndex` before matching.
   */
  const EMOJI_RE =
    /(\p{Extended_Pictographic}|\p{Regional_Indicator}{2}|[0-9#*]\uFE0F?\u20E3)/gu;
  /**
   * Tells whether `v` is a non-null, non-array object.
   *
   * @param {unknown} v - Value to classify.
   * @returns {boolean} Always a strict boolean, including for falsy input.
   */
  function isObject(v) {
    return v !== null && typeof v === "object" && !Array.isArray(v);
  }

  /**
   * Recursively collects every string leaf of a JSON-like value.
   *
   * Keys are dot-joined paths: object property names and array indexes are
   * appended to `prefix` with a "." separator. Non-string leaves (numbers,
   * booleans, null, undefined) are skipped.
   *
   * @param {unknown} value - Value to walk.
   * @param {string} [prefix=""] - Key path of `value` itself.
   * @returns {{ key: string, value: string }[]} Leaves in traversal order.
   */
  function flattenLeafStrings(value, prefix = "") {
    if (typeof value === "string") return [{ key: prefix, value }];

    // An empty prefix means "no parent path yet", so no leading "." is emitted.
    const childKey = (segment) => (prefix ? `${prefix}.${segment}` : String(segment));

    if (Array.isArray(value)) {
      return value.flatMap((item, index) => flattenLeafStrings(item, childKey(index)));
    }
    if (!isObject(value)) return [];

    return Object.entries(value).flatMap(([name, child]) =>
      flattenLeafStrings(child, childKey(name)),
    );
  }
  /**
   * Lists every `.json` file below `dir`, descending into subdirectories.
   *
   * @param {string} dir - Directory to scan.
   * @returns {string[]} Paths built by joining onto `dir`, sorted with
   *   `localeCompare`; an empty array when `dir` does not exist.
   */
  function listJsonFiles(dir) {
    if (!fs.existsSync(dir)) return [];

    // Depth-first walk: a directory is expanded in place before its siblings.
    const collect = (current) =>
      fs.readdirSync(current, { withFileTypes: true }).flatMap((entry) => {
        const fullPath = path.join(current, entry.name);
        if (entry.isDirectory()) return collect(fullPath);
        return entry.isFile() && entry.name.endsWith(".json") ? [fullPath] : [];
      });

    return collect(dir).sort((left, right) => left.localeCompare(right));
  }
  /**
   * Converts a locale-relative JSON path into a dot-separated key prefix.
   *
   * The `.json` suffix is removed and the path is split on `path.sep`. A final
   * segment named `strings` is dropped, so `a/strings.json` maps to `a` and a
   * bare `strings.json` maps to the empty string.
   *
   * @param {string} relFile - Path using the platform separator.
   * @returns {string} Dot-joined prefix (possibly empty).
   */
  function fileToKeyPrefix(relFile) {
    const stem = relFile.replace(/\.json$/, "");
    const segments = stem.split(path.sep);
    const endsWithStrings = segments[segments.length - 1] === "strings";
    const kept = endsWithStrings ? segments.slice(0, -1) : segments;
    return kept.join(".");
  }
  /**
   * Reads a UTF-8 file and parses it as JSON.
   *
   * @param {string} p - Path of the file to read.
   * @returns {unknown} The parsed JSON value.
   * @throws {SyntaxError} When the content is not valid JSON; the message
   *   names the offending file and the original error is kept as `cause`.
   * @throws {Error} Any file-system error raised by `fs.readFileSync`.
   */
  function loadJson(p) {
    const raw = fs.readFileSync(p, "utf8");
    // A leading byte order mark survives the utf8 decode and is rejected by
    // JSON.parse, so drop it before parsing.
    const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
    try {
      return JSON.parse(text);
    } catch (err) {
      throw new SyntaxError(`Invalid JSON in ${p}: ${err.message}`, { cause: err });
    }
  }
  /**
   * Counts how many times `EMOJI_RE` matches in `s`.
   *
   * Each regex match counts once, so a keycap sequence or a pair of regional
   * indicators contributes 1 rather than one per code point.
   *
   * @param {string} s - Text to scan.
   * @returns {number} Number of matches (0 when there are none).
   */
  function countEmojiCodepoints(s) {
    // matchAll starts from the regex's current lastIndex, so rewind it first.
    EMOJI_RE.lastIndex = 0;
    return [...s.matchAll(EMOJI_RE)].length;
  }
  /**
   * Replaces every `EMOJI_RE` match in `s` with the literal text `<emoji>`.
   *
   * @param {string} s - Text to mask.
   * @returns {string} Copy of `s` with each match substituted.
   */
  function maskEmoji(s) {
    EMOJI_RE.lastIndex = 0;
    const masked = s.replace(EMOJI_RE, () => "<emoji>");
    return masked;
  }
  /**
   * Resolves the list of locale names to audit.
   *
   * An explicit selection (a string, or a non-empty array) is split on commas,
   * trimmed, and stripped of empty items; `messagesRoot` is not read in that
   * case. Otherwise the names of the non-hidden subdirectories of
   * `messagesRoot` are returned, sorted with `localeCompare`.
   *
   * @param {string} messagesRoot - Directory that holds one folder per locale.
   * @param {string | string[] | undefined} locales - Optional explicit selection.
   * @returns {string[]} Locale names.
   */
  function normalizeLocales(messagesRoot, locales) {
    const requested = typeof locales === "string" ? [locales] : locales;

    if (Array.isArray(requested) && requested.length > 0) {
      const names = [];
      for (const item of requested) {
        for (const piece of String(item).split(",")) {
          const name = piece.trim();
          if (name) names.push(name);
        }
      }
      return names;
    }

    const discovered = [];
    for (const entry of fs.readdirSync(messagesRoot, { withFileTypes: true })) {
      if (entry.isDirectory() && !entry.name.startsWith(".")) discovered.push(entry.name);
    }
    discovered.sort((left, right) => left.localeCompare(right));
    return discovered;
  }
  /**
   * Scans the message JSON files of the selected locales and reports every
   * string value in which `countEmojiCodepoints` finds at least one match.
   *
   * @param {object} [options] - May be omitted entirely.
   * @param {string} [options.messagesDir] - Root directory holding one folder
   *   per locale; falls back to `<cwd>/messages` when empty or missing.
   * @param {string | string[]} [options.locales] - Locale selection passed to
   *   `normalizeLocales`.
   * @returns {{
   *   rows: { locale: string, relFile: string, key: string, emojiCount: number, preview: string }[],
   *   totalRowCount: number,
   *   totalEmojiCount: number,
   *   byLocaleCount: Record<string, number>,
   *   byFileCount: Record<string, number>,
   * }} Rows sorted by locale, then file, then key, plus per-locale and
   *   per-file row tallies (`byFileCount` keys are `<locale>/<relFile>`).
   */
  function findMessagesEmoji({ messagesDir, locales } = {}) {
    const root = messagesDir || path.join(process.cwd(), "messages");

    const rows = [];
    const byLocaleCount = {};
    const byFileCount = {};
    let totalEmojiCount = 0;

    for (const locale of normalizeLocales(root, locales)) {
      const localeDir = path.join(root, locale);
      // Explicitly requested locales may not exist on disk; skip those.
      if (!fs.existsSync(localeDir) || !fs.statSync(localeDir).isDirectory()) continue;

      for (const file of listJsonFiles(localeDir)) {
        const relPath = path.relative(localeDir, file);
        const keyPrefix = fileToKeyPrefix(relPath);
        // Report paths with forward slashes regardless of platform.
        const relFile = relPath.replaceAll(path.sep, "/");
        const fileKey = `${locale}/${relFile}`;

        for (const leaf of flattenLeafStrings(loadJson(file))) {
          const emojiCount = countEmojiCodepoints(leaf.value);
          if (emojiCount === 0) continue;

          rows.push({
            locale,
            relFile,
            // Either part can be empty; join only the parts that are present.
            key: [keyPrefix, leaf.key].filter(Boolean).join("."),
            emojiCount,
            preview: maskEmoji(leaf.value),
          });
          byLocaleCount[locale] = (byLocaleCount[locale] || 0) + 1;
          byFileCount[fileKey] = (byFileCount[fileKey] || 0) + 1;
          totalEmojiCount += emojiCount;
        }
      }
    }

    rows.sort(
      (a, b) =>
        a.locale.localeCompare(b.locale) ||
        a.relFile.localeCompare(b.relFile) ||
        a.key.localeCompare(b.key),
    );

    return {
      rows,
      totalRowCount: rows.length,
      totalEmojiCount,
      byLocaleCount,
      byFileCount,
    };
  }
  /**
   * Ranks files by row count and returns the leading entries.
   *
   * Ordering is highest count first, with ties broken by `localeCompare` on
   * the map key. Each key is split at its first "/" into `locale` and
   * `relFile`; a key without "/" yields an empty `relFile`.
   *
   * @param {Record<string, number>} byFileCount - Map of `<locale>/<relFile>`
   *   to row count.
   * @param {number} [limit=10] - Maximum number of entries to return.
   * @returns {{ locale: string, relFile: string, count: number }[]}
   */
  function topFiles(byFileCount, limit = 10) {
    const ranked = Object.entries(byFileCount).sort(
      ([keyA, countA], [keyB, countB]) => countB - countA || keyA.localeCompare(keyB),
    );

    const top = [];
    for (const [fileKey, count] of ranked.slice(0, limit)) {
      const slashAt = fileKey.indexOf("/");
      const locale = slashAt === -1 ? fileKey : fileKey.slice(0, slashAt);
      const relFile = slashAt === -1 ? "" : fileKey.slice(slashAt + 1);
      top.push({ locale, relFile, count });
    }
    return top;
  }
  /**
   * Command-line driver: parses options, runs the audit and renders the report.
   *
   * Recognised arguments (each as `--name=value`):
   *  - `--messagesDir=<dir>`  root directory of the locale folders
   *  - `--locales=<a,b,...>`  comma-separated locale selection
   *  - `--format=<fmt>`       `json`, `tsv`, or anything else for the text report
   *
   * When no rows are found the single "OK" line is returned for every format.
   * The exit code is always 0; the report is informational.
   *
   * @param {string[]} argv - Arguments without the node/script entries.
   * @returns {{ exitCode: number, lines: string[] }} Lines to print and the
   *   exit code to use.
   */
  function run(argv) {
    // Take everything after the first "=" so values that themselves contain
    // "=" (e.g. a path like /data/a=b) are not truncated.
    const optionValue = (name, fallback) => {
      const flag = `--${name}=`;
      const arg = argv.find((a) => a.startsWith(flag));
      return arg === undefined ? fallback : arg.slice(flag.length);
    };

    const messagesDir = optionValue("messagesDir", undefined);
    const locales = optionValue("locales", undefined);
    const format = optionValue("format", "text");

    const report = findMessagesEmoji({ messagesDir, locales });
    const total = report.totalRowCount;
    if (total === 0) {
      return { exitCode: 0, lines: ["OK: no emoji found in messages JSON."] };
    }

    if (format === "json") {
      return { exitCode: 0, lines: [JSON.stringify(report.rows, null, 2)] };
    }

    if (format === "tsv") {
      const lines = ["locale\trelFile\tkey\temojiCount\tpreview"];
      for (const r of report.rows) {
        lines.push(`${r.locale}\t${r.relFile}\t${r.key}\t${r.emojiCount}\t${r.preview}`);
      }
      return { exitCode: 0, lines };
    }

    const lines = [
      `Found ${total} messages strings containing emoji (${report.totalEmojiCount} total emoji codepoints).`,
      "Top files by row count (locale\trelFile\trows):",
      ...topFiles(report.byFileCount).map((r) => `${r.locale}\t${r.relFile}\t${r.count}`),
      "Rows (locale\trelFile\tkey):",
      ...report.rows.map((r) => `${r.locale}\t${r.relFile}\t${r.key}`),
    ];
    return { exitCode: 0, lines };
  }
  // Public API of this module: the CLI driver (`run`) plus the helpers it is
  // built from, exposed so they can be required individually.
  module.exports = {
    countEmojiCodepoints,
    fileToKeyPrefix,
    findMessagesEmoji,
    flattenLeafStrings,
    listJsonFiles,
    maskEmoji,
    run,
  };
  // CLI entry point: only runs when this file is executed directly, not when
  // it is loaded with require().
  if (require.main === module) {
    const { lines, exitCode } = run(process.argv.slice(2));
    for (const line of lines) console.log(line); // eslint-disable-line no-console
    // Set the exit code instead of calling process.exit(): a forced exit can
    // drop output that is still queued when stdout is written asynchronously.
    process.exitCode = exitCode;
  }