  1. #!/usr/bin/env node
  2. const Database = require("better-sqlite3");
  3. const fs = require("node:fs");
  4. const path = require("node:path");
  5. const zlib = require("node:zlib");
  6. const { parseArgs } = require("node:util");
  7. const transit = require("transit-js");
  8. const { fail } = require("./graph_user_lib");
  9. const defaultBaseUrl = "https://api.logseq.com";
  10. function printHelp() {
  11. console.log(`Download a graph snapshot and store it as a local sqlite debug DB.
  12. Usage:
  13. node worker/scripts/download_graph_db.js --graph-id <graph-id> --admin-token <token>
  14. node worker/scripts/download_graph_db.js --graph-id <graph-id> --output ./tmp/my-graph.sqlite
  15. Options:
  16. --graph-id <id> Target graph id. Required.
  17. --admin-token <token> Admin token. Defaults to DB_SYNC_ADMIN_TOKEN.
  18. --base-url <url> Worker base URL. Defaults to DB_SYNC_BASE_URL or https://api.logseq.com.
  19. --output <path> SQLite output path. Defaults to ./tmp/graph-<graph-id>.snapshot.sqlite.
  20. --help Show this message.
  21. Notes:
  22. The output sqlite matches local graph DB schema and contains only:
  23. kvs(addr, content, addresses).
  24. `);
  25. }
  26. function sanitizeGraphIdForFilename(graphId) {
  27. return graphId.replaceAll(/[^a-zA-Z0-9.-]/g, "_");
  28. }
  29. function parseCliArgs(argv) {
  30. const { values } = parseArgs({
  31. args: argv,
  32. options: {
  33. "graph-id": { type: "string" },
  34. "admin-token": { type: "string", default: process.env.DB_SYNC_ADMIN_TOKEN },
  35. "base-url": { type: "string", default: process.env.DB_SYNC_BASE_URL || defaultBaseUrl },
  36. output: { type: "string" },
  37. help: { type: "boolean", default: false },
  38. },
  39. strict: true,
  40. allowPositionals: false,
  41. });
  42. if (values.help) {
  43. printHelp();
  44. process.exit(0);
  45. }
  46. if (!values["graph-id"]) {
  47. fail("Missing required --graph-id.");
  48. }
  49. if (!values["admin-token"]) {
  50. fail("Missing admin token. Pass --admin-token or set DB_SYNC_ADMIN_TOKEN.");
  51. }
  52. const output = values.output
  53. ? path.resolve(values.output)
  54. : path.resolve("tmp", `graph-${sanitizeGraphIdForFilename(values["graph-id"])}.snapshot.sqlite`);
  55. return {
  56. graphId: values["graph-id"],
  57. adminToken: values["admin-token"],
  58. baseUrl: values["base-url"],
  59. output,
  60. };
  61. }
  62. function authHeaders(adminToken) {
  63. return {
  64. "x-db-sync-admin-token": adminToken,
  65. };
  66. }
  67. function normalizeBaseUrl(baseUrl) {
  68. return baseUrl.replace(/\/+$/, "");
  69. }
  70. async function fetchJson(url, adminToken) {
  71. const response = await fetch(url, {
  72. method: "GET",
  73. headers: authHeaders(adminToken),
  74. });
  75. if (!response.ok) {
  76. const body = await response.text();
  77. throw new Error(`Request failed (${response.status}) for ${url}: ${body}`);
  78. }
  79. return response.json();
  80. }
  81. async function fetchSnapshotDescriptor(options) {
  82. const baseUrl = normalizeBaseUrl(options.baseUrl);
  83. const url = `${baseUrl}/sync/${encodeURIComponent(options.graphId)}/snapshot/download`;
  84. return fetchJson(url, options.adminToken);
  85. }
  86. async function fetchSnapshotBytes(url, adminToken) {
  87. const response = await fetch(url, {
  88. method: "GET",
  89. headers: authHeaders(adminToken),
  90. });
  91. if (!response.ok) {
  92. const body = await response.text();
  93. throw new Error(`Snapshot download failed (${response.status}) for ${url}: ${body}`);
  94. }
  95. const buffer = Buffer.from(await response.arrayBuffer());
  96. const contentEncoding = response.headers.get("content-encoding");
  97. return {
  98. buffer,
  99. contentEncoding,
  100. };
  101. }
  102. function hasGzipMagic(buffer) {
  103. return buffer.length >= 2 && buffer[0] === 0x1f && buffer[1] === 0x8b;
  104. }
  105. function maybeDecompressBuffer(buffer, contentEncoding) {
  106. if (contentEncoding === "gzip" && hasGzipMagic(buffer)) {
  107. return zlib.gunzipSync(buffer);
  108. }
  109. return buffer;
  110. }
  111. function parseFramedRows(buffer) {
  112. const rows = [];
  113. const reader = transit.reader("json");
  114. let offset = 0;
  115. while (offset < buffer.length) {
  116. if (buffer.length - offset < 4) {
  117. throw new Error("Invalid snapshot payload: incomplete frame header");
  118. }
  119. const frameLength = buffer.readUInt32BE(offset);
  120. offset += 4;
  121. if (buffer.length - offset < frameLength) {
  122. throw new Error("Invalid snapshot payload: incomplete frame payload");
  123. }
  124. const payload = buffer.subarray(offset, offset + frameLength);
  125. offset += frameLength;
  126. const batch = reader.read(payload.toString("utf8"));
  127. if (!Array.isArray(batch)) {
  128. throw new Error("Invalid snapshot payload: decoded frame is not an array");
  129. }
  130. for (const row of batch) {
  131. if (!Array.isArray(row) || row.length < 2) {
  132. throw new Error("Invalid snapshot payload: row must be [addr, content, addresses?]");
  133. }
  134. rows.push(row);
  135. }
  136. }
  137. return rows;
  138. }
  139. function writeSnapshotSqlite({
  140. outputPath,
  141. rows,
  142. }) {
  143. fs.mkdirSync(path.dirname(outputPath), { recursive: true });
  144. if (fs.existsSync(outputPath)) {
  145. fs.rmSync(outputPath);
  146. }
  147. const db = new Database(outputPath);
  148. try {
  149. db.exec(`
  150. create table if not exists kvs (
  151. addr INTEGER primary key,
  152. content TEXT,
  153. addresses JSON
  154. );
  155. `);
  156. const upsertKvs = db.prepare(
  157. "insert into kvs (addr, content, addresses) values (?, ?, ?) on conflict(addr) do update set content = excluded.content, addresses = excluded.addresses",
  158. );
  159. const writeAll = db.transaction(() => {
  160. for (const row of rows) {
  161. const [addr, content, addresses] = row;
  162. upsertKvs.run(addr, content, addresses ?? null);
  163. }
  164. });
  165. writeAll();
  166. } finally {
  167. db.close();
  168. }
  169. }
  170. async function main() {
  171. const options = parseCliArgs(process.argv.slice(2));
  172. const descriptor = await fetchSnapshotDescriptor(options);
  173. if (!descriptor || !descriptor.url) {
  174. fail("Snapshot download response missing URL.");
  175. }
  176. const snapshot = await fetchSnapshotBytes(descriptor.url, options.adminToken);
  177. const effectiveEncoding = descriptor["content-encoding"] || snapshot.contentEncoding || "";
  178. const decompressed = maybeDecompressBuffer(snapshot.buffer, effectiveEncoding);
  179. const rows = parseFramedRows(decompressed);
  180. writeSnapshotSqlite({
  181. outputPath: options.output,
  182. rows,
  183. });
  184. console.log(`Saved graph snapshot sqlite to ${options.output}`);
  185. console.log(`Graph: ${options.graphId}`);
  186. console.log(`Rows: ${rows.length}`);
  187. if (descriptor.key) {
  188. console.log(`Snapshot key: ${descriptor.key}`);
  189. }
  190. }
  191. if (require.main === module) {
  192. main().catch((error) => {
  193. fail(error instanceof Error ? error.message : String(error));
  194. });
  195. }
  196. module.exports = {
  197. parseCliArgs,
  198. parseFramedRows,
  199. sanitizeGraphIdForFilename,
  200. writeSnapshotSqlite,
  201. };