1
0

sitemap_update.js 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. const axios = require("axios");
  2. const fastXML = require("fast-xml-parser");
  3. const fs = require("fs/promises");
  4. const execa = require("execa");
  5. const xmlPath = "./sitemap.xml";
  /**
   * Load the sitemap stored at `xmlPath` and parse it into a plain JS object.
   *
   * The file is read as UTF-8 text and handed to fast-xml-parser's `XMLParser`,
   * configured with `ignoreAttributes: false` and `ignoreNameSpace: false`.
   *
   * @returns {Promise<object>} Whatever `XMLParser#parse` returns for the file.
   */
  const getData = async () => {
    const sitemapText = await fs.readFile(xmlPath, "utf8");
    const parserOptions = {
      ignoreAttributes: false,
      ignoreNameSpace: false,
    };
    return new fastXML.XMLParser(parserOptions).parse(sitemapText);
  };
  /**
   * Serialise `jsObj` back to XML and overwrite the sitemap at `xmlPath`.
   *
   * Output is formatted (`format: true`) with a single space per indent level.
   *
   * @param {object} jsObj - Object tree to serialise with fast-xml-parser's `XMLBuilder`.
   * @returns {Promise<void>} Resolves once the file has been written.
   */
  const writeData = async (jsObj) => {
    const serializer = new fastXML.XMLBuilder({
      indentBy: " ",
      format: true,
      ignoreAttributes: false,
      ignoreNameSpace: false,
    });
    await fs.writeFile(xmlPath, serializer.build(jsObj));
  };
  /**
   * Refresh the `lastmod` field of every entry in the sitemap and write the
   * file back, with entries sorted by `loc`.
   *
   * For each distinct `loc`:
   *  - URLs outside https://semi.design are skipped and left untouched.
   *  - The URL is fetched; if the request fails the error is logged and the
   *    entry is left untouched.
   *  - zh-CN / en-US pages take `lastmod` from the last git commit that touched
   *    the matching markdown file under `./content`. If git reports no commit
   *    for that path, the existing `lastmod` is kept.
   *  - Other pages compare the `X-Deploy-Scm-Version` response header with the
   *    stored `scm` value and bump `lastmod` to "now" when it differs or when
   *    no `scm` is stored yet.
   *
   * A failure on one entry never aborts the run; every entry is written back.
   *
   * @returns {Promise<void>} Resolves once the sitemap has been rewritten.
   */
  const main = async () => {
    const SITE_ORIGIN = "https://semi.design";
    const SCM_HEADER = "X-Deploy-Scm-Version";
    const DOC_LANGS = ["zh-CN", "en-US"];

    const data = await getData();

    // A <urlset> with exactly one <url> is parsed as a bare object and an empty
    // one yields nothing, so normalise to an array before iterating.
    const parsedEntries = data["urlset"].url;
    let entries;
    if (Array.isArray(parsedEntries)) {
      entries = parsedEntries;
    } else if (parsedEntries === undefined || parsedEntries === null) {
      entries = [];
    } else {
      entries = [parsedEntries];
    }

    // Index by `loc`; a later duplicate replaces an earlier one.
    const urlMap = {};
    for (const entry of entries) {
      urlMap[entry.loc] = entry;
    }
    const urls = Object.keys(urlMap);
    let count = 0;

    // Commit time of the last commit touching `mdPath`, or null when git prints
    // no timestamp (e.g. the path has no history).
    const lastCommitDate = (mdPath) => {
      // argv form: the path is never interpreted by a shell.
      const { stdout } = execa.sync("git", ["log", "-1", "--pretty=format:%ct", "--", mdPath]);
      const seconds = Number.parseInt(stdout, 10);
      return Number.isFinite(seconds) ? new Date(seconds * 1000) : null;
    };

    // Update a single sitemap entry in place; errors are logged, never thrown.
    const processEntry = async (url) => {
      const item = urlMap[url];
      try {
        if (!url.startsWith(SITE_ORIGIN)) {
          count++;
          console.log(`SiteMap jump over ${url} ${count}/${urls.length}`);
          return;
        }

        // Fetched for every in-site URL: a failed request lands in the catch
        // below and leaves the entry unchanged.
        const res = await axios.get(url);

        const lang = DOC_LANGS.find((candidate) => url.startsWith(`${SITE_ORIGIN}/${candidate}`));
        if (lang) {
          const mdRelativePath = url.replace(`${SITE_ORIGIN}/${lang}/`, "");
          const mdFile = lang === "zh-CN" ? "index.md" : "index-en-US.md";
          const mdPath = `./content/${mdRelativePath}/${mdFile}`;
          const committedAt = lastCommitDate(mdPath);
          if (committedAt !== null) {
            item.lastmod = committedAt.toISOString();
          }
        } else {
          const scm = res.headers[SCM_HEADER]
            || res.headers[SCM_HEADER.toLowerCase()]
            || res.headers[SCM_HEADER.toUpperCase()];
          // NOTE(review): when the header is absent and no `scm` is stored,
          // `lastmod` is bumped on every run — confirm this is intended.
          if (!item["scm"] || item["scm"] !== scm) {
            item["scm"] = scm;
            item.lastmod = new Date().toISOString();
          }
        }

        count++;
        console.log(`SiteMap processed ${url} ${count}/${urls.length}`);
      } catch (e) {
        console.log("error", e, url);
      }
    };

    await Promise.all(urls.map((url) => processEntry(url)));

    // Every entry is kept, whether or not its update succeeded.
    const updatedArr = urls.map((url) => urlMap[url]);
    updatedArr.sort((itemA, itemB) => itemA.loc.localeCompare(itemB.loc));
    data["urlset"].url = updatedArr;
    await writeData(data);
  };
  // Run the update. Failures that escape main() (e.g. reading or writing the
  // sitemap) are logged and turn into a non-zero exit code instead of an
  // unhandled promise rejection.
  main().catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });