mirror of
https://github.com/cloudflare/cloudflare-docs.git
synced 2026-01-11 20:06:58 +00:00
dx: use a single git cmd for lastmod calc (#26722)
* dx: use a single git cmd for lastmod calc * fix: use committer date for lastmod
This commit is contained in:
parent
e201c05f5e
commit
b4697aa180
2 changed files with 129 additions and 134 deletions
137
astro.config.ts
137
astro.config.ts
|
|
@ -6,13 +6,11 @@ import liveCode from "astro-live-code";
|
||||||
import starlightLinksValidator from "starlight-links-validator";
|
import starlightLinksValidator from "starlight-links-validator";
|
||||||
import starlightScrollToTop from "starlight-scroll-to-top";
|
import starlightScrollToTop from "starlight-scroll-to-top";
|
||||||
import icon from "astro-icon";
|
import icon from "astro-icon";
|
||||||
import sitemap, { type SitemapItem } from "@astrojs/sitemap";
|
import sitemap from "@astrojs/sitemap";
|
||||||
import react from "@astrojs/react";
|
import react from "@astrojs/react";
|
||||||
|
|
||||||
import { readdir } from "fs/promises";
|
import { readdir } from "fs/promises";
|
||||||
import { fileURLToPath } from "url";
|
import { fileURLToPath } from "url";
|
||||||
import { execSync } from "child_process";
|
|
||||||
import { existsSync } from "fs";
|
|
||||||
|
|
||||||
import remarkValidateImages from "./src/plugins/remark/validate-images";
|
import remarkValidateImages from "./src/plugins/remark/validate-images";
|
||||||
|
|
||||||
|
|
@ -22,6 +20,7 @@ import rehypeAutolinkHeadings from "./src/plugins/rehype/autolink-headings.ts";
|
||||||
import rehypeExternalLinks from "./src/plugins/rehype/external-links.ts";
|
import rehypeExternalLinks from "./src/plugins/rehype/external-links.ts";
|
||||||
import rehypeHeadingSlugs from "./src/plugins/rehype/heading-slugs.ts";
|
import rehypeHeadingSlugs from "./src/plugins/rehype/heading-slugs.ts";
|
||||||
import rehypeShiftHeadings from "./src/plugins/rehype/shift-headings.ts";
|
import rehypeShiftHeadings from "./src/plugins/rehype/shift-headings.ts";
|
||||||
|
import { createSitemapLastmodSerializer } from "./sitemap.serializer.ts";
|
||||||
|
|
||||||
async function autogenSections() {
|
async function autogenSections() {
|
||||||
const sections = (
|
const sections = (
|
||||||
|
|
@ -62,134 +61,6 @@ const customCss = await autogenStyles();
|
||||||
const RUN_LINK_CHECK =
|
const RUN_LINK_CHECK =
|
||||||
process.env.RUN_LINK_CHECK?.toLowerCase() === "true" || false;
|
process.env.RUN_LINK_CHECK?.toLowerCase() === "true" || false;
|
||||||
|
|
||||||
/**
 * Build a cache of all git last-modified dates in one batch.
 *
 * Runs a single `git log` over `src/content/docs` and records, for each file,
 * the committer date of the first (i.e. newest) commit in the log output that
 * added, modified or renamed it.
 *
 * @returns A map from `./`-prefixed path (as printed by git) to ISO 8601 date
 * string, or null when the git command fails.
 */
function buildGitDateCache(): Map<string, string> | null {
	const timerLabel = "[sitemap] Building git date cache";
	console.time(timerLabel);

	try {
		// Use git log with --name-only and --diff-filter to get all files with their last commit
		// The format outputs the commit date followed by the list of files changed in that commit
		// e.g.
		// 2025-10-01T12:34:56-07:00
		// src/content/docs/file1.mdx
		// src/content/docs/file2.mdx
		//
		// 2025-09-25T09:15:30-07:00
		// src/content/docs/file3.mdx
		const result = execSync(
			'git log --pretty=format:"%cI" --name-only --diff-filter=AMR src/content/docs',
			{
				encoding: "utf-8",
				maxBuffer: 100 * 1024 * 1024,
			},
		);

		const cache = new Map<string, string>();
		let currentDate: string | null = null;

		for (const line of result.split("\n")) {
			const trimmed = line.trim();
			if (!trimmed) {
				continue;
			}

			// Lines are either dates or file paths
			// Date lines match ISO format
			if (/^\d{4}-\d{2}-\d{2}T/.test(trimmed)) {
				currentDate = trimmed;
			} else if (currentDate) {
				// urlToFilePath returns paths with a leading ./, so keys use the same form
				const filePath = `./${trimmed}`; // e.g., "./src/content/docs/file.mdx"
				// git log lists newest commits first, so only the first date seen per file is kept
				if (!cache.has(filePath)) {
					cache.set(filePath, currentDate);
				}
			}
		}

		console.log(`[sitemap] Loaded git dates for ${cache.size} files`);
		return cache;
	} catch (error) {
		console.warn("[sitemap] Failed to build git date cache:", error);
		return null;
	} finally {
		// Always stop the timer, including on failure, so the label is never left dangling
		console.timeEnd(timerLabel);
	}
}
|
|
||||||
|
|
||||||
const gitDateCache = buildGitDateCache();
|
|
||||||
|
|
||||||
/**
 * Get the last Git modification date for a file (from cache)
 * @param filePath - Path to the file, in the `./`-prefixed form used for the cache keys
 * @returns ISO date string, or undefined if the cache is unavailable or has no entry for the file
 */
function getGitLastModified(filePath: string): string | undefined {
	if (!gitDateCache) {
		console.warn("[sitemap] Git date cache is not initialized");
		return undefined;
	}

	const result = gitDateCache.get(filePath);

	if (!result) {
		console.log(`[sitemap] Last modified not found in git for: "${filePath}"`);
	}

	// Map.get already yields undefined for a missing key
	return result;
}
|
|
||||||
|
|
||||||
/**
 * Convert a sitemap URL to the corresponding source file path
 * @param url - The full URL from the sitemap
 * @returns Path relative to the current working directory (with a leading `./`),
 * or null if the URL is invalid or no matching file exists
 */
function urlToFilePath(url: string): string | null {
	let pathname: string;
	try {
		pathname = new URL(url).pathname.replace(/\/$/, ""); // Remove trailing slash
	} catch (_error) {
		return null;
	}

	// URL.pathname is percent-encoded; decode it so names containing spaces or
	// non-ASCII characters match the files on disk
	try {
		pathname = decodeURIComponent(pathname);
	} catch (_error) {
		// Malformed escape sequence: keep the raw pathname
	}

	// Try different file extensions and paths
	const possiblePaths = [
		`./src/content/docs${pathname}.md`,
		`./src/content/docs${pathname}.mdx`,
		`./src/content/docs${pathname}/index.md`,
		`./src/content/docs${pathname}/index.mdx`,
	];

	return possiblePaths.find((path) => existsSync(path)) ?? null;
}
|
|
||||||
|
|
||||||
/**
 * Stamp a sitemap item with a `lastmod` value.
 *
 * Uses the date returned by getGitLastModified for the item's source file when
 * both the file and the date can be resolved; otherwise logs a warning and
 * falls back to the current time.
 *
 * @param item - Sitemap item to update in place
 * @returns The same item, with `lastmod` set
 */
function addLastModDate(item: SitemapItem) {
	const sourcePath = urlToFilePath(item.url);

	// No source file could be located for this URL
	if (!sourcePath) {
		console.warn(
			`[sitemap] Could not find source file for ${item.url} - setting last modified to now`,
		);
		item.lastmod = new Date().toISOString();
		return item;
	}

	const committedAt = getGitLastModified(sourcePath);

	// The file exists but no date is available for it
	if (!committedAt) {
		console.warn(
			`[sitemap] No git last mod date found for ${sourcePath} (${item.url}) - setting to now`,
		);
		item.lastmod = new Date().toISOString();
		return item;
	}

	item.lastmod = committedAt;
	return item;
}
|
|
||||||
|
|
||||||
// https://astro.build/config
|
// https://astro.build/config
|
||||||
export default defineConfig({
|
export default defineConfig({
|
||||||
site: "https://developers.cloudflare.com",
|
site: "https://developers.cloudflare.com",
|
||||||
|
|
@ -324,9 +195,7 @@ export default defineConfig({
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
},
|
},
|
||||||
serialize(item) {
|
serialize: createSitemapLastmodSerializer(),
|
||||||
return addLastModDate(item);
|
|
||||||
},
|
|
||||||
}),
|
}),
|
||||||
react(),
|
react(),
|
||||||
],
|
],
|
||||||
|
|
|
||||||
126
sitemap.serializer.ts
Normal file
126
sitemap.serializer.ts
Normal file
|
|
@ -0,0 +1,126 @@
|
||||||
|
import { spawn } from "node:child_process";
|
||||||
|
import * as readline from "node:readline";
|
||||||
|
import type { SitemapItem } from "@astrojs/sitemap";
|
||||||
|
import { existsSync } from "fs";
|
||||||
|
import { green, blue, dim } from "kleur/colors";
|
||||||
|
|
||||||
|
/**
 * Meant to mimic how Astro prints duration during logging
 * (i.e. build/util.ts from astro/core): whole milliseconds below one second,
 * seconds with two decimals otherwise.
 *
 * The value is rounded before the unit is chosen, so a duration such as
 * 999.6 is reported as "1.00s" rather than "1000ms".
 *
 * @param duration - Elapsed time in milliseconds.
 * @returns A short human-readable duration such as "42ms" or "1.50s".
 */
export function readableMsDuration(duration: number): string {
	const roundedMs = Math.round(duration);
	return roundedMs < 1000
		? `${roundedMs}ms`
		: `${(duration / 1000).toFixed(2)}s`;
}
|
||||||
|
|
||||||
|
/**
 * A helper function that uses git shell commands to get last modified dates.
 * Note: it is important that this is only called once with all relevant paths as opposed to calling this separately for individual paths.
 *
 * Spawns a single `git log` in the current working directory and streams its
 * output line by line. For every file it keeps the committer date (`%cI`) of
 * the first commit in the output that added, modified or renamed it.
 *
 * @param dirs - Pathspecs handed to `git log` (directories, files or globs).
 * @returns A map from file path (as printed by `git log --name-only`) to the
 * date of the newest matching commit.
 * @throws Rejects when git cannot be spawned or exits with a non-zero code.
 */
async function getLastmodViaGitShell(
	...dirs: string[]
): Promise<Map<string, Date>> {
	// Marker that distinguishes commit-date lines from file-path lines
	const datePrefix = "DATE: ";
	const gitArgs = [
		// Print non-ASCII paths verbatim instead of as quoted escape sequences,
		// so the map keys match the names of the files on disk
		"-c",
		"core.quotePath=false",
		"log",
		`--pretty=format:${datePrefix}%cI`,
		"--diff-filter=AMR",
		"--name-only",
		// Everything after `--` is a pathspec, never a revision or an option
		"--",
		...dirs,
	];

	return new Promise<Map<string, Date>>((resolve, reject) => {
		const git = spawn("git", gitArgs, { cwd: process.cwd() });
		// Use readline to process output to reduce memory usage since output will be large.
		const rl = readline.createInterface({
			input: git.stdout,
			crlfDelay: Infinity,
		});

		const lastmodMetadata = new Map<string, Date>();
		let currentDate: Date | undefined;

		rl.on("line", (rawLine) => {
			const line = rawLine.trim();
			if (!line) return;

			if (line.startsWith(datePrefix)) {
				// Cut off the prefix and use Date class to handle time zones
				const parsed = new Date(line.slice(datePrefix.length));
				// Skip files under an unparseable date rather than storing an Invalid Date
				currentDate = Number.isNaN(parsed.getTime()) ? undefined : parsed;
				return;
			}

			// git log lists newest commits first, so only the first date seen per file is kept
			const file = line;
			if (currentDate && !lastmodMetadata.has(file)) {
				lastmodMetadata.set(file, currentDate);
			}
		});

		let stderr = "";
		git.stderr.on("data", (chunk: Buffer) => (stderr += chunk.toString()));

		git.on("error", (error) => {
			rl.close();
			reject(error);
		});

		git.on("close", (code) => {
			rl.close();
			if (code !== 0)
				return reject(new Error(`git exited with code ${code}: ${stderr}`));
			resolve(lastmodMetadata);
		});
	});
}
|
||||||
|
|
||||||
|
/**
 * Attempt to find corresponding source file path for a sitemap item.
 * This only looks for Starlight files in `src/content/docs`.
 *
 * The URL pathname is percent-decoded before the lookup so that pages whose
 * file names contain spaces or non-ASCII characters can still be found.
 *
 * @todo Leverage Astro's IntegrationResolvedRoute.entrypoint to account for pages in `src/pages`.
 * @param item - Sitemap item whose `url` is resolved.
 * @returns Relative file path or null if not found
 * @throws TypeError when `item.url` is not a valid URL.
 */
function getSourceFile(item: SitemapItem): string | null {
	const url = new URL(item.url);
	// Remove trailing slash
	let pathname = url.pathname.replace(/\/$/, "");

	// URL.pathname is percent-encoded, file names on disk are not
	try {
		pathname = decodeURIComponent(pathname);
	} catch {
		// Malformed escape sequence: keep the raw pathname
	}

	// Try different file extensions and paths
	const possiblePaths = [
		`src/content/docs${pathname}.md`,
		`src/content/docs${pathname}.mdx`,
		`src/content/docs${pathname}/index.md`,
		`src/content/docs${pathname}/index.mdx`,
	];

	return possiblePaths.find((path) => existsSync(path)) ?? null;
}
|
||||||
|
|
||||||
|
/**
 * Create a sitemap `serialize` callback that sets `lastmod` on every item.
 *
 * The git metadata is loaded lazily on the first call. The pending promise
 * (not just its result) is cached, so overlapping calls share one `git log`
 * run instead of each spawning their own.
 *
 * Items whose source file cannot be resolved, or that have no entry in the
 * git metadata, get the time at which the serializer was created.
 *
 * @returns An async function that sets `item.lastmod` and returns the item.
 */
export function createSitemapLastmodSerializer() {
	let lastModMetadata: Promise<Map<string, Date>> | undefined = undefined;
	const currentDateString = new Date().toISOString();

	// Runs git once, and reports how long it took
	async function loadLastModMetadata(): Promise<Map<string, Date>> {
		const startTime = performance.now();
		const metadata = await getLastmodViaGitShell(
			"src/content/docs",
			"src/pages/*.astro",
		);
		const endTime = performance.now();

		// Mimic the Astro logger output
		console.log(
			dim(new Date().toLocaleTimeString("en-US", { hour12: false })),
			blue("[@cloudflare/lastmod-serializer]"),
			green(
				`✓ Lastmod metadata calculated in ${readableMsDuration(endTime - startTime)}.`,
			),
		);

		return metadata;
	}

	return async (item: SitemapItem) => {
		const filePath = getSourceFile(item);

		// Only calculate metadata once
		if (lastModMetadata === undefined) {
			lastModMetadata = loadLastModMetadata();
		}
		const metadata = await lastModMetadata;

		const lastmod = filePath ? metadata.get(filePath) : undefined;
		item.lastmod = lastmod ? lastmod.toISOString() : currentDateString;
		return item;
	};
}
|
||||||
Loading…
Add table
Reference in a new issue