mirror of
https://github.com/cloudflare/cloudflare-docs.git
synced 2026-01-11 20:06:58 +00:00
dx: use a single git cmd for lastmod calc (#26722)
* dx: use a single git cmd for lastmod calc * fix: use committer date for lastmod
This commit is contained in:
parent
e201c05f5e
commit
b4697aa180
2 changed files with 129 additions and 134 deletions
137
astro.config.ts
137
astro.config.ts
|
|
@ -6,13 +6,11 @@ import liveCode from "astro-live-code";
|
|||
import starlightLinksValidator from "starlight-links-validator";
|
||||
import starlightScrollToTop from "starlight-scroll-to-top";
|
||||
import icon from "astro-icon";
|
||||
import sitemap, { type SitemapItem } from "@astrojs/sitemap";
|
||||
import sitemap from "@astrojs/sitemap";
|
||||
import react from "@astrojs/react";
|
||||
|
||||
import { readdir } from "fs/promises";
|
||||
import { fileURLToPath } from "url";
|
||||
import { execSync } from "child_process";
|
||||
import { existsSync } from "fs";
|
||||
|
||||
import remarkValidateImages from "./src/plugins/remark/validate-images";
|
||||
|
||||
|
|
@ -22,6 +20,7 @@ import rehypeAutolinkHeadings from "./src/plugins/rehype/autolink-headings.ts";
|
|||
import rehypeExternalLinks from "./src/plugins/rehype/external-links.ts";
|
||||
import rehypeHeadingSlugs from "./src/plugins/rehype/heading-slugs.ts";
|
||||
import rehypeShiftHeadings from "./src/plugins/rehype/shift-headings.ts";
|
||||
import { createSitemapLastmodSerializer } from "./sitemap.serializer.ts";
|
||||
|
||||
async function autogenSections() {
|
||||
const sections = (
|
||||
|
|
@ -62,134 +61,6 @@ const customCss = await autogenStyles();
|
|||
// True only when the RUN_LINK_CHECK environment variable equals "true"
// (case-insensitive). An unset variable makes the optional chain yield
// undefined, and `undefined === "true"` is already false, so no `|| false`
// fallback is needed.
const RUN_LINK_CHECK = process.env.RUN_LINK_CHECK?.toLowerCase() === "true";
|
||||
|
||||
/**
 * Build a cache of all git last-modified dates in one batch.
 *
 * Runs a single `git log` over `src/content/docs` and records, for each file
 * path that appears in the output, the committer date (`%cI`) of the first
 * commit block that lists it. `git log` prints newest commits first, so the
 * first occurrence is the most recent add/modify/rename of that file.
 *
 * @returns Map from `./`-prefixed repo-relative path to ISO 8601 date string,
 *          or `null` when the git command fails.
 */
function buildGitDateCache(): Map<string, string> | null {
  const timerLabel = "[sitemap] Building git date cache";

  try {
    console.time(timerLabel);

    // Use git log with --name-only and --diff-filter to get all files with their last commit
    // The format outputs the commit date followed by the list of files changed in that commit
    // e.g.
    // 2025-10-01T12:34:56-07:00
    // src/content/docs/file1.mdx
    // src/content/docs/file2.mdx
    //
    // 2025-09-25T09:15:30-07:00
    // src/content/docs/file3.mdx
    //
    // The `--` makes git treat the directory strictly as a pathspec, never as a revision.
    const result = execSync(
      'git log --pretty=format:"%cI" --name-only --diff-filter=AMR -- src/content/docs',
      {
        encoding: "utf-8",
        maxBuffer: 100 * 1024 * 1024,
      },
    );

    const cache = new Map<string, string>();
    let currentDate: string | null = null;

    for (const line of result.split("\n")) {
      const trimmed = line.trim();
      if (!trimmed) {
        continue;
      }

      // Lines are either dates or file paths; date lines match ISO format.
      if (/^\d{4}-\d{2}-\d{2}T/.test(trimmed)) {
        currentDate = trimmed;
        continue;
      }

      if (currentDate) {
        // urlToFilePath returns paths with a leading "./", so keys use the same form,
        // e.g. "./src/content/docs/file.mdx".
        const filePath = `./${trimmed}`;
        // Keep only the first (newest) date seen for each file.
        if (!cache.has(filePath)) {
          cache.set(filePath, currentDate);
        }
      }
    }

    console.timeEnd(timerLabel);
    console.log(`[sitemap] Loaded git dates for ${cache.size} files`);
    return cache;
  } catch (error) {
    // Stop the timer on the failure path too, so the label is not left running.
    console.timeEnd(timerLabel);
    console.warn("[sitemap] Failed to build git date cache:", error);
    return null;
  }
}
|
||||
|
||||
// Built once at module top level; null when buildGitDateCache() failed to run git.
const gitDateCache = buildGitDateCache();
|
||||
|
||||
/**
 * Get the last Git modification date for a file (from cache).
 * @param filePath - Path to the file, in the same form as the cache keys
 * @returns ISO date string, or undefined when the cache is unavailable or has
 *          no entry for the file
 */
function getGitLastModified(filePath: string): string | undefined {
  if (!gitDateCache) {
    console.warn("[sitemap] Git date cache is not initialized");
    return undefined;
  }

  const result = gitDateCache.get(filePath);

  if (!result) {
    console.log(`[sitemap] Last modified not found in git for: "${filePath}"`);
  }

  // Map.get already yields undefined for a missing key, so no `?? undefined` is needed.
  return result;
}
|
||||
|
||||
/**
 * Convert a sitemap URL to the corresponding source file path.
 *
 * Candidates under `./src/content/docs` are probed in this order:
 * `<path>.md`, `<path>.mdx`, `<path>/index.md`, `<path>/index.mdx`.
 *
 * @param url - The full URL from the sitemap
 * @returns Relative (`./`-prefixed) file path of the first candidate that
 *          exists, or null if the URL cannot be parsed or no candidate exists
 */
function urlToFilePath(url: string): string | null {
  try {
    // URL#pathname is percent-encoded; decode it so pages whose file names
    // contain spaces or non-ASCII characters can be found on disk.
    // A malformed escape throws and is handled by the catch below.
    const pathname = decodeURIComponent(new URL(url).pathname).replace(
      /\/$/,
      "",
    ); // Remove trailing slash

    // Try different file extensions and paths
    const possiblePaths = [
      `./src/content/docs${pathname}.md`,
      `./src/content/docs${pathname}.mdx`,
      `./src/content/docs${pathname}/index.md`,
      `./src/content/docs${pathname}/index.mdx`,
    ];

    return possiblePaths.find((candidate) => existsSync(candidate)) ?? null;
  } catch (_error) {
    return null;
  }
}
|
||||
|
||||
/**
 * Set `lastmod` on a sitemap item and return the same item.
 *
 * The git date of the item's source file is used when both the file and its
 * date can be resolved; otherwise a warning is logged and the current time is
 * used instead.
 */
function addLastModDate(item: SitemapItem) {
  const sourcePath = urlToFilePath(item.url);

  // No source file could be matched to this URL.
  if (!sourcePath) {
    console.warn(
      `[sitemap] Could not find source file for ${item.url} - setting last modified to now`,
    );
    item.lastmod = new Date().toISOString();
    return item;
  }

  const committedAt = getGitLastModified(sourcePath);

  // The file exists but has no recorded git date.
  if (!committedAt) {
    console.warn(
      `[sitemap] No git last mod date found for ${sourcePath} (${item.url}) - setting to now`,
    );
    item.lastmod = new Date().toISOString();
    return item;
  }

  item.lastmod = committedAt;
  return item;
}
|
||||
|
||||
// https://astro.build/config
|
||||
export default defineConfig({
|
||||
site: "https://developers.cloudflare.com",
|
||||
|
|
@ -324,9 +195,7 @@ export default defineConfig({
|
|||
|
||||
return true;
|
||||
},
|
||||
serialize(item) {
|
||||
return addLastModDate(item);
|
||||
},
|
||||
serialize: createSitemapLastmodSerializer(),
|
||||
}),
|
||||
react(),
|
||||
],
|
||||
|
|
|
|||
126
sitemap.serializer.ts
Normal file
126
sitemap.serializer.ts
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
import { spawn } from "node:child_process";
|
||||
import * as readline from "node:readline";
|
||||
import type { SitemapItem } from "@astrojs/sitemap";
|
||||
import { existsSync } from "fs";
|
||||
import { green, blue, dim } from "kleur/colors";
|
||||
|
||||
/**
 * Meant to mimic how Astro prints duration during logging
 * (i.e. build/util.ts from astro/core).
 *
 * @param duration - Elapsed time in milliseconds
 * @returns Rounded whole milliseconds with an `ms` suffix when the duration is
 *          below 1000, otherwise seconds with two decimals and an `s` suffix
 */
export function readableMsDuration(duration: number): string {
  if (duration < 1000) {
    return `${Math.round(duration)}ms`;
  }
  return `${(duration / 1000).toFixed(2)}s`;
}
|
||||
|
||||
/**
 * A helper function that uses git shell commands to get last modified dates.
 * Note: it is important that this is only called once with all relevant paths as opposed to calling this separately for individual paths.
 *
 * Spawns one `git log` in the current working directory and streams its
 * output line by line. Each commit contributes a `DATE: <committer date>` line
 * followed by the paths it added, modified or renamed. Only the first date
 * seen for a path is kept; `git log` lists newest commits first, so that is
 * the most recent one.
 *
 * @param dirs - Pathspecs (directories or globs) to restrict the log to
 * @returns Map from repo-relative file path to the date of its latest commit
 * @throws Rejects if git cannot be spawned or exits with a non-zero code
 */
async function getLastmodViaGitShell(
  ...dirs: string[]
): Promise<Map<string, Date>> {
  const datePrefix = "DATE: ";
  const gitArgs = [
    // Print non-ASCII path bytes verbatim instead of C-quoting them, so the
    // keys match the paths used for lookups.
    "-c",
    "core.quotePath=false",
    "log",
    `--pretty=format:${datePrefix}%cI`,
    "--diff-filter=AMR",
    "--name-only",
    // Everything after `--` is a pathspec. Without it git may reject a
    // pathspec that matches nothing in the working tree as an ambiguous
    // revision.
    "--",
    ...dirs,
  ];

  return new Promise<Map<string, Date>>((resolve, reject) => {
    const git = spawn("git", gitArgs, { cwd: process.cwd() });
    // Use readline to process output to reduce memory usage since output will be large.
    const rl = readline.createInterface({
      input: git.stdout!,
      crlfDelay: Infinity,
    });

    const lastmodMetadata = new Map<string, Date>();
    // Undefined until the first date line has been read.
    let currentDate: Date | undefined;

    rl.on("line", (rawLine) => {
      const line = rawLine.trim();
      if (!line) return;

      if (line.startsWith(datePrefix)) {
        // Cut off the 'DATE: ' prefix and use Date class to handle time zones
        const parsed = new Date(line.slice(datePrefix.length));
        // Ignore an unparseable date rather than storing an Invalid Date,
        // which would later throw in toISOString().
        currentDate = Number.isNaN(parsed.getTime()) ? undefined : parsed;
        return;
      }

      // Any other non-empty line is a file path belonging to the current commit.
      if (currentDate && !lastmodMetadata.has(line)) {
        lastmodMetadata.set(line, currentDate);
      }
    });

    // Collect stderr so a failure can report what git printed.
    let stderr = "";
    git.stderr?.on("data", (chunk) => (stderr += chunk.toString()));
    git.on("error", reject);

    git.on("close", (code) => {
      rl.close();
      if (code !== 0) {
        reject(new Error(`git exited with code ${code}: ${stderr}`));
        return;
      }
      resolve(lastmodMetadata);
    });
  });
}
|
||||
|
||||
/**
 * Attempt to find corresponding source file path for a sitemap item.
 * This only looks for Starlight files in `src/content/docs`.
 *
 * Candidates are probed in this order: `<path>.md`, `<path>.mdx`,
 * `<path>/index.md`, `<path>/index.mdx`.
 *
 * @todo Leverage Astro's IntegrationResolvedRoute.entrypoint to account for pages in `src/pages`.
 * @param item - Sitemap entry whose `url` is resolved to a file
 * @returns Relative file path, or null if the URL cannot be parsed or no
 *          candidate file exists
 */
function getSourceFile(item: SitemapItem): string | null {
  let pathname: string;
  try {
    // URL#pathname is percent-encoded; decode it so file names containing
    // spaces or non-ASCII characters can be matched on disk.
    pathname = decodeURIComponent(new URL(item.url).pathname);
  } catch {
    // Invalid URL or malformed escape sequence: report "not found" rather
    // than throwing.
    return null;
  }

  // Remove trailing slash
  pathname = pathname.replace(/\/$/, "");

  // Try different file extensions and paths
  const possiblePaths = [
    `src/content/docs${pathname}.md`,
    `src/content/docs${pathname}.mdx`,
    `src/content/docs${pathname}/index.md`,
    `src/content/docs${pathname}/index.mdx`,
  ];

  return possiblePaths.find((candidate) => existsSync(candidate)) ?? null;
}
|
||||
|
||||
/**
 * Create a sitemap `serialize` callback that fills in `lastmod` for each item.
 *
 * The git metadata is loaded on the first call and reused afterwards. The
 * in-flight promise is cached rather than the resolved map, so calls that
 * overlap while git is still running share one lookup instead of each
 * starting their own.
 *
 * Items whose source file cannot be resolved, or which have no git date, get
 * the timestamp captured when the serializer was created.
 *
 * @returns Async function that sets `item.lastmod` and returns the same item
 */
export function createSitemapLastmodSerializer() {
  let lastModMetadata: Promise<Map<string, Date>> | undefined = undefined;
  const currentDateString = new Date().toISOString();

  // Runs the single git lookup and logs how long it took.
  const loadLastModMetadata = async (): Promise<Map<string, Date>> => {
    const startTime = performance.now();
    const metadata = await getLastmodViaGitShell(
      "src/content/docs",
      "src/pages/*.astro",
    );
    const endTime = performance.now();

    // Mimic the Astro logger output
    console.log(
      dim(new Date().toLocaleTimeString("en-US", { hour12: false })),
      blue("[@cloudflare/lastmod-serializer]"),
      green(
        `✓ Lastmod metadata calculated in ${readableMsDuration(endTime - startTime)}.`,
      ),
    );

    return metadata;
  };

  return async (item: SitemapItem) => {
    const filePath = getSourceFile(item);

    // Only calculate metadata once
    if (lastModMetadata === undefined) {
      lastModMetadata = loadLastModMetadata();
    }
    const metadata = await lastModMetadata;

    const lastmod = filePath ? metadata.get(filePath) : undefined;
    item.lastmod =
      lastmod !== undefined ? lastmod.toISOString() : currentDateString;
    return item;
  };
}
|
||||
Loading…
Add table
Reference in a new issue