Example custom semgrep rule for detecting fixed time references that is stored in repo for scanning against pull requests (#26647)

* Include custom semgrep rule stored in repo for scanning against pull requests

* disable metrics and root path to avoid warnings

* This rule must use the generic semgrep parser

* include a way to skip the local semgrep scan by including [skip semgrep] in commit message

* Require a fetch-depth of 0 to get all of the history

* In CI we compare only the committed changes, but when run locally we want to consider all changes made to the working directory (including uncommitted ones)

* Improved warning message for coming soon and included both committed and uncommitted changes in the local semgrep check

* Avoid fatal git error on ownership within CLI working directory
This commit is contained in:
cd rubin 2025-11-24 15:40:08 +00:00 committed by GitHub
parent 6704faf669
commit 1e146bf316
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 105 additions and 2 deletions

View file

@ -2,12 +2,15 @@ on:
workflow_dispatch: {}
schedule:
- cron: "0 4 * * *"
pull_request: {}
name: Semgrep config
permissions:
contents: read
jobs:
semgrep:
name: semgrep/ci
name: semgrep
runs-on: ubuntu-latest
env:
SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN }}
@ -18,4 +21,35 @@ jobs:
image: semgrep/semgrep
steps:
- uses: actions/checkout@v4
- run: semgrep ci
with:
# fetch full history so Semgrep can compare against the base branch
fetch-depth: 0
# Scheduled (cron) and manually dispatched runs execute `semgrep ci`,
# which scans using the managed rules at cloudflare.semgrep.dev
- name: Semgrep CI Rules (Managed rules at cloudflare.semgrep.dev)
  if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
  run: semgrep ci
# Semgrep Scan to run on Pull Request events
# scans the changed files using the rules inside the .semgrep/ folder
# (findings are only reported; add '--error' below to fail the workflow on them)
# include [skip semgrep] in top-most commit message to skip scan
- name: Semgrep Repo Rules (Custom rules found in .semgrep/)
  if: github.event_name == 'pull_request'
  env:
    # github.event.head_commit is only part of the push event payload, so on
    # pull_request events the skip marker has to be read from the pull
    # request's head commit inside the script instead of in the `if:` above.
    PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
  run: |
    # Avoid the fatal git "dubious ownership" error inside the container
    git config --global --add safe.directory "$PWD"
    # Honour [skip semgrep] in the top-most commit message of the pull request.
    # If the commit cannot be read the message is empty and the scan runs.
    head_message=$(git log -1 --format=%B "$PR_HEAD_SHA" 2>/dev/null || true)
    case "$head_message" in
      *"[skip semgrep]"*)
        echo "Found [skip semgrep] in the top-most commit message; skipping scan."
        exit 0
        ;;
    esac
    # Collect added/copied/modified/renamed/type-changed files since the merge base
    base_commit=$(git merge-base HEAD "origin/$GITHUB_BASE_REF")
    git diff "$base_commit..." --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > tools/relevant_changed_files.txt || true
    # Check if file list is empty to prevent errors
    if [ -s tools/relevant_changed_files.txt ]; then
      # Word splitting of the list is intentional (one argument per file)
      list_of_files=$(tr '\n' ' ' < tools/relevant_changed_files.txt)
      # NOTE(review): '--include "*.mdx"' was previously passed twice; the
      # duplicate is dropped here. If "*.md" was meant as the second pattern,
      # add it back explicitly - confirm the intent.
      semgrep scan \
        --config .semgrep --metrics=off \
        --include "*.mdx" \
        $list_of_files
      # add '--error' to return error code to workflow
    else
      echo "No relevant files changed."
    fi

2
.gitignore vendored
View file

@ -30,3 +30,5 @@ pnpm-debug.log*
/worker/functions/
.idea
tools/relevant_changed_files.txt

View file

@ -0,0 +1,41 @@
# Custom Semgrep rules for documentation content.
# Both rules use the generic (language-agnostic) parser and rely purely on
# regular expressions; pattern-regex values are PCRE, not grep-style BRE.
rules:
  # Flags the phrase "coming soon" (either word may be capitalised).
  - id: coming-soon
    languages: [generic]
    message: "Found forbidden string 'coming soon'. Too often we set expectations unfairly by attaching this phrase to a feature that may not actually arrive soon."
    severity: MEDIUM
    paths:
      include:
        - "*.htm"
        - "*.html"
        - "*.md"
        - "*.mdx"
        - "*.yaml"
        - "*.yml"
      exclude:
        - "/src/content/changelog/**"
        - "/src/content/release-notes/**"
        - "/.semgrep/**"
        - "/.github/**"
    patterns:
      - pattern-regex: "[Cc]oming [Ss]oon"
  # Flags text that looks like a fixed point in time: a month name or a
  # four-digit year from 2000 to 2099 preceded by a space.
  - id: potential-date
    languages: [generic]
    message: "Potential date found. Documentation should strive to represent universal truth, not something time-bound."
    severity: MEDIUM
    paths:
      include:
        - "*.htm"
        - "*.html"
        - "*.md"
        - "*.mdx"
        - "*.yaml"
        - "*.yml"
      exclude:
        - "/src/content/changelog/**"
        - "/src/content/release-notes/**"
        - "/.semgrep/**"
        - "/.github/**"
    pattern-either:
      # Month names, abbreviated or in full, as whole words. The previous
      # pattern separated the months with '\|', which PCRE treats as a literal
      # pipe character, so it could never match; it also omitted October.
      - pattern-regex: '\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b'
      # A space followed by a year between 2000 and 2099.
      - pattern-regex: '\ 20[0-9][0-9]'

26
tools/semgrep-repo-rules Executable file
View file

@ -0,0 +1,26 @@
#! /bin/bash
# Run the repository's custom Semgrep rules (.semgrep/) on locally changed files.
#
# The scan covers .htm/.html/.yaml/.yml/.md/.mdx files that were added, copied,
# modified, renamed or type-changed either in commits since the merge base with
# origin/production or in the not-yet-committed working tree. Semgrep runs via
# the semgrep/semgrep docker image with the repository mounted at /src.
#
# Requires: git, docker.

repo_root_dir="$(git rev-parse --show-toplevel)"
# `return` is only valid when the script is sourced; fall back to `exit` when
# it is executed directly so a failed pushd never lets the script carry on.
pushd "${repo_root_dir}" > /dev/null || { return 1 2> /dev/null || exit 1; }

changed_files_list="tools/relevant_changed_files.txt"
base_commit=$(git merge-base HEAD origin/production)

# Unlike CI, the local check should also see uncommitted changes, so two diffs
# are combined: committed changes since the merge base, and everything that
# differs from HEAD (staged and unstaged). `sort -u` removes files that show up
# in both lists.
{
  git diff "${base_commit}..." --diff-filter=ACMRT --name-only
  git diff HEAD --diff-filter=ACMRT --name-only
} | grep -E '\.(htm|html|yaml|yml|md|mdx)$' | sort -u > "${changed_files_list}" || true

if [ -s "${changed_files_list}" ]; then
  # Word splitting of the list is intentional (one argument per file); paths
  # containing whitespace are not supported.
  list_of_files=$(tr '\n' ' ' < "${changed_files_list}")
  # NOTE(review): '--include "*.mdx"' was previously passed twice; the duplicate
  # is dropped here. If "*.md" was meant as the second pattern, add it back
  # explicitly - confirm the intent.
  docker run --rm -v "${PWD}:/src" semgrep/semgrep \
    semgrep scan \
    --config .semgrep --metrics=off \
    --include "*.mdx" \
    --force-color \
    ${list_of_files}
else
  echo "No relevant files changed."
fi

popd > /dev/null || { return 1 2> /dev/null || exit 1; }