Semgrep GitHub workflow updated to show style guide warnings from .semgrep/ as annotations and errors

* semgrep github workflow updated to show warnings

* Add explicit messaging for how to skip semgrep check and intensify messaging to produce errors

* adding passthrough error code handling to jq and model the same in the semgrep-repo-rules tool

* Use $PIPESTATUS to get error code of item in piped command list

* show the error code values along pipeline to seek issue

* Specify bash in shell config as sh is the default inside a container

* Exit with correct error code

* show semgrep messages as warning annotations to distinguish from semgrep error code

* add use of [skip style guide checks] in commit message

* Set COMMIT_MESSAGE environment variable with last commit message

* COMMIT_MESSAGE needs to be set to the second to last message to skip the autogenerated merge message

* Grabbing commit SHA from the pull_request event

* Add explanatory message inside configure step

* Show commit message cleanly in configure step

* Use tee to set the environment variable and show the value it is set to

* keep semgrep return code intact from local tool run
This commit is contained in:
cd rubin 2025-12-05 11:28:06 +00:00 committed by GitHub
parent 37ad63efb3
commit d9ee7611b7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 88 additions and 49 deletions

View file

@ -25,20 +25,31 @@ jobs:
# fetch full history so Semgrep can compare against the base branch
fetch-depth: 0
# Configure
# - add the checkout path as a git safe directory to enable git commands on it
# - export COMMIT_MESSAGE (full message of the commit selected below) so a
#   later step can skip the Semgrep style guide checks if requested
- name: Configure
  run: |
    git config --global --add safe.directory "$PWD"
    # A commit message (%B = subject + body) is usually multi-line. A plain
    # NAME=value line in $GITHUB_ENV cannot carry newlines: every body line
    # would be parsed as its own entry, which either fails the step or lets
    # the message define additional variables. Use the NAME<<DELIMITER form
    # with a random delimiter the message cannot predict.
    delimiter="COMMIT_MESSAGE_$(od -An -N16 -tx1 /dev/urandom | tr -d ' \n')"
    {
      echo "COMMIT_MESSAGE<<${delimiter}"
      git log --format=%B -n 1 ${{ github.event.pull_request.head.sha }}
      echo "${delimiter}"
    } | tee /dev/stderr >> "$GITHUB_ENV"
    echo "(if the last commit message contains '[skip style guide checks]' Semgrep style guide rule checks will be skipped)"
# Semgrep CI to run on Schedule (Cron) or Manual Dispatch
# scans using managed rules at cloudflare.semgrep.dev
- name: Semgrep CI Rules (Managed rules at cloudflare.semgrep.dev)
- name: Semgrep managed rules (managed at cloudflare.semgrep.dev)
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
run: semgrep ci
# Semgrep Scan to run on Pull Request events
# scans using rules inside the .semgrep/ folder and fails on error
# include [skip semgrep] in top-most commit message to skip scan
- name: Semgrep Repo Rules (Custom rules found in .semgrep/)
if: github.event_name == 'pull_request' && !contains(github.event.head_commit.message, '[skip semgrep]')
- name: Semgrep style guide rules (stored in .semgrep/)
shell: bash
if: github.event_name == 'pull_request' && !contains(env.COMMIT_MESSAGE, '[skip style guide checks]')
run: |
git config --global --add safe.directory $PWD
# read COMMIT_MESSAGE from the environment at run time; expanding it with a
# workflow expression would paste the commit message into the script text,
# where the shell would interpret it as code
echo "env.COMMIT_MESSAGE: $COMMIT_MESSAGE"
base_commit=$(git merge-base HEAD origin/$GITHUB_BASE_REF)
git diff $base_commit... --diff-filter=ACMRT --name-only | grep -E '\.(htm|html|yaml|yml|md|mdx)$' > tools/relevant_changed_files.txt || true
@ -48,8 +59,12 @@ jobs:
semgrep scan \
--config .semgrep --metrics=off \
--include "*.mdx" --include "*.mdx" \
$list_of_files
# add '--error' to return error code to workflow
--error \
--json \
$list_of_files \
| jq --raw-output ".results[] | \"::warning file=\(.path),line=\(.start.line),title=\(.check_id)::\(.extra.message)\""
exit ${PIPESTATUS[0]}
else
echo "No relevant files changed."
echo "No relevant files changed"
fi

View file

@ -1,41 +0,0 @@
# Semgrep rules (generic language mode) with two checks: the phrase
# "coming soon" and potential dates. Both rules share the same include list
# (htm, html, md, mdx, yaml, yml) and the same exclude list (changelog,
# release notes, .semgrep and .github).
rules:
# Rule 1: flags "coming soon" with either word capitalised or lower-case.
- id: coming-soon
languages: [generic]
message: "Found forbidden string 'coming soon'. Too often we set expectations unfairly by attaching this phrase to a feature that may not actually arrive soon."
severity: MEDIUM
paths:
include:
- "*.htm"
- "*.html"
- "*.md"
- "*.mdx"
- "*.yaml"
- "*.yml"
exclude:
- "/src/content/changelog/**"
- "/src/content/release-notes/**"
- "/.semgrep/**"
- "/.github/**"
patterns:
- pattern-regex: "[Cc]oming [Ss]oon"
# Rule 2: flags text that looks like a date, using either of the two
# regexes under pattern-either (month abbreviations, or a 20xx year).
- id: potential-date
languages: [generic]
message: "Potential date found. Documentation should strive to represent universal truth, not something time-bound."
severity: MEDIUM
paths:
include:
- "*.htm"
- "*.html"
- "*.md"
- "*.mdx"
- "*.yaml"
- "*.yml"
exclude:
- "/src/content/changelog/**"
- "/src/content/release-notes/**"
- "/.semgrep/**"
- "/.github/**"
pattern-either:
# NOTE(review): in PCRE-style syntax `\|` is an escaped literal pipe rather
# than alternation, so this presumably matches only the literal text
# "Jan| Feb| ..." and not individual months - confirm. "Oct" is not listed.
- pattern-regex: Jan\| Feb\| Mar\| Apr\| May\| Jun\| Jul\| Aug\| Sep\| Nov\| Dec
# a space followed by a four-digit number starting with 20 (2000-2099)
- pattern-regex: \ 20[0-9][0-9]

View file

@ -0,0 +1,61 @@
# Semgrep style guide rules (generic language mode).
# Every rule applies to htm/html/md/mdx/yaml/yml files and excludes the
# changelog, release notes, .semgrep and .github trees. Each message tells
# the author how to skip the checks via the commit message.
rules:
  # Flags "coming soon" with either word capitalised or lower-case.
  - id: style-guide-coming-soon
    languages: [generic]
    message: "Found forbidden string 'coming soon'. Too often we set expectations unfairly by attaching this phrase to a feature that may not actually arrive soon. (add [skip style guide checks] to commit message to skip)"
    severity: MEDIUM
    paths:
      include:
        - "*.htm"
        - "*.html"
        - "*.md"
        - "*.mdx"
        - "*.yaml"
        - "*.yml"
      exclude:
        - "/src/content/changelog/**"
        - "/src/content/release-notes/**"
        - "/.semgrep/**"
        - "/.github/**"
    patterns:
      - pattern-regex: "[Cc]oming [Ss]oon"
  # Flags capitalised three-letter month abbreviations (which also match the
  # start of the full month names).
  - id: style-guide-potential-date-month
    languages: [generic]
    message: "Potential month found. Documentation should strive to represent universal truth, not something time-bound. (add [skip style guide checks] to commit message to skip)"
    severity: MEDIUM
    paths:
      include:
        - "*.htm"
        - "*.html"
        - "*.md"
        - "*.mdx"
        - "*.yaml"
        - "*.yml"
      exclude:
        - "/src/content/changelog/**"
        - "/src/content/release-notes/**"
        - "/.semgrep/**"
        - "/.github/**"
    patterns:
      # The [^x] suffixes reject one specific following letter (for example
      # "Mark", "Mayb", "Junk", "Deci") and require some character to follow.
      # "Oct" was missing from the alternation, so October was never flagged.
      - pattern-regex: "Jan|Feb|Mar[^k]|Apr|May[^b]|Jun[^k]|Jul|Aug|Sep|Oct|Nov|Dec[^i]"
  # Flags four-digit numbers of the form 20xx that are not followed by "-".
  - id: style-guide-potential-date-year
    languages: [generic]
    message: "Potential year found. Documentation should strive to represent universal truth, not something time-bound. (add [skip style guide checks] to commit message to skip)"
    severity: MEDIUM
    paths:
      include:
        - "*.htm"
        - "*.html"
        - "*.md"
        - "*.mdx"
        - "*.yaml"
        - "*.yml"
      exclude:
        - "/src/content/changelog/**"
        - "/src/content/release-notes/**"
        - "/.semgrep/**"
        - "/.github/**"
    patterns:
      # ignore 2xxx- with a - at the end (double-escape because in string and not a range operator!)
      - pattern-regex: "20[0-9][0-9][^\\-]"

View file

@ -1,5 +1,6 @@
#! /bin/bash
repo_root_dir="$(git rev-parse --show-toplevel)"
pushd "${repo_root_dir}" > /dev/null || return
@ -17,8 +18,11 @@ if [ -s tools/relevant_changed_files.txt ]; then
semgrep scan \
--config .semgrep --metrics=off \
--include "*.mdx" --include "*.mdx" \
--force-color \
--error \
$list_of_files
semgrep_return_code=$?
echo "return code: $semgrep_return_code"
exit $semgrep_return_code
else
echo "No relevant files changed."
fi