11#! /usr/bin/env bash
22set -euo pipefail
33
4- # CHANGE: Add bash-only pre-commit guard that redacts probable GitHub/OAuth secrets in staged files .
5- # WHY: Avoid relying on Node runtime in hook execution and keep local push-protection checks deterministic .
# CHANGE: Add staged knowledge secret guard with external scanner support.
# WHY: Prefer proven scanners (gitleaks) when available, while keeping deterministic fallback redaction.
66
# Verify required tools before their first use, so a missing dependency
# produces the explicit error below instead of a bare "command not found".
command -v git > /dev/null || { echo "ERROR: git is required" >&2; exit 1; }
command -v perl > /dev/null || { echo "ERROR: perl is required" >&2; exit 1; }

# Work from the repository top level for the rest of the script.
ROOT_DIR="$(git rev-parse --show-toplevel)"
cd "$ROOT_DIR"
1212
13- SECRET_PATTERN=' \b(?:github_pat_|gho_|ghp_|ghu_|ghs_|ghr_|gha_)[A-Za-z0-9_]{20,255}\b'
13+ SECRET_PATTERN=' (\b(?:github_pat_|gho_|ghp_|ghu_|ghs_|ghr_|gha_)[A-Za-z0-9_]{20,255}\b|\bsk-(?!ant-)(?:proj-)?[A-Za-z0-9_-]{20,}\b|\bsk-ant-[A-Za-z0-9_-]{20,}\b|-----BEGIN(?: [A-Z0-9]+)* PRIVATE KEY-----)'
14+ HAS_GITLEAKS=0
15+
16+ if command -v gitleaks > /dev/null 2>&1 ; then
17+ HAS_GITLEAKS=1
18+ fi
19+
#######################################
# Tell whether a path is, or lies beneath, a directory component named
# `.knowledge` (the `.knowlenge` spelling is matched as well).
# Arguments: $1 - path to test
# Returns:   0 when such a component is present, 1 otherwise
#######################################
is_knowledge_path() {
  local path="$1"
  # Keeping the regex in a variable lets bash treat it as a pattern inside
  # [[ =~ ]] without any quoting or whitespace pitfalls.
  local -r component_re='(^|/)\.(knowledge|knowlenge)(/|$)'
  [[ "$path" =~ $component_re ]]
}
24+
#######################################
# Scan one file with gitleaks (fed on stdin) and report the outcome.
# Globals:   HAS_GITLEAKS (read)
# Arguments: $1 - path of the file to scan
# Outputs:   one word on stdout:
#              skip  - gitleaks is not available
#              clean - gitleaks exited 0
#              hit   - gitleaks exited 1
#              error - the file is unreadable or gitleaks exited otherwise
# Returns:   0 (the outcome is conveyed on stdout)
#######################################
scan_with_gitleaks_file() {
  local file_path="$1"
  if [ "$HAS_GITLEAKS" -ne 1 ]; then
    printf '%s\n' "skip"
    return
  fi

  # A failed input redirection would yield status 1 and masquerade as a hit.
  if [ ! -r "$file_path" ]; then
    printf '%s\n' "error"
    return
  fi

  # Capture the status on the command itself: `$?` read after an
  # `if ...; fi` whose condition failed is 0, which would hide a hit.
  local code=0
  gitleaks stdin --no-banner --redact --log-level error \
    < "$file_path" > /dev/null 2>&1 || code=$?

  case "$code" in
    0) printf '%s\n' "clean" ;;
    1) printf '%s\n' "hit" ;;
    *) printf '%s\n' "error" ;;
  esac
}
45+
#######################################
# Write the staged (index) version of a path to a file.
# Arguments: $1 - path as recorded in the index
#            $2 - destination file, overwritten
# Returns:   exit status of `git cat-file`
#######################################
staged_blob_to_file() {
  local path="$1"
  local out="$2"
  # The ":<path>" revision syntax names the index entry, not the work tree.
  git cat-file -p ":$path" > "$out"
}
51+
52+ has_secret_in_staged_blob () {
53+ local staged_blob_path=" $1 "
54+ local gitleaks_state
55+ gitleaks_state=" $( scan_with_gitleaks_file " $staged_blob_path " ) "
56+
57+ if [ " $gitleaks_state " = " hit" ]; then
58+ return 0
59+ fi
60+ if grep -Pq " $SECRET_PATTERN " " $staged_blob_path " ; then
61+ return 0
62+ fi
63+ return 1
64+ }
1465
# Counters and lists consumed by the reporting section after the loop.
redacted_count=0
manual_fix_files=()
has_staged_files=0

# Scratch directory for copies of staged blobs; removed on any exit path.
TMP_DIR=$(mktemp -d)
trap 'rm -rf "$TMP_DIR"' EXIT
index=0

# Walk every staged added/copied/modified path (NUL-delimited).
while IFS= read -r -d '' path; do
  if [ -z "$path" ]; then
    continue
  fi
  if ! is_knowledge_path "$path"; then
    continue
  fi

  # Skip paths that have no readable entry in the index.
  if ! git cat-file -e ":$path" 2> /dev/null; then
    continue
  fi

  has_staged_files=1
  # `git diff --quiet` succeeds only when the work tree matches the index.
  has_unstaged=true
  if git diff --quiet -- "$path"; then
    has_unstaged=false
  fi

  # Each file gets its own scratch copy of the staged content.
  index=$(( index + 1 ))
  tmp_path="${TMP_DIR}/entry-${index}"
  staged_blob_to_file "$path" "$tmp_path"
  if ! has_secret_in_staged_blob "$tmp_path"; then
    continue
  fi

  # With unstaged edits the work-tree file differs from the staged blob, so
  # rewriting and re-adding it would stage those edits too; ask for a manual
  # fix instead.
  if [ "$has_unstaged" = true ]; then
    manual_fix_files+=("$path")
    continue
  fi

  # Redact in place. `sk-ant-` keys are replaced before the generic `sk-`
  # rule; -0 lets the private-key rule span multiple lines.
  perl -0pi -e '
    s/\b(?:github_pat_|gho_|ghp_|ghu_|ghs_|ghr_|gha_)[A-Za-z0-9_]{20,255}\b/<REDACTED_GITHUB_TOKEN>/g;
    s/\bsk-ant-[A-Za-z0-9_-]{20,}\b/<REDACTED_ANTHROPIC_KEY>/g;
    s/\bsk-(?!ant-)(?:proj-)?[A-Za-z0-9_-]{20,}\b/<REDACTED_OPENAI_KEY>/g;
    s/-----BEGIN(?: [A-Z0-9]+)* PRIVATE KEY-----[\s\S]*?-----END(?: [A-Z0-9]+)* PRIVATE KEY-----/<REDACTED_PRIVATE_KEY>/g;
  ' "$path"
  git add -- "$path"

  # Re-scan the newly staged blob; anything still detected (for example a
  # gitleaks finding the regexes do not cover) needs a manual fix.
  redacted_path="${TMP_DIR}/post-redacted-${index}"
  staged_blob_to_file "$path" "$redacted_path"
  if has_secret_in_staged_blob "$redacted_path"; then
    manual_fix_files+=("$path")
  else
    redacted_count=$(( redacted_count + 1 ))
  fi
done < <(git diff --cached --name-only --diff-filter=ACM -z)
57120
58121if [ " $has_staged_files " -eq 0 ]; then
59122 exit 0
60123fi
61124
62125if [ " ${# manual_fix_files[@]} " -gt 0 ]; then
63- echo " ERROR: secret-like tokens found in staged versions with unstaged changes."
126+ echo " ERROR: secret-like tokens found in staged .knowledge/.knowlenge files with unstaged changes."
64127 echo " Please fix these files manually in index or clear unstaged changes, then commit again:"
65128 for file in " ${manual_fix_files[@]} " ; do
66129 echo " - $file "
@@ -70,7 +133,11 @@ if [ "${#manual_fix_files[@]}" -gt 0 ]; then
70133fi
71134
72135if [ " $redacted_count " -gt 0 ]; then
73- echo " pre-commit: auto-redacted secrets in $redacted_count staged file(s)."
136+ if [ " $HAS_GITLEAKS " -eq 1 ]; then
137+ echo " pre-commit: auto-redacted secrets in $redacted_count staged .knowledge/.knowlenge file(s) (scanner: gitleaks + fallback)."
138+ else
139+ echo " pre-commit: auto-redacted secrets in $redacted_count staged .knowledge/.knowlenge file(s) (scanner: fallback regex)."
140+ fi
74141fi
75142
76143exit 0
0 commit comments