Skip to content

Commit eeb8aa7

Browse files
committed
feat(hooks): use gitleaks for knowledge secret scanning when available
1 parent d2492e7 commit eeb8aa7

File tree

2 files changed

+132
-24
lines changed

2 files changed

+132
-24
lines changed

scripts/pre-commit-secret-guard.sh

Lines changed: 71 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#!/usr/bin/env bash
22
set -euo pipefail
33

4-
# CHANGE: Add bash-only pre-commit guard that redacts secret-like tokens in staged .knowledge/.knowlenge files.
5-
# WHY: Keep knowledge folders safe to commit even when users paste credentials by mistake.
4+
# CHANGE: Add staged knowledge secret guard with external scanner support.
5+
# WHY: Prefer proven scanners (gitleaks) when available, while keeping deterministic fallback redaction.
66

77
ROOT_DIR="$(git rev-parse --show-toplevel)"
88
cd "$ROOT_DIR"
@@ -11,18 +11,65 @@ command -v git >/dev/null || { echo "ERROR: git is required" >&2; exit 1; }
1111
command -v perl >/dev/null || { echo "ERROR: perl is required" >&2; exit 1; }
1212

1313
SECRET_PATTERN='(\b(?:github_pat_|gho_|ghp_|ghu_|ghs_|ghr_|gha_)[A-Za-z0-9_]{20,255}\b|\bsk-(?!ant-)(?:proj-)?[A-Za-z0-9_-]{20,}\b|\bsk-ant-[A-Za-z0-9_-]{20,}\b|-----BEGIN(?: [A-Z0-9]+)* PRIVATE KEY-----)'
14+
HAS_GITLEAKS=0
15+
16+
if command -v gitleaks >/dev/null 2>&1; then
17+
HAS_GITLEAKS=1
18+
fi
1419

1520
# Succeeds (status 0) when the given path contains a path component named
# exactly ".knowledge" or ".knowlenge"; fails (status 1) otherwise.
#   $1 - repository-relative path to test
is_knowledge_path() {
  # Keeping the regex in a variable avoids quoting surprises on the right-hand side of =~.
  local knowledge_component_re='(^|/)\.(knowledge|knowlenge)(/|$)'
  [[ "$1" =~ $knowledge_component_re ]]
}
1924

25+
# Scans one file with gitleaks (content fed on stdin) and prints exactly one
# state word on stdout:
#   skip  - gitleaks is not available (HAS_GITLEAKS is not 1)
#   clean - gitleaks exited 0
#   hit   - gitleaks exited 1
#   error - the file is unreadable or gitleaks exited with any other status
#   $1 - path of the file whose content should be scanned
scan_with_gitleaks_file() {
  local file_path="$1"
  if [ "$HAS_GITLEAKS" -ne 1 ]; then
    printf '%s\n' "skip"
    return
  fi

  # A failed input redirection also yields status 1, which would be
  # indistinguishable from a gitleaks finding, so rule it out up front.
  if [ ! -r "$file_path" ]; then
    printf '%s\n' "error"
    return
  fi

  # Capture the scanner's own exit status. Reading $? after an `if ... fi`
  # whose condition failed always gives 0, so the status must be taken here.
  # The `||` form also keeps `set -e` from aborting on a non-zero exit.
  local code=0
  gitleaks stdin --no-banner --redact --log-level error < "$file_path" >/dev/null 2>&1 || code=$?

  case "$code" in
    0) printf '%s\n' "clean" ;;
    1) printf '%s\n' "hit" ;;
    *) printf '%s\n' "error" ;;
  esac
}
45+
46+
# Writes the staged (index) version of a path to a file.
#   $1 - repository-relative path, looked up in the index via the ":<path>" form
#   $2 - destination file, truncated and overwritten
staged_blob_to_file() {
  local index_path="$1"
  local destination="$2"
  git cat-file -p ":${index_path}" > "${destination}"
}
51+
52+
has_secret_in_staged_blob() {
53+
local staged_blob_path="$1"
54+
local gitleaks_state
55+
gitleaks_state="$(scan_with_gitleaks_file "$staged_blob_path")"
56+
57+
if [ "$gitleaks_state" = "hit" ]; then
58+
return 0
59+
fi
60+
if grep -Pq "$SECRET_PATTERN" "$staged_blob_path"; then
61+
return 0
62+
fi
63+
return 1
64+
}
65+
2066
redacted_count=0
2167
manual_fix_files=()
2268
has_staged_files=0
2369

2470
TMP_DIR=$(mktemp -d)
2571
trap 'rm -rf "$TMP_DIR"' EXIT
72+
index=0
2673

2774
while IFS= read -r -d '' path; do
2875
if [ -z "$path" ]; then
@@ -37,23 +84,20 @@ while IFS= read -r -d '' path; do
3784
fi
3885

3986
has_staged_files=1
40-
tmp_path="${TMP_DIR}/entry"
41-
has_unstaged=false
42-
43-
if ! git diff --quiet -- "$path"; then
44-
has_unstaged=true
87+
has_unstaged=true
88+
if git diff --quiet -- "$path"; then
89+
has_unstaged=false
4590
fi
4691

47-
if [ "$has_unstaged" = true ]; then
48-
git cat-file -p ":$path" > "$tmp_path"
49-
if grep -Pq "$SECRET_PATTERN" "$tmp_path"; then
50-
manual_fix_files+=("$path")
51-
fi
52-
92+
index=$((index + 1))
93+
tmp_path="${TMP_DIR}/entry-${index}"
94+
staged_blob_to_file "$path" "$tmp_path"
95+
if ! has_secret_in_staged_blob "$tmp_path"; then
5396
continue
5497
fi
5598

56-
if ! grep -Pq "$SECRET_PATTERN" "$path"; then
99+
if [ "$has_unstaged" = true ]; then
100+
manual_fix_files+=("$path")
57101
continue
58102
fi
59103

@@ -64,7 +108,14 @@ while IFS= read -r -d '' path; do
64108
s/-----BEGIN(?: [A-Z0-9]+)* PRIVATE KEY-----[\s\S]*?-----END(?: [A-Z0-9]+)* PRIVATE KEY-----/<REDACTED_PRIVATE_KEY>/g;
65109
' "$path"
66110
git add -- "$path"
67-
redacted_count=$((redacted_count + 1))
111+
112+
redacted_path="${TMP_DIR}/post-redacted-${index}"
113+
staged_blob_to_file "$path" "$redacted_path"
114+
if has_secret_in_staged_blob "$redacted_path"; then
115+
manual_fix_files+=("$path")
116+
else
117+
redacted_count=$((redacted_count + 1))
118+
fi
68119
done < <(git diff --cached --name-only --diff-filter=ACM -z)
69120

70121
if [ "$has_staged_files" -eq 0 ]; then
@@ -82,7 +133,11 @@ if [ "${#manual_fix_files[@]}" -gt 0 ]; then
82133
fi
83134

84135
if [ "$redacted_count" -gt 0 ]; then
85-
echo "pre-commit: auto-redacted secrets in $redacted_count staged .knowledge/.knowlenge file(s)."
136+
if [ "$HAS_GITLEAKS" -eq 1 ]; then
137+
echo "pre-commit: auto-redacted secrets in $redacted_count staged .knowledge/.knowlenge file(s) (scanner: gitleaks + fallback)."
138+
else
139+
echo "pre-commit: auto-redacted secrets in $redacted_count staged .knowledge/.knowlenge file(s) (scanner: fallback regex)."
140+
fi
86141
fi
87142

88143
exit 0

scripts/pre-push-knowledge-guard.js

Lines changed: 61 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
// SOURCE: n/a
88
// FORMAT THEOREM: ∀b ∈ Blobs(pushedRange, knowledgePaths): size(b) ≤ MAX_BYTES ∧ noSecrets(b) → pushAllowed
99
// PURITY: SHELL (git IO)
10-
// INVARIANT: No pushed blob in knowledge paths exceeds MAX_BYTES or matches secret patterns.
10+
// INVARIANT: No pushed blob in knowledge paths exceeds MAX_BYTES or matches secret patterns (regex + optional gitleaks).
1111

1212
const { execFileSync } = require("node:child_process");
1313
const fs = require("node:fs");
@@ -45,13 +45,44 @@ const sh = (cmd, args, options = {}) =>
4545
const shBytes = (cmd, args, options = {}) =>
4646
execFileSync(cmd, args, { encoding: null, ...options });
4747

48+
// True when `gitleaks version` can be executed successfully, false when the
// call throws (for example because the binary is missing). Evaluated once at
// module load; all child stdio is discarded.
const hasGitleaks = (() => {
  let available = true;
  try {
    execFileSync("gitleaks", ["version"], {
      encoding: "utf8",
      stdio: ["ignore", "ignore", "ignore"],
    });
  } catch {
    available = false;
  }
  return available;
})();
59+
4860
// Formats a byte count as decimal megabytes (bytes / 1,000,000) with two decimals.
const toMb = (bytes) => {
  const megabytes = bytes / 1_000_000;
  return megabytes.toFixed(2);
};
4961
// Returns the `name` of the first SECRET_PATTERNS entry whose `regex` matches
// `text`, or null when no entry matches.
const classifySecret = (text) => {
  for (const { name, regex } of SECRET_PATTERNS) {
    if (regex.test(text)) {
      return name;
    }
  }
  return null;
};
};
67+
// Pipes `content` to `gitleaks stdin` and maps the outcome to a state string:
//   "skip"  - gitleaks was not detected (hasGitleaks is false)
//   "clean" - the process completed without throwing
//   "hit"   - the process failed with exit status 1
//   "error" - any other failure (other status, or no numeric status at all)
const scanWithGitleaks = (content) => {
  if (!hasGitleaks) return "skip";

  const scanArgs = ["stdin", "--no-banner", "--redact", "--log-level", "error"];
  const spawnOptions = {
    encoding: null,
    stdio: ["pipe", "ignore", "ignore"],
    input: content,
  };

  try {
    execFileSync("gitleaks", scanArgs, spawnOptions);
  } catch (error) {
    // Strict equality with 1 already implies a numeric status.
    return error?.status === 1 ? "hit" : "error";
  }
  return "clean";
};
5586

5687
const stdin = fs.readFileSync(0, "utf8").trimEnd();
5788
if (stdin.length === 0) process.exit(0);
@@ -80,6 +111,7 @@ const oversize = [];
80111
const secretHits = [];
81112
const objectToPaths = new Map();
82113
const oversizeBlobOids = new Set();
114+
let gitleaksErrorCount = 0;
83115

84116
for (const range of ranges) {
85117
let revList = "";
@@ -138,14 +170,29 @@ for (const oid of oids) {
138170
}
139171
if (content.includes(0)) continue;
140172

141-
const secretType = classifySecret(content.toString("utf8"));
142-
if (secretType === null) continue;
173+
const text = content.toString("utf8");
174+
const secretType = classifySecret(text);
175+
if (secretType !== null) {
176+
secretHits.push({
177+
oid,
178+
paths: objectToPaths.get(oid) ?? new Set(),
179+
type: secretType,
180+
});
181+
continue;
182+
}
143183

144-
secretHits.push({
145-
oid,
146-
paths: objectToPaths.get(oid) ?? new Set(),
147-
type: secretType,
148-
});
184+
const gitleaksState = scanWithGitleaks(content);
185+
if (gitleaksState === "hit") {
186+
secretHits.push({
187+
oid,
188+
paths: objectToPaths.get(oid) ?? new Set(),
189+
type: "Gitleaks finding",
190+
});
191+
continue;
192+
}
193+
if (gitleaksState === "error") {
194+
gitleaksErrorCount += 1;
195+
}
149196
}
150197

151198
if (oversize.length === 0 && secretHits.length === 0) process.exit(0);
@@ -170,6 +217,12 @@ if (secretHits.length > 0) {
170217
}
171218
}
172219

220+
if (gitleaksErrorCount > 0) {
221+
console.error(
222+
`WARN: gitleaks scanner errored for ${gitleaksErrorCount} blob(s); fallback regex checks were used.`
223+
);
224+
}
225+
173226
console.error("");
174227
console.error("Fix options:");
175228
console.error(" - For new changes: commit again (pre-commit will split + redact knowledge files).");

0 commit comments

Comments
 (0)