Skip to content

Commit 18f37a2

Browse files
authored
feat(hooks): block secrets in all nested .knowledge/.knowlenge paths (#53)
* feat(hooks): guard secrets for all .knowledge/.knowlenge paths
* feat(hooks): use gitleaks for knowledge secret scanning when available
* feat(container): auto-install gitleaks in generated docker image
1 parent 8a0d4a8 commit 18f37a2

File tree

10 files changed

+315
-85
lines changed

10 files changed

+315
-85
lines changed

.githooks/pre-commit

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@ REPO_ROOT="$(cd "$HOOK_DIR/.." && pwd)"
66
cd "$REPO_ROOT"
77

88
node scripts/split-knowledge-large-files.js
9-
if [ -d ".knowledge" ]; then
10-
git add -A .knowledge
11-
fi
12-
13-
if [ -d ".knowlenge" ]; then
14-
git add -A .knowlenge
15-
fi
9+
# Stage every directory named .knowledge or .knowlenge found anywhere below the
# current directory. Entries under a .git directory are excluded. find output is
# NUL-delimited so directory names containing spaces or newlines are read intact.
while IFS= read -r -d '' dir; do
  git add -A -- "$dir"
done < <(
  find . -type d \
    ! -path "*/.git/*" \
    \( -name ".knowlenge" -o -name ".knowledge" \) \
    -print0
)
1617

1718
MAX_BYTES=$((99 * 1000 * 1000))
1819
too_large=()

.githooks/pre-push

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
#!/usr/bin/env bash
22
set -euo pipefail
33

4-
# NOTE: pre-commit splits large .knowledge/.knowlenge files before they land in new commits.
5-
# pre-push is a safety net: it prevents pushing any commit range containing >99MB blobs.
4+
# NOTE: pre-commit splits/redacts knowledge files before they land in new commits.
5+
# pre-push is a safety net: it prevents pushing commit ranges containing oversized or secret blobs
6+
# under any nested .knowledge/.knowlenge path.
67

78
if [ "${DOCKER_GIT_SKIP_KNOWLEDGE_GUARD:-}" = "1" ]; then
89
exit 0
910
fi
1011

1112
node scripts/pre-push-knowledge-guard.js "$@"
12-

packages/docker-git/tests/core/templates.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ describe("planFiles", () => {
6464
expect(dockerfileSpec.contents).toContain("AUTO_MENU")
6565
expect(dockerfileSpec.contents).toContain("ncurses-term")
6666
expect(dockerfileSpec.contents).toContain("tag-order builtins commands")
67+
expect(dockerfileSpec.contents).toContain("gitleaks version")
6768
}
6869

6970
if (entrypointSpec && entrypointSpec._tag === "File") {

packages/lib/src/core/templates/dockerfile.ts

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,21 @@ RUN curl -fsSL https://opencode.ai/install | HOME=/usr/local bash -s -- --no-mod
4646
RUN ln -sf /usr/local/.opencode/bin/opencode /usr/local/bin/opencode
4747
RUN opencode --version`
4848

49+
/** Pinned gitleaks release that the generated image downloads. */
const gitleaksVersion = "8.28.0"

/**
 * Renders the Dockerfile fragment that installs gitleaks into /usr/local/bin.
 *
 * The fragment maps `uname -m` to the release asset suffix (x64 / arm64), fails the
 * build on any other architecture, unpacks only the `gitleaks` binary from the release
 * tarball, and finishes with `gitleaks version` as a smoke check.
 *
 * String.raw is required here: in a cooked template literal a backslash followed by a
 * newline is a line continuation and is removed, which would collapse the RUN
 * instruction into a single long line. With String.raw each trailing backslash reaches
 * the Dockerfile as a literal continuation, while `${gitleaksVersion}` is still
 * interpolated.
 *
 * @returns A comment line followed by one multi-line RUN instruction.
 */
const renderDockerfileGitleaks = (): string =>
  String.raw`# Tooling: gitleaks (secret scanner for .knowledge/.knowlenge hooks)
RUN ARCH="$(uname -m)" \
  && case "$ARCH" in \
    x86_64|amd64) GITLEAKS_ARCH="x64" ;; \
    aarch64|arm64) GITLEAKS_ARCH="arm64" ;; \
    *) echo "Unsupported arch for gitleaks: $ARCH" >&2; exit 1 ;; \
  esac \
  && curl -fsSL "https://github.com/gitleaks/gitleaks/releases/download/v${gitleaksVersion}/gitleaks_${gitleaksVersion}_linux_$GITLEAKS_ARCH.tar.gz" \
    | tar -xz -C /usr/local/bin gitleaks \
  && chmod +x /usr/local/bin/gitleaks \
  && gitleaks version`
63+
4964
const dockerfilePlaywrightMcpBlock = String.raw`RUN npm install -g @playwright/mcp@latest
5065
5166
# docker-git: wrapper that converts a CDP HTTP endpoint into a usable WS endpoint
@@ -157,6 +172,7 @@ export const renderDockerfile = (config: TemplateConfig): string =>
157172
renderDockerfileNode(),
158173
renderDockerfileBun(config),
159174
renderDockerfileOpenCode(),
175+
renderDockerfileGitleaks(),
160176
renderDockerfileUsers(config),
161177
renderDockerfileWorkspace(config)
162178
].join("\n\n")

packages/lib/tests/usecases/prepare-files.test.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ describe("prepareProjectFiles", () => {
105105
const entrypoint = yield* _(fs.readFileString(path.join(outDir, "entrypoint.sh")))
106106
const composeBefore = yield* _(fs.readFileString(path.join(outDir, "docker-compose.yml")))
107107
expect(dockerfile).toContain("docker-compose-v2")
108+
expect(dockerfile).toContain("gitleaks version")
108109
expect(entrypoint).toContain('DOCKER_GIT_HOME="/home/dev/.docker-git"')
109110
expect(entrypoint).toContain('SOURCE_SHARED_AUTH="/home/dev/.codex-shared/auth.json"')
110111
expect(entrypoint).toContain('OPENCODE_DATA_DIR="/home/dev/.local/share/opencode"')

scripts/pre-commit-secret-guard.sh

Lines changed: 87 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,129 @@
11
#!/usr/bin/env bash
22
set -euo pipefail
33

4-
# CHANGE: Add bash-only pre-commit guard that redacts probable GitHub/OAuth secrets in staged files.
5-
# WHY: Avoid relying on Node runtime in hook execution and keep local push-protection checks deterministic.
4+
# CHANGE: Add staged knowledge secret guard with external scanner support.
5+
# WHY: Prefer proven scanners (gitleaks) when available, while keeping deterministic fallback redaction.
66

77
ROOT_DIR="$(git rev-parse --show-toplevel)"
88
cd "$ROOT_DIR"
99

1010
command -v git >/dev/null || { echo "ERROR: git is required" >&2; exit 1; }
1111
command -v perl >/dev/null || { echo "ERROR: perl is required" >&2; exit 1; }
1212

13-
SECRET_PATTERN='\b(?:github_pat_|gho_|ghp_|ghu_|ghs_|ghr_|gha_)[A-Za-z0-9_]{20,255}\b'
13+
SECRET_PATTERN='(\b(?:github_pat_|gho_|ghp_|ghu_|ghs_|ghr_|gha_)[A-Za-z0-9_]{20,255}\b|\bsk-(?!ant-)(?:proj-)?[A-Za-z0-9_-]{20,}\b|\bsk-ant-[A-Za-z0-9_-]{20,}\b|-----BEGIN(?: [A-Z0-9]+)* PRIVATE KEY-----)'
14+
# HAS_GITLEAKS is 1 when a `gitleaks` command is resolvable in this shell, else 0.
# The `&&` list keeps a failed lookup from tripping `set -e`.
HAS_GITLEAKS=0
command -v gitleaks >/dev/null 2>&1 && HAS_GITLEAKS=1
19+
20+
# Succeeds (status 0) when any component of the given path is exactly
# ".knowledge" or ".knowlenge"; returns 1 otherwise.
#   $1 - slash-separated path to test
is_knowledge_path() {
  local candidate="$1"
  case "$candidate" in
    .knowledge | .knowlenge) return 0 ;;
    .knowledge/* | .knowlenge/*) return 0 ;;
    */.knowledge | */.knowlenge) return 0 ;;
    */.knowledge/* | */.knowlenge/*) return 0 ;;
  esac
  return 1
}
24+
25+
# Scan a single file with gitleaks and print the outcome as one word on stdout.
#   $1 - path of the file to scan; its content is fed to `gitleaks stdin`
# Prints:
#   skip  - HAS_GITLEAKS is not 1, nothing was scanned
#   clean - gitleaks exited 0
#   hit   - gitleaks exited 1
#   error - the file is unreadable or gitleaks exited with any other status
# The function itself always returns 0; callers read the printed word.
scan_with_gitleaks_file() {
  local file_path="$1"
  if [ "$HAS_GITLEAKS" -ne 1 ]; then
    printf '%s\n' "skip"
    return
  fi

  # Guard the redirection: a failed `< file` would otherwise produce status 1
  # and be indistinguishable from a gitleaks finding.
  if [ ! -r "$file_path" ]; then
    printf '%s\n' "error"
    return
  fi

  # Capture the status with `|| code=$?`. Reading $? after an `if ...; fi` whose
  # condition failed yields 0 (the status of the if compound), which made the
  # "hit" branch unreachable. The `||` list also keeps `set -e` from aborting.
  local code=0
  gitleaks stdin --no-banner --redact --log-level error < "$file_path" >/dev/null 2>&1 || code=$?

  case "$code" in
    0) printf '%s\n' "clean" ;;
    1) printf '%s\n' "hit" ;;
    *) printf '%s\n' "error" ;;
  esac
}
45+
46+
# Write the index (staged) content of a path into a file.
#   $1 - path as stored in the index; read via the ":<path>" revision syntax
#   $2 - destination file, overwritten with the blob content
staged_blob_to_file() {
  local index_path="$1" destination="$2"
  git cat-file -p ":${index_path}" > "${destination}"
}
51+
52+
has_secret_in_staged_blob() {
53+
local staged_blob_path="$1"
54+
local gitleaks_state
55+
gitleaks_state="$(scan_with_gitleaks_file "$staged_blob_path")"
56+
57+
if [ "$gitleaks_state" = "hit" ]; then
58+
return 0
59+
fi
60+
if grep -Pq "$SECRET_PATTERN" "$staged_blob_path"; then
61+
return 0
62+
fi
63+
return 1
64+
}
1465

1566
redacted_count=0
1667
manual_fix_files=()
1768
has_staged_files=0
1869

1970
TMP_DIR=$(mktemp -d)
2071
trap 'rm -rf "$TMP_DIR"' EXIT
72+
index=0
2173

2274
while IFS= read -r -d '' path; do
2375
if [ -z "$path" ]; then
2476
continue
2577
fi
78+
if ! is_knowledge_path "$path"; then
79+
continue
80+
fi
2681

2782
if ! git cat-file -e ":$path" 2>/dev/null; then
2883
continue
2984
fi
3085

3186
has_staged_files=1
32-
tmp_path="${TMP_DIR}/entry"
33-
has_unstaged=false
34-
35-
if ! git diff --quiet -- "$path"; then
36-
has_unstaged=true
87+
has_unstaged=true
88+
if git diff --quiet -- "$path"; then
89+
has_unstaged=false
3790
fi
3891

39-
if [ "$has_unstaged" = true ]; then
40-
git cat-file -p ":$path" > "$tmp_path"
41-
42-
if grep -Pq "$SECRET_PATTERN" "$tmp_path"; then
43-
manual_fix_files+=("$path")
44-
fi
45-
92+
index=$((index + 1))
93+
tmp_path="${TMP_DIR}/entry-${index}"
94+
staged_blob_to_file "$path" "$tmp_path"
95+
if ! has_secret_in_staged_blob "$tmp_path"; then
4696
continue
4797
fi
4898

49-
if ! grep -Pq "$SECRET_PATTERN" "$path"; then
99+
if [ "$has_unstaged" = true ]; then
100+
manual_fix_files+=("$path")
50101
continue
51102
fi
52103

53-
perl -0pi -e 's/\b(?:github_pat_|gho_|ghp_|ghu_|ghs_|ghr_|gha_)[A-Za-z0-9_]{20,255}\b/<REDACTED_GITHUB_TOKEN>/g' "$path"
104+
perl -0pi -e '
105+
s/\b(?:github_pat_|gho_|ghp_|ghu_|ghs_|ghr_|gha_)[A-Za-z0-9_]{20,255}\b/<REDACTED_GITHUB_TOKEN>/g;
106+
s/\bsk-ant-[A-Za-z0-9_-]{20,}\b/<REDACTED_ANTHROPIC_KEY>/g;
107+
s/\bsk-(?!ant-)(?:proj-)?[A-Za-z0-9_-]{20,}\b/<REDACTED_OPENAI_KEY>/g;
108+
s/-----BEGIN(?: [A-Z0-9]+)* PRIVATE KEY-----[\s\S]*?-----END(?: [A-Z0-9]+)* PRIVATE KEY-----/<REDACTED_PRIVATE_KEY>/g;
109+
' "$path"
54110
git add -- "$path"
55-
redacted_count=$((redacted_count + 1))
111+
112+
redacted_path="${TMP_DIR}/post-redacted-${index}"
113+
staged_blob_to_file "$path" "$redacted_path"
114+
if has_secret_in_staged_blob "$redacted_path"; then
115+
manual_fix_files+=("$path")
116+
else
117+
redacted_count=$((redacted_count + 1))
118+
fi
56119
done < <(git diff --cached --name-only --diff-filter=ACM -z)
57120

58121
if [ "$has_staged_files" -eq 0 ]; then
59122
exit 0
60123
fi
61124

62125
if [ "${#manual_fix_files[@]}" -gt 0 ]; then
63-
echo "ERROR: secret-like tokens found in staged versions with unstaged changes."
126+
echo "ERROR: secret-like tokens found in staged .knowledge/.knowlenge files with unstaged changes."
64127
echo "Please fix these files manually in index or clear unstaged changes, then commit again:"
65128
for file in "${manual_fix_files[@]}"; do
66129
echo " - $file"
@@ -70,7 +133,11 @@ if [ "${#manual_fix_files[@]}" -gt 0 ]; then
70133
fi
71134

72135
if [ "$redacted_count" -gt 0 ]; then
73-
echo "pre-commit: auto-redacted secrets in $redacted_count staged file(s)."
136+
if [ "$HAS_GITLEAKS" -eq 1 ]; then
137+
echo "pre-commit: auto-redacted secrets in $redacted_count staged .knowledge/.knowlenge file(s) (scanner: gitleaks + fallback)."
138+
else
139+
echo "pre-commit: auto-redacted secrets in $redacted_count staged .knowledge/.knowlenge file(s) (scanner: fallback regex)."
140+
fi
74141
fi
75142

76143
exit 0

0 commit comments

Comments
 (0)