From 076ae05123a9f6361409a0d994630b681c28ab90 Mon Sep 17 00:00:00 2001 From: Joost van der Laan Date: Tue, 24 Feb 2026 12:33:23 +0000 Subject: [PATCH 1/9] Add prek git hooks Co-Authored-By: Claude Opus 4.6 --- flake.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/flake.nix b/flake.nix index e7cca34..caaf279 100644 --- a/flake.nix +++ b/flake.nix @@ -62,6 +62,7 @@ git gh jujutsu + prek ripgrep (python3.withPackages (ps: with ps; [ pip @@ -69,6 +70,7 @@ ]); shellHook = '' + prek install # === Claude Loop Setup (ralph_wiggum) === _LOOP_SRC="${prompts}/ralph_wiggum" From 28c6ead610cc64282457ee76d3d78b8f55aad3b1 Mon Sep 17 00:00:00 2001 From: Joost van der Laan Date: Tue, 24 Feb 2026 14:05:58 +0000 Subject: [PATCH 2/9] Add prek.toml config Co-Authored-By: Claude Opus 4.6 --- prek.toml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 prek.toml diff --git a/prek.toml b/prek.toml new file mode 100644 index 0000000..e6b9cbf --- /dev/null +++ b/prek.toml @@ -0,0 +1,11 @@ +[[repos]] +repo = "builtin" +hooks = [ + { id = "trailing-whitespace" }, + { id = "end-of-file-fixer" }, + { id = "check-yaml" }, + { id = "check-json" }, + { id = "check-merge-conflict" }, + { id = "detect-private-key" }, + { id = "check-added-large-files", args = ["--maxkb=500"] }, +] From 0693433f7b7575a2d44407b0515a400e12640d60 Mon Sep 17 00:00:00 2001 From: Joost van der Laan Date: Tue, 24 Feb 2026 15:28:31 +0000 Subject: [PATCH 3/9] Add lefthook commit-msg and pre-push hooks Co-Authored-By: Claude Opus 4.6 --- flake.nix | 2 ++ lefthook.yml | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 lefthook.yml diff --git a/flake.nix b/flake.nix index caaf279..94ee661 100644 --- a/flake.nix +++ b/flake.nix @@ -63,6 +63,7 @@ gh jujutsu prek + lefthook ripgrep (python3.withPackages (ps: with ps; [ pip @@ -71,6 +72,7 @@ shellHook = '' prek install + lefthook install # === Claude Loop Setup (ralph_wiggum) === 
_LOOP_SRC="${prompts}/ralph_wiggum" diff --git a/lefthook.yml b/lefthook.yml new file mode 100644 index 0000000..98000f8 --- /dev/null +++ b/lefthook.yml @@ -0,0 +1,91 @@ +# Lefthook configuration — commit-msg and pre-push hooks +# Pre-commit file checks are handled by prek (see prek.toml) + +commit-msg: + parallel: true + commands: + capitalized-subject: + run: | + first_line=$(head -1 {1}) + first_char=$(echo "$first_line" | cut -c1) + if ! echo "$first_char" | grep -q '^[A-Z]$'; then + echo "Commit message subject should start with capital letter" + exit 1 + fi + + empty-message: + run: | + if [ ! -s {1} ]; then + echo "Commit message cannot be empty" + exit 1 + fi + + hard-tabs: + run: | + if grep -v ^# {1} | grep -q $'\t'; then + echo "Commit message contains hard tabs" + exit 1 + fi + + text-width: + run: | + first_line_length=$(head -1 {1} | wc -c | xargs -I {} expr {} - 1 ) + if [ $first_line_length -gt 50 ]; then + echo "Commit subject should be 50 characters or less" + exit 1 + fi + grep -v ^# {1} | tail -n +3 | while read -r line; do + if [ ${#line} -gt 72 ]; then + echo "Commit message body lines should be 72 characters or less" + exit 1 + fi + done + + trailing-period: + run: | + first_line=$(head -1 {1}) + if echo "$first_line" | grep -q '\.$'; then + echo "Commit message subject should not end with period" + exit 1 + fi + + single-line-subject: + run: | + first_line=$(head -1 {1}) + if [ "$(echo "$first_line" | wc -l)" -gt 1 ]; then + echo "Commit message subject should be a single line" + exit 1 + fi + + russian-novel: + run: | + total_chars=$(wc -c < {1}) + if [ $total_chars -gt 500 ]; then + echo "Commit message is too long ($total_chars characters). Keep it under 500 characters." + exit 1 + fi + + github-issue: + run: | + first_line=$(head -1 {1}) + if ! 
echo "$first_line" | grep -q '#[0-9]\+'; then + echo "No GitHub issue number found in commit message (e.g., #123)" + exit 1 + fi + +pre-push: + parallel: true + commands: + protected-branch: + run: | + protected_branches=("main" "master" "development") + current_branch=$(git symbolic-ref HEAD | sed -e 's,.*/\(.*\),\1,') + found_protected_branch=0 + for branch in "${protected_branches[@]}"; do + if [ "$current_branch" = "$branch" ]; then + echo "Direct push to protected branch '$branch' is not allowed" + echo "Please create a feature branch and submit a pull request" + found_protected_branch=1 + fi + done + exit $found_protected_branch From 6ab1d4c3011decf41b7d57f80d4454760595b9fe Mon Sep 17 00:00:00 2001 From: Joost van der Laan Date: Wed, 25 Feb 2026 10:57:22 +0000 Subject: [PATCH 4/9] Complete lefthook to prek migration Move commit-msg and pre-push hooks from lefthook.yml into prek.toml as local shell hooks. Package the prebuilt prek release binary in flake.nix instead of using nixpkgs. Remove lefthook from dev shell. 
--- flake.nix | 38 ++++++++++++++++++++-- lefthook.yml | 91 ---------------------------------------------------- prek.toml | 14 ++++++++ 3 files changed, 49 insertions(+), 94 deletions(-) delete mode 100644 lefthook.yml diff --git a/flake.nix b/flake.nix index 94ee661..c2e0ad2 100644 --- a/flake.nix +++ b/flake.nix @@ -25,6 +25,20 @@ "x86_64-darwin" = "darwin_amd64"; "aarch64-darwin" = "darwin_arm64"; }; + + prekVersion = "0.3.3"; + prekHashes = { + "x86_64-linux" = "sha256-RPYzZ6h/zqvoYrByNARmRc4SFAix7Y6nquANClQMonE="; + "x86_64-darwin" = "sha256-C2VVXSvSrdaySh8r5Rz+5tDIN4klYLrywhY72vr+0zg="; + "aarch64-linux" = "sha256-hckriLkcvhVIouYWMq8ASgnH8rtHXANzy7DFI6hI4gQ="; + "aarch64-darwin" = "sha256-EsHigdTUhOqm1QKATGqMd6sG8f3SLF/UbAL4euXzwa8="; + }; + prekTargets = { + "x86_64-linux" = "x86_64-unknown-linux-gnu"; + "x86_64-darwin" = "x86_64-apple-darwin"; + "aarch64-linux" = "aarch64-unknown-linux-gnu"; + "aarch64-darwin" = "aarch64-apple-darwin"; + }; in { devShells = forEachSystem @@ -53,17 +67,36 @@ license = pkgs.lib.licenses.mit; }; }; + + prek = pkgs.stdenv.mkDerivation { + pname = "prek"; + version = prekVersion; + src = pkgs.fetchurl { + url = "https://github.com/j178/prek/releases/download/v${prekVersion}/prek-${prekTargets.${system}}.tar.gz"; + hash = prekHashes.${system}; + }; + sourceRoot = "."; + installPhase = '' + mkdir -p $out/bin + cp prek-${prekTargets.${system}}/prek $out/bin/ + chmod +x $out/bin/prek + ''; + meta = { + description = "Better pre-commit, re-engineered in Rust"; + homepage = "https://github.com/j178/prek"; + license = pkgs.lib.licenses.mit; + }; + }; in { default = pkgs.mkShell { packages = [ beads-rust + prek ] ++ (with pkgs; [ git gh jujutsu - prek - lefthook ripgrep (python3.withPackages (ps: with ps; [ pip @@ -72,7 +105,6 @@ shellHook = '' prek install - lefthook install # === Claude Loop Setup (ralph_wiggum) === _LOOP_SRC="${prompts}/ralph_wiggum" diff --git a/lefthook.yml b/lefthook.yml deleted file mode 100644 index 
98000f8..0000000 --- a/lefthook.yml +++ /dev/null @@ -1,91 +0,0 @@ -# Lefthook configuration — commit-msg and pre-push hooks -# Pre-commit file checks are handled by prek (see prek.toml) - -commit-msg: - parallel: true - commands: - capitalized-subject: - run: | - first_line=$(head -1 {1}) - first_char=$(echo "$first_line" | cut -c1) - if ! echo "$first_char" | grep -q '^[A-Z]$'; then - echo "Commit message subject should start with capital letter" - exit 1 - fi - - empty-message: - run: | - if [ ! -s {1} ]; then - echo "Commit message cannot be empty" - exit 1 - fi - - hard-tabs: - run: | - if grep -v ^# {1} | grep -q $'\t'; then - echo "Commit message contains hard tabs" - exit 1 - fi - - text-width: - run: | - first_line_length=$(head -1 {1} | wc -c | xargs -I {} expr {} - 1 ) - if [ $first_line_length -gt 50 ]; then - echo "Commit subject should be 50 characters or less" - exit 1 - fi - grep -v ^# {1} | tail -n +3 | while read -r line; do - if [ ${#line} -gt 72 ]; then - echo "Commit message body lines should be 72 characters or less" - exit 1 - fi - done - - trailing-period: - run: | - first_line=$(head -1 {1}) - if echo "$first_line" | grep -q '\.$'; then - echo "Commit message subject should not end with period" - exit 1 - fi - - single-line-subject: - run: | - first_line=$(head -1 {1}) - if [ "$(echo "$first_line" | wc -l)" -gt 1 ]; then - echo "Commit message subject should be a single line" - exit 1 - fi - - russian-novel: - run: | - total_chars=$(wc -c < {1}) - if [ $total_chars -gt 500 ]; then - echo "Commit message is too long ($total_chars characters). Keep it under 500 characters." - exit 1 - fi - - github-issue: - run: | - first_line=$(head -1 {1}) - if ! 
echo "$first_line" | grep -q '#[0-9]\+'; then - echo "No GitHub issue number found in commit message (e.g., #123)" - exit 1 - fi - -pre-push: - parallel: true - commands: - protected-branch: - run: | - protected_branches=("main" "master" "development") - current_branch=$(git symbolic-ref HEAD | sed -e 's,.*/\(.*\),\1,') - found_protected_branch=0 - for branch in "${protected_branches[@]}"; do - if [ "$current_branch" = "$branch" ]; then - echo "Direct push to protected branch '$branch' is not allowed" - echo "Please create a feature branch and submit a pull request" - found_protected_branch=1 - fi - done - exit $found_protected_branch diff --git a/prek.toml b/prek.toml index e6b9cbf..72ac3ba 100644 --- a/prek.toml +++ b/prek.toml @@ -1,3 +1,4 @@ +# Pre-commit file checks (builtin Rust hooks — zero network, instant startup) [[repos]] repo = "builtin" hooks = [ @@ -9,3 +10,16 @@ hooks = [ { id = "detect-private-key" }, { id = "check-added-large-files", args = ["--maxkb=500"] }, ] + +# Commit message validation (local shell hooks) +[[repos]] +repo = "local" +hooks = [ + { id = "capitalized-subject", name = "Capitalized subject", language = "system", entry = "sh -c 'head -1 \"$1\" | grep -q \"^[A-Z]\" || { echo \"Commit subject must start with a capital letter\"; exit 1; }' --", stages = ["commit-msg"] }, + { id = "subject-max-length", name = "Subject max 50 chars", language = "system", entry = "sh -c 'len=$(head -1 \"$1\" | wc -m); [ \"$len\" -le 51 ] || { echo \"Commit subject too long ($len chars, max 50)\"; exit 1; }' --", stages = ["commit-msg"] }, + { id = "body-max-length", name = "Body lines max 72 chars", language = "system", entry = "sh -c 'grep -v \"^#\" \"$1\" | tail -n +3 | awk \"length > 72 { print NR\\\": \\\"\\$0; found=1 } END { if (found) { print \\\"Body lines must be 72 chars or less\\\"; exit 1 } }\"' --", stages = ["commit-msg"] }, + { id = "no-trailing-period", name = "No trailing period", language = "system", entry = "sh -c 'head -1 \"$1\" | 
grep -q \"\\\\.$\" && { echo \"Commit subject must not end with a period\"; exit 1; } || true' --", stages = ["commit-msg"] }, + { id = "github-issue-ref", name = "GitHub issue reference", language = "system", entry = "sh -c 'head -1 \"$1\" | grep -q \"#[0-9]\" || { echo \"Commit subject must reference a GitHub issue (e.g. #123)\"; exit 1; }' --", stages = ["commit-msg"] }, + { id = "max-message-length", name = "Max 500 chars total", language = "system", entry = "sh -c 'chars=$(wc -c < \"$1\"); [ \"$chars\" -le 500 ] || { echo \"Commit message too long ($chars chars, max 500)\"; exit 1; }' --", stages = ["commit-msg"] }, + { id = "no-branch-push", name = "No push to main/master", language = "system", entry = "sh -c 'branch=$(git symbolic-ref --short HEAD 2>/dev/null); case \"$branch\" in main|master|development) echo \"Direct push to $branch is not allowed\"; exit 1;; esac' --", stages = ["pre-push"] }, +] From 490e726df882e93eebc01aa0da873d06d014c6fd Mon Sep 17 00:00:00 2001 From: Joost van der Laan Date: Wed, 25 Feb 2026 10:58:38 +0000 Subject: [PATCH 5/9] Add prek CI workflow and document nix commands Add GitHub Actions workflow using Determinate Nix to run prek checks on push/PR. Document nix develop commands in README.md and AGENTS.md. 
--- .github/workflows/prek.yml | 23 +++++++++++++++++++++++ AGENTS.md | 7 +++++++ README.md | 8 ++++++++ 3 files changed, 38 insertions(+) create mode 100644 .github/workflows/prek.yml diff --git a/.github/workflows/prek.yml b/.github/workflows/prek.yml new file mode 100644 index 0000000..6f4305a --- /dev/null +++ b/.github/workflows/prek.yml @@ -0,0 +1,23 @@ +name: Prek + +on: + push: + branches: [main] + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + prek: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: DeterminateSystems/nix-installer-action@main + - uses: DeterminateSystems/magic-nix-cache-action@main + - name: Run prek checks + run: nix develop --command prek run --all-files diff --git a/AGENTS.md b/AGENTS.md index 05b2b8c..e6fe4f4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -87,3 +87,10 @@ All content is markdown and JSON — edit directly, no build step required. When - NEVER stop before pushing - that leaves work stranded locally - NEVER say "ready to push when you are" - YOU must push - If push fails, resolve and retry until it succeeds + +## Code Quality + +```bash +nix develop --command prek run --all-files # Run all pre-commit checks +``` + diff --git a/README.md b/README.md index 09f74a3..a1e0428 100644 --- a/README.md +++ b/README.md @@ -96,3 +96,11 @@ Plugins are just markdown files. Fork the repo, make your changes, and submit a ## License This fork is licensed under Apache 2.0, same as the [original Anthropic repository](https://github.com/anthropics/knowledge-work-plugins). See [LICENSE](LICENSE) for details. 
+ +## Development Environment + +```bash +nix develop # Enter dev shell with all tools +nix develop --command prek run --all-files # Run pre-commit checks +``` + From 6fda25479993c919c3d27527ce696e239a491e31 Mon Sep 17 00:00:00 2001 From: Joost van der Laan Date: Wed, 25 Feb 2026 12:02:25 +0000 Subject: [PATCH 6/9] Fix beads-rust x86_64-linux hash for upstream tarball change Co-Authored-By: Claude Opus 4.6 --- flake.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flake.nix b/flake.nix index c2e0ad2..b63b105 100644 --- a/flake.nix +++ b/flake.nix @@ -16,7 +16,7 @@ beadsRustVersion = "0.1.19"; beadsRustHashes = { - "x86_64-linux" = "sha256-61a0IeR+NI56GDJdIQlxeiQ3wqNneAe1gUPzAz5oTMw="; + "x86_64-linux" = "sha256-rL0PabvZxOLr+iOmZfmpB2tgoCxc/CQLVDFB8NRWHYY="; "x86_64-darwin" = "sha256-98srAx9fRr7NDbzVjIs4za7KONicVgPkZEimSaZ85/w="; "aarch64-darwin" = "sha256-p8cZ6+c4LUSMU1Cvz+lus6NfYYTWFilCD2Jt2G+PGSg="; }; From a741d981c6c7a102c93c7d4494e7ab39128e05ea Mon Sep 17 00:00:00 2001 From: Joost van der Laan Date: Wed, 25 Feb 2026 12:02:27 +0000 Subject: [PATCH 7/9] Fix trailing whitespace and end-of-file issues Auto-fixed by prek builtin hooks (trailing-whitespace, end-of-file-fixer). 
Co-Authored-By: Claude Opus 4.6 --- .beads/metadata.json | 2 +- AGENTS.md | 1 - PROMPT_plan.md | 6 +- README.md | 1 - .../skills/clinical-trial-protocol/SKILL.md | 4 +- .../references/00-initialize-intervention.md | 2 +- .../references/02-protocol-foundation.md | 8 +- .../references/flattening_guide.md | 24 +++--- .../scripts/export_parser.py | 36 ++++---- .../references/01-intuition-pumps.md | 4 +- .../references/02-risk-assessment.md | 4 +- .../references/03-optimization-function.md | 20 ++--- .../references/04-parameter-strategy.md | 4 +- .../references/09-meta-framework.md | 1 - .../scvi-tools/references/atac_peakvi.md | 34 ++++---- .../references/batch_correction_sysvi.md | 28 +++---- .../scvi-tools/references/citeseq_totalvi.md | 24 +++--- .../scvi-tools/references/data_preparation.md | 2 +- .../scvi-tools/references/label_transfer.md | 28 +++---- .../references/rna_velocity_velovi.md | 26 +++--- .../references/scrna_integration.md | 22 ++--- .../skills/scvi-tools/scripts/model_utils.py | 82 +++++++++---------- .../cowork-plugin-customizer/LICENSE.txt | 2 +- 23 files changed, 180 insertions(+), 185 deletions(-) diff --git a/.beads/metadata.json b/.beads/metadata.json index c787975..f581edc 100644 --- a/.beads/metadata.json +++ b/.beads/metadata.json @@ -1,4 +1,4 @@ { "database": "beads.db", "jsonl_export": "issues.jsonl" -} \ No newline at end of file +} diff --git a/AGENTS.md b/AGENTS.md index e6fe4f4..7c8648a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -93,4 +93,3 @@ All content is markdown and JSON — edit directly, no build step required. 
When ```bash nix develop --command prek run --all-files # Run all pre-commit checks ``` - diff --git a/PROMPT_plan.md b/PROMPT_plan.md index 3a60163..cbb5293 100644 --- a/PROMPT_plan.md +++ b/PROMPT_plan.md @@ -8,7 +8,7 @@ - For each epic, verify child tasks cover all aspects of the specification - Check for missing dependencies using `bd dep cycles` (should be empty) - Identify any tasks that should block others but don't - + 2. Update the beads database to fix any issues found: - Create missing tasks with `bd create "title" -t task -p -d "description"` - Add missing dependencies with `bd dep add --type blocks` @@ -20,7 +20,7 @@ - `bd blocked` should show tasks waiting on dependencies - `bd stats` should show accurate counts -IMPORTANT: Plan only. Do NOT implement anything. Do NOT assume functionality is missing; +IMPORTANT: Plan only. Do NOT implement anything. Do NOT assume functionality is missing; use `bd list` and code search to verify first. -ULTIMATE GOAL: Refactor all knowledge-work plugins from generic Anthropic templates to FashionUnited-specific workflows, tools, and domain context. Ensure all necessary tasks exist as beads with proper dependencies so `bd ready` always shows the right next work. \ No newline at end of file +ULTIMATE GOAL: Refactor all knowledge-work plugins from generic Anthropic templates to FashionUnited-specific workflows, tools, and domain context. Ensure all necessary tasks exist as beads with proper dependencies so `bd ready` always shows the right next work. 
diff --git a/README.md b/README.md index a1e0428..3a9b9db 100644 --- a/README.md +++ b/README.md @@ -103,4 +103,3 @@ This fork is licensed under Apache 2.0, same as the [original Anthropic reposito nix develop # Enter dev shell with all tools nix develop --command prek run --all-files # Run pre-commit checks ``` - diff --git a/archived/bio-research/skills/clinical-trial-protocol/SKILL.md b/archived/bio-research/skills/clinical-trial-protocol/SKILL.md index d32014b..2286454 100644 --- a/archived/bio-research/skills/clinical-trial-protocol/SKILL.md +++ b/archived/bio-research/skills/clinical-trial-protocol/SKILL.md @@ -21,7 +21,7 @@ description: Generate clinical trial protocols for medical devices or drugs. Thi ## Overview -This skill generates clinical trial protocols for **medical devices or drugs** using a **modular, waypoint-based architecture** +This skill generates clinical trial protocols for **medical devices or drugs** using a **modular, waypoint-based architecture** ## What This Skill Does @@ -504,5 +504,3 @@ When this skill is invoked: - **Research Only:** Display research summary location and offer to continue with full protocol - **Full Protocol:** Congratulate user, display protocol location and next steps - Remind user of disclaimers - - diff --git a/archived/bio-research/skills/clinical-trial-protocol/references/00-initialize-intervention.md b/archived/bio-research/skills/clinical-trial-protocol/references/00-initialize-intervention.md index 7b55611..8a58859 100644 --- a/archived/bio-research/skills/clinical-trial-protocol/references/00-initialize-intervention.md +++ b/archived/bio-research/skills/clinical-trial-protocol/references/00-initialize-intervention.md @@ -198,4 +198,4 @@ If `waypoints/intervention_metadata.json` already exists: - Ensure the intervention_id is filesystem-safe (no spaces, special chars) - Validate that required fields are not empty - Write clean, formatted JSON with proper indentation -- Handle both device and drug 
interventions appropriately with the right terminology \ No newline at end of file +- Handle both device and drug interventions appropriately with the right terminology diff --git a/archived/bio-research/skills/clinical-trial-protocol/references/02-protocol-foundation.md b/archived/bio-research/skills/clinical-trial-protocol/references/02-protocol-foundation.md index 647c8f1..0d83ae6 100644 --- a/archived/bio-research/skills/clinical-trial-protocol/references/02-protocol-foundation.md +++ b/archived/bio-research/skills/clinical-trial-protocol/references/02-protocol-foundation.md @@ -250,11 +250,11 @@ STATEMENT OF COMPLIANCE **Content to Generate:** STATEMENT OF COMPLIANCE -Provide a statement that the trial will be conducted in compliance with the protocol, International Conference on Harmonisation Good Clinical Practice (ICH GCP) and applicable state, local and federal regulatory requirements. Each engaged institution must have a current Federal-Wide Assurance (FWA) issued by the Office for Human Research Protections (OHRP) and must provide this protocol and the associated informed consent documents and recruitment materials for review and approval by an appropriate Institutional Review Board (IRB) or Ethics Committee (EC) registered with OHRP. Any amendments to the protocol or consent materials must also be approved before implementation. Select one of the two statements below: +Provide a statement that the trial will be conducted in compliance with the protocol, International Conference on Harmonisation Good Clinical Practice (ICH GCP) and applicable state, local and federal regulatory requirements. Each engaged institution must have a current Federal-Wide Assurance (FWA) issued by the Office for Human Research Protections (OHRP) and must provide this protocol and the associated informed consent documents and recruitment materials for review and approval by an appropriate Institutional Review Board (IRB) or Ethics Committee (EC) registered with OHRP. 
Any amendments to the protocol or consent materials must also be approved before implementation. Select one of the two statements below: -(1) [The trial will be carried out in accordance with International Conference on Harmonisation Good Clinical Practice (ICH GCP) and the following: +(1) [The trial will be carried out in accordance with International Conference on Harmonisation Good Clinical Practice (ICH GCP) and the following: -• United States (US) Code of Federal Regulations (CFR) applicable to clinical studies (45 CFR Part 46, 21 CFR Part 50, 21 CFR Part 56, 21 CFR Part 312, and/or 21 CFR Part 812) +• United States (US) Code of Federal Regulations (CFR) applicable to clinical studies (45 CFR Part 46, 21 CFR Part 50, 21 CFR Part 56, 21 CFR Part 312, and/or 21 CFR Part 812) National Institutes of Health (NIH)-funded investigators and clinical trial site staff who are responsible for the conduct, management, or oversight of NIH-funded clinical trials have completed Human Subjects Protection and ICH GCP Training. @@ -341,7 +341,7 @@ This section contains three major components. Generate each with appropriate det #### Section 1.2: Schema (30 lines) -**Generate a text-based flow diagram** showing study progression. +**Generate a text-based flow diagram** showing study progression. **Required Elements:** - **Screening Period:** Show duration (e.g., "Within 28 days") and key activities (eligibility assessment) diff --git a/archived/bio-research/skills/instrument-data-to-allotrope/references/flattening_guide.md b/archived/bio-research/skills/instrument-data-to-allotrope/references/flattening_guide.md index d20e654..c51b7f6 100644 --- a/archived/bio-research/skills/instrument-data-to-allotrope/references/flattening_guide.md +++ b/archived/bio-research/skills/instrument-data-to-allotrope/references/flattening_guide.md @@ -29,11 +29,11 @@ ASM Hierarchy → Flat Column device-system-document. 
device-identifier → instrument_serial_number model-number → instrument_model - + measurement-aggregate-document. analyst → analyst measurement-time → measurement_datetime - + measurement-document[]. sample-identifier → sample_id viable-cell-density.value → viable_cell_density @@ -185,43 +185,43 @@ import pandas as pd def flatten_asm(asm_dict, technique="cell-counting"): """ Flatten ASM JSON to pandas DataFrame. - + Args: asm_dict: Parsed ASM JSON technique: ASM technique type - + Returns: pandas DataFrame with one row per measurement """ rows = [] - + # Get aggregate document agg_key = f"{technique}-aggregate-document" agg_doc = asm_dict.get(agg_key, {}) - + # Extract device info device = agg_doc.get("device-system-document", {}) device_info = { "instrument_serial_number": device.get("device-identifier"), "instrument_model": device.get("model-number") } - + # Get technique documents doc_key = f"{technique}-document" for doc in agg_doc.get(doc_key, []): meas_agg = doc.get("measurement-aggregate-document", {}) - + # Extract common metadata common = { "analyst": meas_agg.get("analyst"), "measurement_datetime": meas_agg.get("measurement-time"), **device_info } - + # Extract each measurement for meas in meas_agg.get("measurement-document", []): row = {**common} - + # Flatten measurement fields for key, value in meas.items(): if isinstance(value, dict) and "value" in value: @@ -232,9 +232,9 @@ def flatten_asm(asm_dict, technique="cell-counting"): row[f"{col}_unit"] = value["unit"] else: row[key.replace("-", "_")] = value - + rows.append(row) - + return pd.DataFrame(rows) # Usage diff --git a/archived/bio-research/skills/instrument-data-to-allotrope/scripts/export_parser.py b/archived/bio-research/skills/instrument-data-to-allotrope/scripts/export_parser.py index d7f558e..4ec0158 100644 --- a/archived/bio-research/skills/instrument-data-to-allotrope/scripts/export_parser.py +++ b/archived/bio-research/skills/instrument-data-to-allotrope/scripts/export_parser.py @@ -63,16 
+63,16 @@ def convert_to_asm(filepath: str) -> Optional[Dict[str, Any]]: """ Convert {instrument_name} file to ASM format. - + Args: filepath: Path to input file - + Returns: ASM dictionary or None if conversion fails """ if not ALLOTROPY_AVAILABLE: raise ImportError("allotropy library required. Install with: pip install allotropy") - + try: asm = allotrope_from_file(filepath, Vendor.{vendor}) return asm @@ -84,36 +84,36 @@ def convert_to_asm(filepath: str) -> Optional[Dict[str, Any]]: def flatten_asm(asm: Dict[str, Any]) -> list: """ Flatten ASM to list of row dictionaries for CSV export. - + Args: asm: ASM dictionary - + Returns: List of flattened row dictionaries """ technique = "{technique}" rows = [] - + agg_key = f"{{technique}}-aggregate-document" agg_doc = asm.get(agg_key, {{}}) - + # Extract device info device = agg_doc.get("device-system-document", {{}}) device_info = {{ "instrument_serial_number": device.get("device-identifier"), "instrument_model": device.get("model-number"), }} - + doc_key = f"{{technique}}-document" for doc in agg_doc.get(doc_key, []): meas_agg = doc.get("measurement-aggregate-document", {{}}) - + common = {{ "analyst": meas_agg.get("analyst"), "measurement_time": meas_agg.get("measurement-time"), **device_info }} - + for meas in meas_agg.get("measurement-document", []): row = {{**common}} for key, value in meas.items(): @@ -125,7 +125,7 @@ def flatten_asm(asm: Dict[str, Any]) -> list: else: row[clean_key] = value rows.append(row) - + return rows @@ -134,28 +134,28 @@ def main(): parser.add_argument("input", help="Input file path") parser.add_argument("--output", "-o", help="Output JSON path") parser.add_argument("--flatten", action="store_true", help="Also generate CSV") - + args = parser.parse_args() - + input_path = Path(args.input) if not input_path.exists(): print(f"Error: File not found: {{args.input}}") return 1 - + # Convert to ASM print(f"Converting {{args.input}}...") asm = convert_to_asm(str(input_path)) - + if asm is None: 
print("Conversion failed") return 1 - + # Write ASM JSON output_path = args.output or str(input_path.with_suffix('.asm.json')) with open(output_path, 'w') as f: json.dump(asm, f, indent=2, default=str) print(f"ASM written to: {{output_path}}") - + # Optionally flatten if args.flatten and PANDAS_AVAILABLE: rows = flatten_asm(asm) @@ -163,7 +163,7 @@ def main(): flat_path = str(input_path.with_suffix('.flat.csv')) df.to_csv(flat_path, index=False) print(f"CSV written to: {{flat_path}}") - + return 0 diff --git a/archived/bio-research/skills/scientific-problem-selection/references/01-intuition-pumps.md b/archived/bio-research/skills/scientific-problem-selection/references/01-intuition-pumps.md index e53256a..12f9f5e 100644 --- a/archived/bio-research/skills/scientific-problem-selection/references/01-intuition-pumps.md +++ b/archived/bio-research/skills/scientific-problem-selection/references/01-intuition-pumps.md @@ -12,7 +12,7 @@ Research advances generally fall into one of these categories, each with two dim - *Logic*: Novel ways to manipulate biological systems (e.g., using CRISPR for deep mutational scanning) - *Technology*: New tools for manipulation (e.g., developing base editors, creating whole-genome CRISPR libraries) -**MEASUREMENT** +**MEASUREMENT** - *Logic*: Novel applications of existing measurement tools (e.g., using tissue clearing to study liver fibrosis) - *Technology*: New measurement capabilities (e.g., developing tissue-clearing techniques, super-resolution microscopy) @@ -133,7 +133,7 @@ After generating ideas, we must evaluate them critically. Here are the most comm #### Trap #1: The Truffle Hound **Warning:** Don't become so good at one system or technique that you fail to ask questions of biological import. -**Bad:** "What is the role of p190 RhoGAP in wing development?" +**Bad:** "What is the role of p190 RhoGAP in wing development?" **Better:** "How do signaling pathways and cytoskeleton coordinate to control wing development?" 
**Self-Check:** Is the question driven by biological curiosity or by what the user is technically capable of? diff --git a/archived/bio-research/skills/scientific-problem-selection/references/02-risk-assessment.md b/archived/bio-research/skills/scientific-problem-selection/references/02-risk-assessment.md index 3c3c46a..2dc9554 100644 --- a/archived/bio-research/skills/scientific-problem-selection/references/02-risk-assessment.md +++ b/archived/bio-research/skills/scientific-problem-selection/references/02-risk-assessment.md @@ -205,8 +205,8 @@ Claude should produce a **2-page Risk Assessment Document**: | [Assumption 2] | Bio/Tech | 1-5 | X mo | [Rationale for score] | | ... | ... | ... | ... | ... | -*Bio = Biological reality, Tech = Technical capability -†Risk: 1=very likely to 5=very unlikely +*Bio = Biological reality, Tech = Technical capability +†Risk: 1=very likely to 5=very unlikely ‡Time to test in months #### Risk Profile Summary: diff --git a/archived/bio-research/skills/scientific-problem-selection/references/03-optimization-function.md b/archived/bio-research/skills/scientific-problem-selection/references/03-optimization-function.md index fb8258b..726557b 100644 --- a/archived/bio-research/skills/scientific-problem-selection/references/03-optimization-function.md +++ b/archived/bio-research/skills/scientific-problem-selection/references/03-optimization-function.md @@ -54,12 +54,12 @@ Based on the answers, Claude should help identify the right optimization functio - **High Generality, Medium Learning:** Ribosome stalling complex - Updates understanding of translation (fundamental process) - Scores well because translation is universal - + - **Medium Generality, High Learning:** Oxytricha germ-line nucleus - Genomic acrobatics may not be common to other organisms - BUT elegant mapping scores highly on how much we learned - May yield tools for genome editing (bonus) - + - **High on Both Axes (Landmark):** RNA interference, biomolecular condensates - 
These are rare—don't expect every project to be here - But aim to score well on at least one axis @@ -79,13 +79,13 @@ Based on the answers, Claude should help identify the right optimization functio - **Widely Used, Not Critical:** BLAST - Used in countless projects - Rarely THE critical tool, but enormous cumulative impact - + - **Not Widely Used, Highly Critical:** Cryo-electron tomography - Too complicated for broad adoption - But generates stunning data that's impossible to get otherwise - When you need it, nothing else works - -- **High on Both Axes (Game-Changing):** + +- **High on Both Axes (Game-Changing):** - GFP, CRISPR, AlphaFold (the famous ones) - But also: lentiviral delivery, cell sorting, massively parallel sequencing - Technologies we cannot imagine living without @@ -119,14 +119,14 @@ Based on the answers, Claude should help identify the right optimization functio Based on your Phase 1 responses, let me help you choose: **If you selected A (fundamental knowledge):** → Basic Science Framework -**If you selected B (enable experiments):** → Technology Development Framework +**If you selected B (enable experiments):** → Technology Development Framework **If you selected C (solve practical problem):** → Invention Framework **Now, let's be explicit:** 1. **State Your Framework:** "This project should be evaluated as [basic science/technology development/invention]." -2. **Define Your Axes:** +2. **Define Your Axes:** - X-axis measures: [specific metric] - Y-axis measures: [specific metric] @@ -134,7 +134,7 @@ Based on your Phase 1 responses, let me help you choose: - X-axis score (Low/Medium/High): [Your assessment + reasoning] - Y-axis score (Low/Medium/High): [Your assessment + reasoning] -4. **Threshold Check:** +4. **Threshold Check:** - Do you score at least MEDIUM-HIGH on one axis? 
- If both are LOW-MEDIUM, you have a problem @@ -338,7 +338,7 @@ Claude should produce a **2-page Impact Assessment Document**: #### Visual Framework: ``` [Your Project Type] - + Y-Axis | ★ Your Project [Metric] | / | / @@ -346,7 +346,7 @@ Y-Axis | ★ Your Project | / |_________________ X-Axis [Metric] - + ★ = Your project Reference projects plotted for context ``` diff --git a/archived/bio-research/skills/scientific-problem-selection/references/04-parameter-strategy.md b/archived/bio-research/skills/scientific-problem-selection/references/04-parameter-strategy.md index f101ceb..7b01e1b 100644 --- a/archived/bio-research/skills/scientific-problem-selection/references/04-parameter-strategy.md +++ b/archived/bio-research/skills/scientific-problem-selection/references/04-parameter-strategy.md @@ -49,7 +49,7 @@ For each category, indicate if it's **FIXED** (must stay) or **FLOATING** (could **Question 3: Why are they fixed?** For each fixed parameter, is it because: A. Your expertise/passion -B. Lab resources/capabilities +B. Lab resources/capabilities C. Advisor requirements D. You think it's the "best" solution E. 
Historical accident (you started this way) @@ -197,7 +197,7 @@ For each fixed parameter, let's plan what happens if it becomes untenable: - **Alternative project:** [If you fixed something else] **Fixed Parameter 2: [Name it]** -- **Why it's fixed:** [Your reason] +- **Why it's fixed:** [Your reason] - **Risk if this fails:** [What breaks] - **Contingency:** [What could you float instead] - **Alternative project:** [If you fixed something else] diff --git a/archived/bio-research/skills/scientific-problem-selection/references/09-meta-framework.md b/archived/bio-research/skills/scientific-problem-selection/references/09-meta-framework.md index 7231fc1..ee6e0a4 100644 --- a/archived/bio-research/skills/scientific-problem-selection/references/09-meta-framework.md +++ b/archived/bio-research/skills/scientific-problem-selection/references/09-meta-framework.md @@ -501,4 +501,3 @@ Let's start with Skill 1. Are you ready to begin? --- *Remember: The highest-leverage work in science is choosing the right problem. This meta-framework ensures you spend your finite time wisely. The investment in systematic planning pays dividends for years.* - diff --git a/archived/bio-research/skills/scvi-tools/references/atac_peakvi.md b/archived/bio-research/skills/scvi-tools/references/atac_peakvi.md index 84bf4e0..86c9546 100644 --- a/archived/bio-research/skills/scvi-tools/references/atac_peakvi.md +++ b/archived/bio-research/skills/scvi-tools/references/atac_peakvi.md @@ -255,36 +255,36 @@ peak_bed_sig.to_csv("significant_peaks.bed", sep='\t', index=False, header=False def compute_gene_activity(adata, peak_gene_map): """ Compute gene activity scores from peak accessibility. 
- + Parameters ---------- adata : AnnData ATAC data with peaks peak_gene_map : dict Mapping of peaks to genes - + Returns ------- AnnData with gene activity scores """ from scipy.sparse import csr_matrix - + genes = list(set(peak_gene_map.values())) gene_matrix = np.zeros((adata.n_obs, len(genes))) - + for i, gene in enumerate(genes): gene_peaks = [p for p, g in peak_gene_map.items() if g == gene] if gene_peaks: peak_idx = [list(adata.var_names).index(p) for p in gene_peaks if p in adata.var_names] if peak_idx: gene_matrix[:, i] = np.array(adata.X[:, peak_idx].sum(axis=1)).flatten() - + adata_gene = ad.AnnData( X=csr_matrix(gene_matrix), obs=adata.obs.copy(), var=pd.DataFrame(index=genes) ) - + return adata_gene ``` @@ -300,7 +300,7 @@ def analyze_scatac( ): """ Complete scATAC-seq analysis with PeakVI. - + Parameters ---------- adata : AnnData @@ -313,7 +313,7 @@ def analyze_scatac( Latent dimensions resolution : float Leiden clustering resolution - + Returns ------- Tuple of (processed AnnData, trained model) @@ -321,38 +321,38 @@ def analyze_scatac( import scvi import scanpy as sc import numpy as np - + adata = adata.copy() - + # QC sc.pp.calculate_qc_metrics(adata, inplace=True) adata = adata[adata.obs['n_genes_by_counts'] > 500].copy() sc.pp.filter_genes(adata, min_cells=10) - + # Binarize adata.X = (adata.X > 0).astype(np.float32) - + # Select top peaks if adata.n_vars > n_top_peaks: peak_accessibility = np.array(adata.X.sum(axis=0)).flatten() top_peaks = np.argsort(peak_accessibility)[-n_top_peaks:] adata = adata[:, top_peaks].copy() - + # Setup PeakVI scvi.model.PEAKVI.setup_anndata(adata, batch_key=batch_key) - + # Train model = scvi.model.PEAKVI(adata, n_latent=n_latent) model.train(max_epochs=200, early_stopping=True) - + # Latent representation adata.obsm["X_PeakVI"] = model.get_latent_representation() - + # Clustering sc.pp.neighbors(adata, use_rep="X_PeakVI") sc.tl.umap(adata) sc.tl.leiden(adata, resolution=resolution) - + return adata, model # Usage 
diff --git a/archived/bio-research/skills/scvi-tools/references/batch_correction_sysvi.md b/archived/bio-research/skills/scvi-tools/references/batch_correction_sysvi.md index d282de0..8237784 100644 --- a/archived/bio-research/skills/scvi-tools/references/batch_correction_sysvi.md +++ b/archived/bio-research/skills/scvi-tools/references/batch_correction_sysvi.md @@ -253,7 +253,7 @@ batch_lisi = lisi.ilisi_graph( # Cell type LISI (lower = better preservation) ct_lisi = lisi.clisi_graph( adata, - label_key="cell_type", + label_key="cell_type", use_rep="X_integrated" ) @@ -315,7 +315,7 @@ def integrate_cross_system( ): """ Integrate datasets from different technological systems. - + Parameters ---------- adatas : dict @@ -330,33 +330,33 @@ def integrate_cross_system( Number of HVGs n_latent : int Latent dimensions - + Returns ------- Integrated AnnData with model """ import scvi import scanpy as sc - + # Add system labels and concatenate for system_name, adata in adatas.items(): adata.obs[system_key] = system_name - + adata = sc.concat(list(adatas.values())) - + # Find common genes for name, ad in adatas.items(): if name == list(adatas.keys())[0]: common_genes = set(ad.var_names) else: common_genes = common_genes.intersection(ad.var_names) - + adata = adata[:, list(common_genes)].copy() print(f"Common genes: {len(common_genes)}") - + # Store counts adata.layers["counts"] = adata.X.copy() - + # HVG selection sc.pp.highly_variable_genes( adata, @@ -366,7 +366,7 @@ def integrate_cross_system( layer="counts" ) adata = adata[:, adata.var["highly_variable"]].copy() - + # Setup with system as covariate scvi.model.SCVI.setup_anndata( adata, @@ -374,19 +374,19 @@ def integrate_cross_system( batch_key=batch_key if batch_key in adata.obs else None, categorical_covariate_keys=[system_key] ) - + # Train model = scvi.model.SCVI(adata, n_latent=n_latent, n_layers=2) model.train(max_epochs=300, early_stopping=True) - + # Get representation adata.obsm["X_integrated"] = 
model.get_latent_representation() - + # Clustering sc.pp.neighbors(adata, use_rep="X_integrated") sc.tl.umap(adata) sc.tl.leiden(adata) - + return adata, model # Usage diff --git a/archived/bio-research/skills/scvi-tools/references/citeseq_totalvi.md b/archived/bio-research/skills/scvi-tools/references/citeseq_totalvi.md index f94423d..9acbf0c 100644 --- a/archived/bio-research/skills/scvi-tools/references/citeseq_totalvi.md +++ b/archived/bio-research/skills/scvi-tools/references/citeseq_totalvi.md @@ -316,7 +316,7 @@ def analyze_citeseq( ): """ Complete CITE-seq analysis with totalVI. - + Parameters ---------- adata_rna : AnnData @@ -329,23 +329,23 @@ def analyze_citeseq( Number of HVGs n_latent : int Latent dimensions - + Returns ------- Tuple of (processed AnnData, trained model) """ import scvi import scanpy as sc - + # Ensure same cells common_cells = adata_rna.obs_names.intersection(adata_protein.obs_names) adata = adata_rna[common_cells].copy() adata_protein = adata_protein[common_cells].copy() - + # Add protein to obsm adata.obsm["protein_expression"] = adata_protein.X.toarray() if hasattr(adata_protein.X, 'toarray') else adata_protein.X adata.uns["protein_names"] = list(adata_protein.var_names) - + # RNA QC # Handle both human (MT-) and mouse (mt-, Mt-) mitochondrial genes adata.var['mt'] = ( @@ -356,10 +356,10 @@ def analyze_citeseq( sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], inplace=True) adata = adata[adata.obs['pct_counts_mt'] < 20].copy() sc.pp.filter_genes(adata, min_cells=3) - + # Store counts adata.layers["counts"] = adata.X.copy() - + # HVG selection sc.pp.highly_variable_genes( adata, @@ -369,7 +369,7 @@ def analyze_citeseq( layer="counts" ) adata = adata[:, adata.var["highly_variable"]].copy() - + # Setup totalVI scvi.model.TOTALVI.setup_anndata( adata, @@ -377,22 +377,22 @@ def analyze_citeseq( protein_expression_obsm_key="protein_expression", batch_key=batch_key ) - + # Train model = scvi.model.TOTALVI(adata, n_latent=n_latent) 
model.train(max_epochs=200, early_stopping=True) - + # Get representations adata.obsm["X_totalVI"] = model.get_latent_representation() rna_norm, protein_denoised = model.get_normalized_expression(return_mean=True) adata.layers["totalVI_normalized"] = rna_norm adata.obsm["protein_denoised"] = protein_denoised - + # Clustering sc.pp.neighbors(adata, use_rep="X_totalVI") sc.tl.umap(adata) sc.tl.leiden(adata) - + return adata, model # Usage diff --git a/archived/bio-research/skills/scvi-tools/references/data_preparation.md b/archived/bio-research/skills/scvi-tools/references/data_preparation.md index 6cbcc16..c0a9dc5 100644 --- a/archived/bio-research/skills/scvi-tools/references/data_preparation.md +++ b/archived/bio-research/skills/scvi-tools/references/data_preparation.md @@ -36,7 +36,7 @@ if hasattr(adata, 'raw') and adata.raw is not None: print("Found adata.raw") # Use raw counts adata = adata.raw.to_adata() - + # Or check layers if 'counts' in adata.layers: print("Found counts layer") diff --git a/archived/bio-research/skills/scvi-tools/references/label_transfer.md b/archived/bio-research/skills/scvi-tools/references/label_transfer.md index 0c4a454..18e4075 100644 --- a/archived/bio-research/skills/scvi-tools/references/label_transfer.md +++ b/archived/bio-research/skills/scvi-tools/references/label_transfer.md @@ -100,10 +100,10 @@ if missing_genes: # Add missing genes with zero expression import numpy as np from scipy.sparse import csr_matrix - + zero_matrix = csr_matrix((adata_query.n_obs, len(missing_genes))) # ... concat and reorder to match reference - + # Store counts adata_query.layers["counts"] = adata_query.X.copy() ``` @@ -275,7 +275,7 @@ def transfer_labels( ): """ Transfer cell type labels from reference to query. 
- + Parameters ---------- adata_ref : AnnData @@ -290,18 +290,18 @@ def transfer_labels( Number of HVGs confidence_threshold : float Minimum confidence for predictions - + Returns ------- AnnData with predictions """ import scvi import scanpy as sc - + # Prepare reference adata_ref = adata_ref.copy() adata_ref.layers["counts"] = adata_ref.X.copy() - + sc.pp.highly_variable_genes( adata_ref, n_top_genes=n_top_genes, @@ -310,39 +310,39 @@ def transfer_labels( layer="counts" ) adata_ref = adata_ref[:, adata_ref.var["highly_variable"]].copy() - + # Train reference model scvi.model.SCVI.setup_anndata(adata_ref, layer="counts", batch_key=batch_key) scvi_ref = scvi.model.SCVI(adata_ref, n_latent=30) scvi_ref.train(max_epochs=200) - + scanvi_ref = scvi.model.SCANVI.from_scvi_model( scvi_ref, labels_key=cell_type_key, unlabeled_category="Unknown" ) scanvi_ref.train(max_epochs=50) - + # Prepare query adata_query = adata_query[:, adata_ref.var_names].copy() adata_query.layers["counts"] = adata_query.X.copy() - + # Map query scvi.model.SCANVI.prepare_query_anndata(adata_query, scanvi_ref) scanvi_query = scvi.model.SCANVI.load_query_data(adata_query, scanvi_ref) scanvi_query.train(max_epochs=100, plan_kwargs={"weight_decay": 0.0}) - + # Get predictions adata_query.obs["predicted_cell_type"] = scanvi_query.predict() soft = scanvi_query.predict(soft=True) adata_query.obs["prediction_score"] = soft.max(axis=1) - + # Mark low confidence adata_query.obs["confident_prediction"] = adata_query.obs["prediction_score"] >= confidence_threshold - + # Add latent representation adata_query.obsm["X_scANVI"] = scanvi_query.get_latent_representation() - + return adata_query, scanvi_ref, scanvi_query # Usage diff --git a/archived/bio-research/skills/scvi-tools/references/rna_velocity_velovi.md b/archived/bio-research/skills/scvi-tools/references/rna_velocity_velovi.md index 1221247..751e78b 100644 --- a/archived/bio-research/skills/scvi-tools/references/rna_velocity_velovi.md +++ 
b/archived/bio-research/skills/scvi-tools/references/rna_velocity_velovi.md @@ -195,7 +195,7 @@ scv.tl.velocity_graph(adata) fig, axes = plt.subplots(1, 2, figsize=(12, 5)) scv.pl.velocity_embedding_stream( - adata, basis="umap", ax=axes[0], + adata, basis="umap", ax=axes[0], title="scVelo", show=False ) @@ -229,7 +229,7 @@ scv.pl.velocity( # Plot expression over latent time for gene in genes: fig, ax = plt.subplots(figsize=(6, 4)) - + sc.pl.scatter( adata, x="veloVI_latent_time", @@ -289,7 +289,7 @@ def run_velocity_analysis( ): """ Complete RNA velocity analysis with veloVI. - + Parameters ---------- adata : AnnData @@ -302,7 +302,7 @@ def run_velocity_analysis( Number of velocity genes max_epochs : int Training epochs - + Returns ------- AnnData with velocity and model @@ -310,42 +310,42 @@ def run_velocity_analysis( import scvi import scvelo as scv import scanpy as sc - + adata = adata.copy() - + # Preprocessing scv.pp.filter_and_normalize( adata, min_shared_counts=20, n_top_genes=n_top_genes ) - + # Compute moments (needed for some visualizations) scv.pp.moments(adata, n_pcs=30, n_neighbors=30) - + # Setup veloVI scvi.model.VELOVI.setup_anndata( adata, spliced_layer=spliced_layer, unspliced_layer=unspliced_layer ) - + # Train model = scvi.model.VELOVI(adata) model.train(max_epochs=max_epochs, early_stopping=True) - + # Get results adata.obs["latent_time"] = model.get_latent_time(n_samples=25) adata.layers["velocity"] = model.get_velocity(n_samples=25) - + # Compute velocity graph for visualization scv.tl.velocity_graph(adata, vkey="velocity") - + # Compute UMAP if not present if "X_umap" not in adata.obsm: sc.pp.neighbors(adata) sc.tl.umap(adata) - + return adata, model # Usage diff --git a/archived/bio-research/skills/scvi-tools/references/scrna_integration.md b/archived/bio-research/skills/scvi-tools/references/scrna_integration.md index 5e9c518..b84b289 100644 --- a/archived/bio-research/skills/scvi-tools/references/scrna_integration.md +++ 
b/archived/bio-research/skills/scvi-tools/references/scrna_integration.md @@ -332,7 +332,7 @@ def integrate_datasets( ): """ Integrate multiple scRNA-seq datasets. - + Parameters ---------- adatas : dict @@ -345,23 +345,23 @@ def integrate_datasets( Number of HVGs n_latent : int Latent dimensions - + Returns ------- AnnData with integrated representation """ import scvi import scanpy as sc - + # Add batch labels and concatenate for batch_name, adata in adatas.items(): adata.obs[batch_key] = batch_name - + adata = sc.concat(list(adatas.values()), label=batch_key) - + # Store counts adata.layers["counts"] = adata.X.copy() - + # HVG selection sc.pp.highly_variable_genes( adata, @@ -371,14 +371,14 @@ def integrate_datasets( layer="counts" ) adata = adata[:, adata.var["highly_variable"]].copy() - + # Train model if labels_key and labels_key in adata.obs.columns: # Use scANVI scvi.model.SCVI.setup_anndata(adata, layer="counts", batch_key=batch_key) scvi_model = scvi.model.SCVI(adata, n_latent=n_latent) scvi_model.train(max_epochs=200) - + model = scvi.model.SCANVI.from_scvi_model( scvi_model, labels_key=labels_key, @@ -392,15 +392,15 @@ def integrate_datasets( model = scvi.model.SCVI(adata, n_latent=n_latent) model.train(max_epochs=200) rep_key = "X_scVI" - + # Add representation adata.obsm[rep_key] = model.get_latent_representation() - + # Compute neighbors and UMAP sc.pp.neighbors(adata, use_rep=rep_key) sc.tl.umap(adata) sc.tl.leiden(adata) - + return adata, model # Usage diff --git a/archived/bio-research/skills/scvi-tools/scripts/model_utils.py b/archived/bio-research/skills/scvi-tools/scripts/model_utils.py index b3fa0d3..3db2ac9 100644 --- a/archived/bio-research/skills/scvi-tools/scripts/model_utils.py +++ b/archived/bio-research/skills/scvi-tools/scripts/model_utils.py @@ -43,7 +43,7 @@ def prepare_adata( ): """ Prepare AnnData for scvi-tools models. 
- + Parameters ---------- adata : AnnData @@ -62,29 +62,29 @@ def prepare_adata( Minimum cells per gene copy : bool Return copy of data - + Returns ------- AnnData prepared for scvi-tools """ if copy: adata = adata.copy() - + # Calculate QC metrics adata.var['mt'] = get_mito_genes(adata) sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], inplace=True) - + # Filter cells adata = adata[adata.obs['n_genes_by_counts'] >= min_genes].copy() adata = adata[adata.obs['n_genes_by_counts'] <= max_genes].copy() adata = adata[adata.obs['pct_counts_mt'] < max_mito_pct].copy() - + # Filter genes sc.pp.filter_genes(adata, min_cells=min_cells) - + # Store raw counts adata.layers["counts"] = adata.X.copy() - + # HVG selection if batch_key and batch_key in adata.obs.columns: sc.pp.highly_variable_genes( @@ -101,14 +101,14 @@ def prepare_adata( sc.pp.highly_variable_genes(adata, n_top_genes=n_top_genes) # Restore counts to X adata.X = adata.layers["counts"].copy() - + # Subset to HVGs adata = adata[:, adata.var['highly_variable']].copy() - + print(f"Prepared AnnData: {adata.shape}") if batch_key: print(f"Batches: {adata.obs[batch_key].nunique()}") - + return adata @@ -124,7 +124,7 @@ def train_scvi( ): """ Train scVI or scANVI model. 
- + Parameters ---------- adata : AnnData @@ -143,20 +143,20 @@ def train_scvi( Use early stopping use_gpu : bool Use GPU if available - + Returns ------- Trained model """ import scvi - + # Setup AnnData scvi.model.SCVI.setup_anndata( adata, layer="counts", batch_key=batch_key ) - + if labels_key and labels_key in adata.obs.columns: # Train scVI first scvi_model = scvi.model.SCVI( @@ -168,7 +168,7 @@ def train_scvi( max_epochs=max_epochs, early_stopping=early_stopping ) - + # Initialize scANVI model = scvi.model.SCANVI.from_scvi_model( scvi_model, @@ -176,7 +176,7 @@ def train_scvi( unlabeled_category="Unknown" ) model.train(max_epochs=max_epochs // 4) - + # Store representation adata.obsm["X_scANVI"] = model.get_latent_representation() else: @@ -190,10 +190,10 @@ def train_scvi( max_epochs=max_epochs, early_stopping=early_stopping ) - + # Store representation adata.obsm["X_scVI"] = model.get_latent_representation() - + return model @@ -205,7 +205,7 @@ def evaluate_integration( ) -> Dict[str, float]: """ Evaluate integration quality using basic metrics. 
- + Parameters ---------- adata : AnnData @@ -216,46 +216,46 @@ def evaluate_integration( Cell type column embedding_key : str Key in obsm for embedding - + Returns ------- Dictionary of metrics """ from sklearn.metrics import silhouette_score from sklearn.neighbors import NearestNeighbors - + X = adata.obsm[embedding_key] batch = adata.obs[batch_key].values labels = adata.obs[label_key].values - + metrics = {} - + # Silhouette scores try: # Cell type silhouette (higher = better separation) metrics["silhouette_label"] = silhouette_score(X, labels) - + # Batch silhouette (lower = better mixing) metrics["silhouette_batch"] = silhouette_score(X, batch) except Exception as e: warnings.warn(f"Silhouette calculation failed: {e}") - + # Batch mixing in neighbors try: nn = NearestNeighbors(n_neighbors=50) nn.fit(X) distances, indices = nn.kneighbors(X) - + batch_mixing = [] for i in range(len(X)): neighbor_batches = batch[indices[i]] unique_batches = len(np.unique(neighbor_batches)) batch_mixing.append(unique_batches / len(np.unique(batch))) - + metrics["batch_mixing"] = np.mean(batch_mixing) except Exception as e: warnings.warn(f"Batch mixing calculation failed: {e}") - + return metrics @@ -267,7 +267,7 @@ def get_marker_genes( ) -> Dict[str, List[str]]: """ Get marker genes using scVI differential expression. 
- + Parameters ---------- model : scvi model @@ -278,36 +278,36 @@ def get_marker_genes( Column to group cells by n_genes : int Number of top markers per group - + Returns ------- Dictionary of {group: [marker_genes]} """ markers = {} groups = adata.obs[groupby].unique() - + for group in groups: # Get DE results for this group vs rest de_results = model.differential_expression( groupby=groupby, group1=group ) - + # Filter and sort de_sig = de_results[ (de_results["is_de_fdr_0.05"] == True) & (de_results["lfc_mean"] > 0.5) ].sort_values("lfc_mean", ascending=False) - + markers[group] = de_sig.index[:n_genes].tolist() - + return markers def plot_training_history(model, save_path: Optional[str] = None): """ Plot model training history. - + Parameters ---------- model : scvi model @@ -316,9 +316,9 @@ def plot_training_history(model, save_path: Optional[str] = None): Path to save figure """ import matplotlib.pyplot as plt - + fig, axes = plt.subplots(1, 2, figsize=(12, 4)) - + # ELBO if "elbo_train" in model.history: axes[0].plot(model.history["elbo_train"], label="Train") @@ -328,7 +328,7 @@ def plot_training_history(model, save_path: Optional[str] = None): axes[0].set_ylabel("ELBO") axes[0].legend() axes[0].set_title("Training Loss") - + # Reconstruction if "reconstruction_loss_train" in model.history: axes[1].plot(model.history["reconstruction_loss_train"], label="Train") @@ -338,12 +338,12 @@ def plot_training_history(model, save_path: Optional[str] = None): axes[1].set_ylabel("Reconstruction Loss") axes[1].legend() axes[1].set_title("Reconstruction Loss") - + plt.tight_layout() - + if save_path: plt.savefig(save_path, dpi=150, bbox_inches="tight") - + return fig diff --git a/cowork-plugin-management/skills/cowork-plugin-customizer/LICENSE.txt b/cowork-plugin-management/skills/cowork-plugin-customizer/LICENSE.txt index 7a4a3ea..d645695 100644 --- a/cowork-plugin-management/skills/cowork-plugin-customizer/LICENSE.txt +++ 
b/cowork-plugin-management/skills/cowork-plugin-customizer/LICENSE.txt @@ -199,4 +199,4 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file + limitations under the License. From 7f772d5bf0c8373e9d446e39559a925d5302179c Mon Sep 17 00:00:00 2001 From: Joost van der Laan Date: Wed, 25 Feb 2026 12:13:19 +0000 Subject: [PATCH 8/9] Use self-hosted runner with fallback for prek CI Co-Authored-By: Claude Opus 4.6 --- .github/workflows/prek.yml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/prek.yml b/.github/workflows/prek.yml index 6f4305a..da9a274 100644 --- a/.github/workflows/prek.yml +++ b/.github/workflows/prek.yml @@ -13,8 +13,24 @@ permissions: contents: read jobs: - prek: + determine-runner: runs-on: ubuntu-latest + outputs: + runner: ${{ steps.runner.outputs.use-runner }} + steps: + - name: Determine runner + id: runner + uses: mikehardy/runner-fallback-action@v1 + with: + github-token: ${{ secrets.GH_RUNNER_TOKEN }} + primary-runner: self-hosted-16-cores + fallback-runner: ubuntu-latest + organization: fuww + fallback-on-error: true + + prek: + runs-on: ${{ fromJson(needs.determine-runner.outputs.runner) }} + needs: [determine-runner] steps: - uses: actions/checkout@v4 - uses: DeterminateSystems/nix-installer-action@main From f404946da957beecdba872e4a9f2fb2366b7696e Mon Sep 17 00:00:00 2001 From: Joost van der Laan Date: Wed, 25 Feb 2026 12:52:19 +0000 Subject: [PATCH 9/9] Trigger CI