Commits (18)
075b3f5
docs: design doc for self-service data upload (issue #86)
William-Hill Feb 24, 2026
184202e
docs: implementation plan for self-service data upload (issue #86)
William-Hill Feb 24, 2026
cb20f2d
docs: design spec for config-driven distillation pipeline
William-Hill Mar 28, 2026
ae971e4
docs: implementation plan for config-driven distillation pipeline
William-Hill Mar 28, 2026
5575f0b
chore: scaffold training pipeline package and test infrastructure
William-Hill Mar 28, 2026
aa7dc3f
feat(training): config loader with YAML validation and JSONL writer
William-Hill Mar 28, 2026
7601828
feat(training): add Bishop State school config and seed queries
William-Hill Mar 28, 2026
c547ce9
feat(training): teacher prompt templates for explainer and summarizer
William-Hill Mar 28, 2026
0dcab64
feat(training): seed data generation for explainer and summarizer
William-Hill Mar 28, 2026
227439a
feat(training): distillation pipeline with dual teacher backend support
William-Hill Mar 28, 2026
7c60660
feat(training): dataset preparation — filter, dedup, and split
William-Hill Mar 28, 2026
25612c2
feat(training): eval harness with ship criteria for model quality gates
William-Hill Mar 28, 2026
2cefc17
feat(training): MLX QLoRA fine-tuning wrapper
William-Hill Mar 28, 2026
a189e9d
feat(training): Ollama model export and registration
William-Hill Mar 28, 2026
17c9ea7
feat(dashboard): model client adapter for Ollama/OpenAI routing
William-Hill Mar 28, 2026
b437a6e
feat(dashboard): route explain-pairing and query-summary through mode…
William-Hill Mar 28, 2026
86e3d9c
refactor: simplify training pipeline — deduplicate code, extract shar…
William-Hill Mar 28, 2026
e8312a3
fix: address CodeRabbit review findings
William-Hill Mar 28, 2026
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -183,3 +183,6 @@ operations/convert_institution_id_to_string.py
operations/verify_institution_id.py
.vercel
.env.deploy

# Training pipeline artifacts
training_data/
13 changes: 3 additions & 10 deletions codebenders-dashboard/app/api/courses/explain-pairing/route.ts
@@ -1,10 +1,7 @@
import { type NextRequest, NextResponse } from "next/server"
import { getPool } from "@/lib/db"
import { canAccess, type Role } from "@/lib/roles"
import { generateText } from "ai"
import { createOpenAI } from "@ai-sdk/openai"

const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY || "" })
import { generateExplanation } from "@/lib/model-client"

const DELIVERY_LABELS: Record<string, string> = {
F: "Face-to-Face",
@@ -18,7 +15,7 @@ export async function POST(request: NextRequest) {
return NextResponse.json({ error: "Forbidden" }, { status: 403 })
}

if (!process.env.OPENAI_API_KEY) {
if (process.env.MODEL_BACKEND !== "ollama" && !process.env.OPENAI_API_KEY) {
return NextResponse.json({ error: "OpenAI API key not configured" }, { status: 500 })
}

@@ -189,11 +186,7 @@ Write a concise analysis (3-4 sentences) that:

Be practical and data-driven. Do not speculate beyond what the numbers show.`

const result = await generateText({
model: openai("gpt-4o-mini"),
prompt: llmPrompt,
maxOutputTokens: 320,
})
const result = { text: await generateExplanation(llmPrompt, 320) }

return NextResponse.json({ stats, explanation: result.text })
} catch (error) {
15 changes: 4 additions & 11 deletions codebenders-dashboard/app/api/query-summary/route.ts
@@ -1,17 +1,14 @@
import { type NextRequest, NextResponse } from "next/server"
import { canAccess, type Role } from "@/lib/roles"
import { generateText } from "ai"
import { createOpenAI } from "@ai-sdk/openai"

const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY || "" })
import { generateSummary } from "@/lib/model-client"

export async function POST(request: NextRequest) {
const role = request.headers.get("x-user-role") as Role | null
if (!role || !canAccess("/api/query-summary", role)) {
return NextResponse.json({ error: "Forbidden" }, { status: 403 })
}

if (!process.env.OPENAI_API_KEY) {
if (process.env.MODEL_BACKEND !== "ollama" && !process.env.OPENAI_API_KEY) {
return NextResponse.json({ error: "OpenAI API key not configured" }, { status: 500 })
}

@@ -47,12 +44,8 @@ ${JSON.stringify(sampleRows, null, 2)}
Write a 2-3 sentence plain-English summary of what these results show. Be specific about the numbers. Do not speculate beyond the data. Address the advisor directly.`

try {
const result = await generateText({
model: openai("gpt-4o-mini"),
prompt: llmPrompt,
maxOutputTokens: 200,
})
return NextResponse.json({ summary: result.text })
const summary = await generateSummary(llmPrompt, 200)
return NextResponse.json({ summary })
} catch (error) {
console.error("[query-summary] Error:", error)
return NextResponse.json(
81 changes: 81 additions & 0 deletions codebenders-dashboard/lib/model-client.ts
@@ -0,0 +1,81 @@
/**
* Model client adapter — routes inference to Ollama (fine-tuned) or
* OpenAI (fallback) based on MODEL_BACKEND env var.
*/

import { generateText } from "ai"
import { createOpenAI } from "@ai-sdk/openai"

const MODEL_BACKEND = process.env.MODEL_BACKEND || "openai"
const SCHOOL_CODE = process.env.SCHOOL_CODE || "bishop-state"
const OLLAMA_BASE_URL = process.env.OLLAMA_BASE_URL || "http://localhost:11434"
const MODEL_SIZE = process.env.MODEL_SIZE || "9b"

let _openai: ReturnType<typeof createOpenAI> | null = null

function getOpenAI() {
if (!_openai) {
_openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY || "" })
}
return _openai
}

async function callOllama(model: string, prompt: string, maxTokens: number): Promise<string> {
const response = await fetch(`${OLLAMA_BASE_URL}/api/generate`, {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({
model,
prompt,
stream: false,
options: {
temperature: 0.3,
num_predict: maxTokens,
},
}),
})

if (!response.ok) {
throw new Error(`Ollama error: ${response.status} ${response.statusText}`)
}

const data = await response.json()
return data.response
}

async function generate(
task: "explainer" | "summarizer",
prompt: string,
maxTokens: number,
): Promise<string> {
if (MODEL_BACKEND === "ollama") {
const model = `${SCHOOL_CODE}-${task}:${MODEL_SIZE}`
return callOllama(model, prompt, maxTokens)
}
const result = await generateText({
model: getOpenAI()("gpt-4o-mini"),
prompt,
maxOutputTokens: maxTokens,
})
return result.text
}

/**
* Generate a course pairing explanation.
*/
export async function generateExplanation(
prompt: string,
maxTokens: number = 320,
): Promise<string> {
return generate("explainer", prompt, maxTokens)
}

/**
* Generate a query result summary.
*/
export async function generateSummary(
prompt: string,
maxTokens: number = 200,
): Promise<string> {
return generate("summarizer", prompt, maxTokens)
}
170 changes: 170 additions & 0 deletions docs/plans/2026-02-24-self-service-upload-design.md
@@ -0,0 +1,170 @@
# Design: Self-Service Data Upload (Issue #86)

**Date:** 2026-02-24
**Author:** Claude Code

---

## Overview

Allow admin and IR users to upload institutional data files directly from the dashboard without
needing direct database or server access. Two upload paths: course enrollment CSVs (end-to-end
to Postgres) and PDP cohort/AR files (to Supabase Storage + GitHub Actions ML pipeline trigger).

---

## Scope

**In scope:**
- Course enrollment CSV → `course_enrollments` Postgres table (upsert)
- PDP Cohort CSV / PDP AR (.xlsx) → Supabase Storage + GitHub Actions `repository_dispatch`
- Preview step (first 10 rows + column validation) before commit
- Role guard: admin and ir only

**Out of scope:**
- Upload history log (future issue)
- Column remapping UI (columns must match known schema)
- ML experiment tracking / MLflow (future issue)
- Auto-triggering ML pipeline without a server (GitHub Actions is the trigger mechanism)

---

## Pages & Routing

**New page:** `codebenders-dashboard/app/admin/upload/page.tsx`

**Role guard:** Add to `lib/roles.ts` `ROUTE_PERMISSIONS`:
```ts
{ prefix: "/admin", roles: ["admin", "ir"] },
{ prefix: "/api/admin", roles: ["admin", "ir"] },
```
Middleware already enforces this pattern via `x-user-role` header — no other auth code needed.

**Nav link:** Add "Upload Data" to `nav-header.tsx`, visible only to admin/ir roles.

**New API routes:**
- `POST /api/admin/upload/preview` — parse first 10 rows, return sample + validation summary
- `POST /api/admin/upload/commit` — full ingest (course → Postgres; PDP/AR → Storage + Actions)

---

## UI Flow (3 States)

### State 1 — Select & Drop
- Dropdown: file type (`Course Enrollment CSV` | `PDP Cohort CSV` | `PDP AR File (.xlsx)`)
- Drag-and-drop zone (click to pick; `.csv` for course/cohort, `.csv`+`.xlsx` for AR)
- "Preview" button → calls `/api/admin/upload/preview`

### State 2 — Preview
- Shows: detected file type, estimated row count, first 10 rows in a table
- Validation banner: lists missing required columns or warnings
- "Confirm & Upload" → calls `/api/admin/upload/commit`
- "Back" link to return to State 1

### State 3 — Result
- Course enrollments: `{ inserted, skipped, errors[] }` summary card
- PDP/AR: "File accepted — ML pipeline queued in GitHub Actions" + link to Actions run
- "Upload another file" resets to State 1

---

## API Routes

### `POST /api/admin/upload/preview`

**Input:** `multipart/form-data` with `file` and `fileType` fields

**Logic:**
1. Parse first 50 rows with `csv-parse` (CSV) or `xlsx` (Excel)
2. Validate required columns exist for the given `fileType`
3. Return `{ columns, sampleRows (first 10), rowCount (estimated), warnings[] }`
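
The column-validation part of these steps can be sketched as follows. This is a minimal illustration: the helper name `previewCsv` and the `REQUIRED_COLUMNS` keys are assumptions, and the naive comma split stands in for the `csv-parse` call the route would actually use.

```typescript
// Illustrative preview helper; previewCsv and the fileType keys are assumed
// names, and the naive comma split stands in for csv-parse.
const REQUIRED_COLUMNS: Record<string, string[]> = {
  "course-enrollment": ["student_guid", "course_prefix", "course_number", "academic_year", "academic_term"],
  "pdp-cohort": ["Institution_ID", "Cohort", "Student_GUID", "Cohort_Term"],
}

function previewCsv(csvText: string, fileType: string) {
  const lines = csvText.trim().split("\n")
  const columns = lines[0].split(",").map((c) => c.trim())
  // Only the first 10 data rows are returned as the sample.
  const sampleRows = lines.slice(1, 11).map((line) => {
    const cells = line.split(",")
    return Object.fromEntries(columns.map((c, i) => [c, cells[i] ?? ""]))
  })
  const required = REQUIRED_COLUMNS[fileType] ?? []
  const warnings = required
    .filter((c) => !columns.includes(c))
    .map((c) => `Missing required column: ${c}`)
  return { columns, sampleRows, rowCount: lines.length - 1, warnings }
}
```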

### `POST /api/admin/upload/commit`

**Input:** Same multipart form

**Course enrollment path:**
1. Stream-parse full CSV with `csv-parse` async iterator
2. Batch-upsert 500 rows at a time into `course_enrollments` via `pg`
3. Conflict target: `(student_guid, course_prefix, course_number, academic_term)`
4. Return `{ inserted, skipped, errors[] }`
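
A minimal sketch of how one batch's upsert query could be built, assuming the required column list and the conflict target from this design (the `buildUpsertQuery` name and the `DO UPDATE` column choice are illustrative):

```typescript
// Illustrative query builder for the batched upsert; the conflict target is
// from this design, the updated column choice is an assumption.
const COLS = ["student_guid", "course_prefix", "course_number", "academic_year", "academic_term"]

function buildUpsertQuery(batch: string[][]) {
  // One parameterized multi-row INSERT per batch of up to 500 rows.
  const placeholders = batch
    .map((_row, r) => `(${COLS.map((_col, c) => `$${r * COLS.length + c + 1}`).join(", ")})`)
    .join(", ")
  const text =
    `INSERT INTO course_enrollments (${COLS.join(", ")}) VALUES ${placeholders} ` +
    `ON CONFLICT (student_guid, course_prefix, course_number, academic_term) ` +
    `DO UPDATE SET academic_year = EXCLUDED.academic_year`
  return { text, values: batch.flat() }
}
```

Passing `{ text, values }` to `pool.query` from `pg` keeps each batch a single round trip.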

**PDP/AR path:**
1. Upload file to Supabase Storage bucket `pdp-uploads` via `@supabase/supabase-js`
2. Call GitHub API `POST /repos/{owner}/{repo}/dispatches` with:
```json
{ "event_type": "ml-pipeline", "client_payload": { "file_path": "<storage-path>" } }
```
3. Return `{ status: "processing", actionsUrl: "https://github.com/{owner}/{repo}/actions" }`
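
The dispatch call in step 2 might look like this sketch. The function names are assumptions and `owner`/`repo`/`token` would come from env vars; the endpoint, headers, and 204 response are standard `repository_dispatch` behavior.

```typescript
// Sketch of the GitHub Actions trigger; triggerMlPipeline and
// buildDispatchPayload are illustrative names.
function buildDispatchPayload(filePath: string) {
  return { event_type: "ml-pipeline", client_payload: { file_path: filePath } }
}

async function triggerMlPipeline(owner: string, repo: string, token: string, filePath: string) {
  const res = await fetch(`https://api.github.com/repos/${owner}/${repo}/dispatches`, {
    method: "POST",
    headers: {
      Authorization: `Bearer ${token}`,
      Accept: "application/vnd.github+json",
      "Content-Type": "application/json",
    },
    body: JSON.stringify(buildDispatchPayload(filePath)),
  })
  // GitHub answers a successful repository_dispatch with 204 No Content.
  if (res.status !== 204) throw new Error(`dispatch failed: ${res.status} ${res.statusText}`)
}
```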

**Role enforcement:** Read `x-user-role` header (set by middleware); return 403 if not admin/ir.

---

## GitHub Actions Workflow

**File:** `.github/workflows/ml-pipeline.yml`

**Trigger:** `repository_dispatch` with `event_type: ml-pipeline`

**Steps:**
1. Checkout repo
2. Set up Python with `venv`
3. Install dependencies (`pip install -r requirements.txt`)
4. Download uploaded file from Supabase Storage using `SUPABASE_SERVICE_KEY` secret
5. Run `venv/bin/python ai_model/complete_ml_pipeline.py --input <downloaded-file-path>`
6. Upload `ML_PIPELINE_REPORT.txt` as a GitHub Actions artifact (retained 90 days)

**Required secrets:** `SUPABASE_URL`, `SUPABASE_SERVICE_KEY`, `GITHUB_TOKEN` (auto-provided)
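
The trigger wiring and steps above can be sketched as the following workflow fragment. This is a sketch under the assumptions of this design: the download helper script path is hypothetical, and step details may differ in the real file.

```yaml
# Sketch of .github/workflows/ml-pipeline.yml (scripts/download_upload.py is
# a hypothetical helper name).
name: ML Pipeline
on:
  repository_dispatch:
    types: [ml-pipeline]

jobs:
  run-pipeline:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - run: |
          python -m venv venv
          venv/bin/pip install -r requirements.txt
      - name: Download upload from Supabase Storage
        env:
          SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
          SUPABASE_SERVICE_KEY: ${{ secrets.SUPABASE_SERVICE_KEY }}
        run: venv/bin/python scripts/download_upload.py "${{ github.event.client_payload.file_path }}"
      - run: venv/bin/python ai_model/complete_ml_pipeline.py --input "${{ github.event.client_payload.file_path }}"
      - uses: actions/upload-artifact@v4
        with:
          name: ml-pipeline-report
          path: ML_PIPELINE_REPORT.txt
          retention-days: 90
```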

---

## Required Column Schemas

### Course Enrollment CSV
Must include: `student_guid`, `course_prefix`, `course_number`, `academic_year`, `academic_term`
Optional: any other `course_enrollments` column; columns absent from the file are filled as NULL

### PDP Cohort CSV
Must include: `Institution_ID`, `Cohort`, `Student_GUID`, `Cohort_Term`

### PDP AR File (.xlsx)
Must include: `Institution_ID`, `Cohort`, `Student_GUID` (first sheet parsed)

---

## New Packages

| Package | Purpose |
|---------|---------|
| `csv-parse` | Streaming CSV parsing (async iterator mode) |
| `xlsx` | Excel (.xlsx) parsing |

---

## New Files

| File | Purpose |
|------|---------|
| `codebenders-dashboard/app/admin/upload/page.tsx` | Upload UI page |
| `codebenders-dashboard/app/api/admin/upload/preview/route.ts` | Preview API route |
| `codebenders-dashboard/app/api/admin/upload/commit/route.ts` | Commit API route |
| `.github/workflows/ml-pipeline.yml` | GitHub Actions ML pipeline trigger |

---

## Supabase Changes

**Storage bucket:** Create `pdp-uploads` bucket (private, authenticated access only).
No new database migrations required — `course_enrollments` table already exists.

**Bucket policy:** Only service role key can read/write. Signed URLs used for pipeline download.

---

## Constraints & Known Limitations

- ML pipeline trigger via GitHub Actions means a ~30-60s delay before the pipeline starts
- Vercel free tier has a 4.5 MB request body limit — large files should use Supabase Storage direct upload in a future iteration
- No upload history log in this version (deferred)
- Column remapping is out of scope — files must match the known schema