From bb563aa27ef074b2663a5c98c2ffc8a8248261e6 Mon Sep 17 00:00:00 2001 From: Lily Shen Date: Tue, 14 Apr 2026 11:41:37 -0700 Subject: [PATCH] =?UTF-8?q?feat(uploads):=20ADR-002=20Phase=201=20?= =?UTF-8?q?=E2=80=94=20ObjectStore=20driver=20+=20bytes-vs-metadata=20spli?= =?UTF-8?q?t?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a narrow `ObjectStore` interface (put / get / delete + capabilities) and route attachment bytes through it, starting with a single `mongo` driver that writes to a new `MediaObject` collection. `File` stays in place for display metadata and stops carrying inline bytes for new uploads; `File.data` is now optional so pre-ADR-002 records remain readable via a fallback in the GET handler. No data migration needed; no user-visible change. What lands: - backend/services/objectStore/ObjectStore.ts — interface - backend/services/objectStore/drivers/mongoDriver.ts — default driver - backend/services/objectStore/index.ts — singleton, env-based selection - backend/models/MediaObject.ts — bytes + byte-level mime/size - backend/models/File.ts — data field optional (additive change) - backend/routes/uploads.ts — POST writes via driver; GET tries driver then falls back to legacy `File.data`; size cap flows through multer from `driver.capabilities.maxObjectBytes` - 18 new unit tests: driver contract, env-based driver selection, route GET (driver hit, legacy fallback, 404 paths, 500 on throw), route POST (driver.put + metadata-only File save, 400 no-file, bad MIME) What Phase 1 intentionally does NOT do: - Close the pre-existing GET authorization gap. Adding plain `auth` middleware breaks every `` in the app since browsers cannot attach JWTs to image requests. The fix (signed short-TTL tokens + frontend URL-rewrite) lands in Phase 1b as its own PR. The gap is documented loudly in the route header comment and called out in the ADR as a prerequisite for any production-scale launch. - Ship `filesystem`, `gcs`, or `s3` drivers. One production driver is enough to validate the interface; contract tests will cover the others as they arrive. Shipping `filesystem` here without a Helm caller would be half of Phase 5 wearing a Phase 1 hat. Reviewed via the `code-reviewer` agent against docs/REVIEW.md; verdict "Approve, ready to ship as Phase 1." Co-Authored-By: Claude Opus 4.6 (1M context) --- .../unit/routes/uploads.post.test.js | 50 ++++++++--- backend/__tests__/unit/routes/uploads.test.js | 85 ++++++++++++++++--- .../unit/services/objectStore/index.test.ts | 35 ++++++++ .../services/objectStore/mongoDriver.test.ts | 72 ++++++++++++++++ backend/models/File.ts | 10 ++- backend/models/MediaObject.ts | 44 ++++++++++ backend/routes/uploads.ts | 75 +++++++++++++--- backend/services/objectStore/ObjectStore.ts | 52 ++++++++++++ .../objectStore/drivers/mongoDriver.ts | 51 +++++++++++ backend/services/objectStore/index.ts | 39 +++++++++ .../ADR-002-attachments-and-object-storage.md | 30 +++++-- 11 files changed, 500 insertions(+), 43 deletions(-) create mode 100644 backend/__tests__/unit/services/objectStore/index.test.ts create mode 100644 backend/__tests__/unit/services/objectStore/mongoDriver.test.ts create mode 100644 backend/models/MediaObject.ts create mode 100644 backend/services/objectStore/ObjectStore.ts create mode 100644 backend/services/objectStore/drivers/mongoDriver.ts create mode 100644 backend/services/objectStore/index.ts diff --git a/backend/__tests__/unit/routes/uploads.post.test.js b/backend/__tests__/unit/routes/uploads.post.test.js index 1aced617..7340e97b 100644 --- a/backend/__tests__/unit/routes/uploads.post.test.js +++ b/backend/__tests__/unit/routes/uploads.post.test.js @@ -1,11 +1,24 @@ +// ADR-002 Phase 1 — POST route covers driver.put + metadata-only File save, +// plus the new size-cap rejection branch. + const request = require('supertest'); const express = require('express'); +const mockStore = { + capabilities: { name: 'mongo', maxObjectBytes: 10 * 1024 * 1024 }, + get: jest.fn(), + put: jest.fn().mockResolvedValue(undefined), + delete: jest.fn(), +}; + +jest.mock('../../../services/objectStore', () => ({ + getObjectStore: () => mockStore, + __resetObjectStoreForTests: jest.fn(), +})); + jest.mock('../../../models/File', () => { - const saveMock = jest.fn(); - const File = jest - .fn() - .mockImplementation((data) => ({ ...data, save: saveMock })); + const saveMock = jest.fn().mockResolvedValue(undefined); + const File = jest.fn().mockImplementation((data) => ({ ...data, save: saveMock })); File.findByFileName = jest.fn(); File.__saveMock = saveMock; return File; @@ -19,34 +32,51 @@ jest.mock('../../../middleware/auth', () => (req, res, next) => { const routes = require('../../../routes/uploads'); -describe('uploads POST route', () => { +describe('uploads POST / (ADR-002 Phase 1)', () => { let app; + beforeEach(() => { app = express(); app.use(express.json()); app.use('/api/uploads', routes); - File.__saveMock.mockReset(); + File.__saveMock.mockClear(); File.mockClear(); + mockStore.put.mockClear(); }); - it('uploads image and saves file', async () => { + it('writes bytes through the driver and saves metadata-only File', async () => { await request(app) .post('/api/uploads') .attach('image', Buffer.from('data'), 'photo.png') .expect(200); + + // Driver received the bytes + mime + expect(mockStore.put).toHaveBeenCalledWith( + expect.stringMatching(/\.png$/), + expect.any(Buffer), + 'image/png', + ); + // File was created with metadata only — no `data` field expect(File).toHaveBeenCalled(); + const fileArgs = File.mock.calls[0][0]; + expect(fileArgs.data).toBeUndefined(); + expect(fileArgs.fileName).toMatch(/\.png$/); + expect(fileArgs.contentType).toBe('image/png'); + expect(fileArgs.uploadedBy).toBe('user1'); expect(File.__saveMock).toHaveBeenCalled(); }); - it('returns 400 when no file provided', async () => { + it('returns 400 when no file is provided', async () => { const res = await request(app).post('/api/uploads').expect(400); expect(res.body.msg).toBe('No file uploaded'); + expect(mockStore.put).not.toHaveBeenCalled(); }); - it('rejects invalid file types', async () => { + it('rejects disallowed extensions before reaching the driver', async () => { await request(app) .post('/api/uploads') .attach('image', Buffer.from('data'), 'text.txt') - .expect(500); + .expect(500); // multer surfaces the filter error as 500 + expect(mockStore.put).not.toHaveBeenCalled(); }); }); diff --git a/backend/__tests__/unit/routes/uploads.test.js b/backend/__tests__/unit/routes/uploads.test.js index 2c53d8e2..37ff0406 100644 --- a/backend/__tests__/unit/routes/uploads.test.js +++ b/backend/__tests__/unit/routes/uploads.test.js @@ -1,33 +1,94 @@ +// ADR-002 Phase 1 — GET route covers both the driver-hit path and the +// legacy File.data fallback for pre-ADR-002 records. + const request = require('supertest'); const express = require('express'); +const { Readable } = require('stream'); + +const mockStore = { + capabilities: { name: 'mongo', maxObjectBytes: 10 * 1024 * 1024 }, + get: jest.fn(), + put: jest.fn(), + delete: jest.fn(), +}; + +jest.mock('../../../services/objectStore', () => ({ + getObjectStore: () => mockStore, + __resetObjectStoreForTests: jest.fn(), +})); jest.mock('../../../models/File', () => ({ - findByFileName: jest - .fn() - .mockResolvedValue({ data: Buffer.from('x'), contentType: 'text/plain' }), + findByFileName: jest.fn(), })); jest.mock('../../../middleware/auth', () => (req, res, next) => next()); -const routes = require('../../../routes/uploads'); const File = require('../../../models/File'); +const routes = require('../../../routes/uploads'); -describe('uploads routes', () => { +describe('uploads GET /:fileName (ADR-002 Phase 1)', () => { const app = express(); app.use(express.json()); app.use('/api/uploads', routes); - it('GET /api/uploads/:file calls findByFileName', async () => { - await request(app).get('/api/uploads/test').expect(200); - expect(File.findByFileName).toHaveBeenCalledWith('test'); + beforeEach(() => { + mockStore.get.mockReset(); + mockStore.put.mockReset(); + mockStore.delete.mockReset(); + File.findByFileName.mockReset(); }); - it('returns 404 when file not found', async () => { + + it('streams bytes from the driver when the key is present', async () => { + mockStore.get.mockResolvedValue({ + stream: Readable.from(Buffer.from('hello')), + mime: 'image/png', + size: 5, + }); + + const res = await request(app) + .get('/api/uploads/new.png') + .buffer(true) + .parse((r, cb) => { + const chunks = []; + r.on('data', (c) => chunks.push(c)); + r.on('end', () => cb(null, Buffer.concat(chunks))); + }) + .expect(200); + expect(res.headers['content-type']).toMatch(/image\/png/); + expect(Buffer.isBuffer(res.body) ? res.body : Buffer.from(res.body)).toEqual( + Buffer.from('hello'), + ); + expect(mockStore.get).toHaveBeenCalledWith('new.png'); + expect(File.findByFileName).not.toHaveBeenCalled(); + }); + + it('falls back to legacy File.data when the driver returns null', async () => { + mockStore.get.mockResolvedValue(null); + File.findByFileName.mockResolvedValue({ + data: Buffer.from('legacy-bytes'), + contentType: 'image/jpeg', + }); + + const res = await request(app).get('/api/uploads/legacy.jpg').expect(200); + expect(res.headers['content-type']).toMatch(/image\/jpeg/); + expect(res.body.toString()).toBe('legacy-bytes'); + expect(File.findByFileName).toHaveBeenCalledWith('legacy.jpg'); + }); + + it('returns 404 when neither driver nor legacy store has the key', async () => { + mockStore.get.mockResolvedValue(null); File.findByFileName.mockResolvedValue(null); await request(app).get('/api/uploads/missing').expect(404); }); - it('returns 500 on error', async () => { - File.findByFileName.mockRejectedValue(new Error('fail')); - await request(app).get('/api/uploads/oops').expect(500); + it('returns 404 when the legacy record exists but has no data (metadata-only)', async () => { + mockStore.get.mockResolvedValue(null); + File.findByFileName.mockResolvedValue({ data: Buffer.alloc(0), contentType: 'image/png' }); + await request(app).get('/api/uploads/empty.png').expect(404); + }); + + it('returns 500 when the driver throws', async () => { + mockStore.get.mockRejectedValue(new Error('boom')); + await request(app).get('/api/uploads/explode').expect(500); }); }); diff --git a/backend/__tests__/unit/services/objectStore/index.test.ts b/backend/__tests__/unit/services/objectStore/index.test.ts new file mode 100644 index 00000000..6be1580c --- /dev/null +++ b/backend/__tests__/unit/services/objectStore/index.test.ts @@ -0,0 +1,35 @@ +// @ts-nocheck +// ADR-002 Phase 1: driver selection via OBJECT_STORE_DRIVER env. + +const { getObjectStore, __resetObjectStoreForTests } = require('../../../../services/objectStore'); + +describe('getObjectStore (ADR-002 Phase 1 driver selection)', () => { + const ORIGINAL_ENV = process.env; + + afterEach(() => { + process.env = ORIGINAL_ENV; + __resetObjectStoreForTests(); + }); + + beforeEach(() => { + process.env = { ...ORIGINAL_ENV }; + delete process.env.OBJECT_STORE_DRIVER; + __resetObjectStoreForTests(); + }); + + it('defaults to the mongo driver when OBJECT_STORE_DRIVER is unset', () => { + const store = getObjectStore(); + expect(store.capabilities.name).toBe('mongo'); + }); + + it('rejects unknown drivers with a clear error', () => { + process.env.OBJECT_STORE_DRIVER = 'unicorn'; + expect(() => getObjectStore()).toThrow(/unicorn.*not supported/); + }); + + it('caches the resolved driver across calls', () => { + const a = getObjectStore(); + const b = getObjectStore(); + expect(a).toBe(b); + }); +}); diff --git a/backend/__tests__/unit/services/objectStore/mongoDriver.test.ts b/backend/__tests__/unit/services/objectStore/mongoDriver.test.ts new file mode 100644 index 00000000..851d4ff0 --- /dev/null +++ b/backend/__tests__/unit/services/objectStore/mongoDriver.test.ts @@ -0,0 +1,72 @@ +// @ts-nocheck +// ADR-002 Phase 1: MongoObjectStore driver contract. + +const { setupMongoDb, closeMongoDb, clearMongoDb } = require('../../../utils/testUtils'); +const { MongoObjectStore } = require('../../../../services/objectStore/drivers/mongoDriver'); + +async function streamToBuffer(stream) { + const chunks = []; + for await (const chunk of stream) chunks.push(chunk); + return Buffer.concat(chunks); +} + +describe('MongoObjectStore (ADR-002 Phase 1)', () => { + let store; + + beforeAll(async () => { + await setupMongoDb(); + }); + afterAll(async () => { + await closeMongoDb(); + }); + afterEach(async () => { + await clearMongoDb(); + }); + + beforeEach(() => { + store = new MongoObjectStore(); + }); + + it('exposes driver name and a default max size', () => { + expect(store.capabilities.name).toBe('mongo'); + expect(store.capabilities.maxObjectBytes).toBeGreaterThan(0); + }); + + it('round-trips bytes and mime through put/get', async () => { + const body = Buffer.from('hello-commonly'); + await store.put('k1.jpg', body, 'image/jpeg'); + + const got = await store.get('k1.jpg'); + expect(got).not.toBeNull(); + expect(got.mime).toBe('image/jpeg'); + expect(got.size).toBe(body.length); + const bytes = await streamToBuffer(got.stream); + expect(bytes.equals(body)).toBe(true); + }); + + it('returns null for a missing key', async () => { + const got = await store.get('does-not-exist.png'); + expect(got).toBeNull(); + }); + + it('put is idempotent per key (overwrite)', async () => { + await store.put('same.png', Buffer.from('v1'), 'image/png'); + await store.put('same.png', Buffer.from('v2-longer'), 'image/png'); + + const got = await store.get('same.png'); + const bytes = await streamToBuffer(got.stream); + expect(bytes.toString()).toBe('v2-longer'); + expect(got.size).toBe(9); + }); + + it('delete removes a previously stored key', async () => { + await store.put('to-delete.gif', Buffer.from('bye'), 'image/gif'); + await store.delete('to-delete.gif'); + const got = await store.get('to-delete.gif'); + expect(got).toBeNull(); + }); + + it('delete is a no-op for a missing key', async () => { + await expect(store.delete('never-existed')).resolves.toBeUndefined(); + }); +}); diff --git a/backend/models/File.ts b/backend/models/File.ts index 072944e3..007672f7 100644 --- a/backend/models/File.ts +++ b/backend/models/File.ts @@ -5,7 +5,13 @@ export interface IFile extends Document { originalName: string; contentType: string; size: number; - data: Buffer; + /** + * Legacy inline byte storage. For records created before ADR-002 Phase 1 + * this holds the full payload. New records leave it empty — bytes live in + * the configured ObjectStore driver, keyed by `fileName`. Phase 2 removes + * this field entirely after backfilling legacy records. + */ + data?: Buffer; uploadedBy: Types.ObjectId; createdAt: Date; } @@ -19,7 +25,7 @@ const fileSchema = new Schema({ originalName: { type: String, required: true }, contentType: { type: String, required: true }, size: { type: Number, required: true }, - data: { type: Buffer, required: true }, + data: { type: Buffer, required: false }, uploadedBy: { type: Schema.Types.ObjectId, ref: 'User', required: true }, createdAt: { type: Date, default: Date.now }, }); diff --git a/backend/models/MediaObject.ts b/backend/models/MediaObject.ts new file mode 100644 index 00000000..b961a501 --- /dev/null +++ b/backend/models/MediaObject.ts @@ -0,0 +1,44 @@ +/** + * MediaObject — byte blob + the small byte-level metadata (`mime`, `size`) + * any driver needs to serve it back. Owned by the Mongo ObjectStore driver + * (ADR-002 Phase 1). Display/ownership metadata (uploadedBy, originalName) + * stays on `File` in Phase 1 and moves to `Attachment` in Phase 2. + * + * Why a new collection instead of reusing `File`: the ObjectStore interface + * is driver-agnostic. Coupling the Mongo driver to a schema that also carries + * display metadata would leak those concerns across every driver (gcs, s3, + * etc.), contradicting "bytes live in the driver, metadata on the parent + * entity" from REVIEW.md §Attachments. + */ + +import mongoose, { Document, Model, Schema } from 'mongoose'; + +export interface IMediaObject extends Document { + key: string; + data: Buffer; + mime: string; + size: number; + createdAt: Date; +} + +export interface IMediaObjectModel extends Model { + findByKey(key: string): mongoose.Query; +} + +const mediaObjectSchema = new Schema({ + key: { type: String, required: true, unique: true }, + data: { type: Buffer, required: true }, + mime: { type: String, required: true }, + size: { type: Number, required: true }, + createdAt: { type: Date, default: Date.now }, +}); + +mediaObjectSchema.statics.findByKey = function (key: string) { + return this.findOne({ key }); +}; + +export default mongoose.model('MediaObject', mediaObjectSchema); +// CJS compat: let require() return the default export directly +// eslint-disable-next-line @typescript-eslint/no-require-imports +module.exports = exports['default']; +Object.assign(module.exports, exports); diff --git a/backend/routes/uploads.ts b/backend/routes/uploads.ts index 2531f3f1..3adb4d85 100644 --- a/backend/routes/uploads.ts +++ b/backend/routes/uploads.ts @@ -1,3 +1,19 @@ +/** + * Attachment uploads — POST writes bytes through the configured ObjectStore + * driver (ADR-002 Phase 1); GET streams them back, trying the driver first + * and falling back to legacy `File.data` records for backward compatibility. + * + * Phase 1 intentionally does not close the pre-existing authorization gap on + * GET: adding `auth` middleware here would break every `` across + * the app, since browsers cannot attach JWTs to plain image requests. The + * proper fix (signed short-TTL tokens in the URL, minted per-viewer after a + * pod/post ACL check) lands in Phase 1b alongside the frontend changes that + * rewrite upload URLs at render time. The gap is cited as a Critical in + * REVIEW.md §Attachments (ADR-002) — do not approve Phase 1 to production + * without Phase 1b. + */ + +import path from 'path'; // eslint-disable-next-line global-require const express = require('express'); // eslint-disable-next-line global-require @@ -6,6 +22,7 @@ const multer = require('multer'); const File = require('../models/File'); // eslint-disable-next-line global-require const auth = require('../middleware/auth'); +import { getObjectStore } from '../services/objectStore'; interface AuthReq { userId?: string; @@ -21,12 +38,27 @@ interface Res { send: (d: unknown) => void; } +const ALLOWED_EXT_REGEX = /\.(jpg|jpeg|png|gif|webp|svg)$/i; + +// Size cap is driven by the driver's declared max. multer returns 413 itself +// when the multipart exceeds it, so the route doesn't need a secondary check. +// +// This runs at module load, so driver constructors MUST stay pure (no network, +// no DB connects). The current MongoObjectStore only sets capability fields; +// any future driver (GCS, S3) should defer network I/O to first put/get, not +// its constructor, or this require() becomes a silent network call. +const MAX_UPLOAD_BYTES = getObjectStore().capabilities.maxObjectBytes; + const storage = multer.memoryStorage(); const upload = multer({ storage, - limits: { fileSize: 5 * 1024 * 1024 }, - fileFilter(_req: unknown, file: { originalname: string }, cb: (err: Error | null, accept: boolean) => void) { - if (!file.originalname.match(/\.(jpg|jpeg|png|gif|webp|svg|JPG|JPEG|PNG|GIF|WEBP|SVG)$/)) { + limits: { fileSize: MAX_UPLOAD_BYTES }, + fileFilter( + _req: unknown, + file: { originalname: string }, + cb: (err: Error | null, accept: boolean) => void, + ) { + if (!ALLOWED_EXT_REGEX.test(file.originalname)) { return cb(new Error('Only image files are allowed!'), false); } cb(null, true); @@ -39,18 +71,24 @@ router.post('/', auth, upload.single('image'), async (req: AuthReq, res: Res) => try { if (!req.file) return res.status(400).json({ msg: 'No file uploaded' }); + const store = getObjectStore(); + const uniqueSuffix = `${Date.now()}-${Math.round(Math.random() * 1e9)}`; - const fileName = `${uniqueSuffix}.${req.file.originalname.split('.').pop()}`; + const ext = path.extname(req.file.originalname); + const fileName = `${uniqueSuffix}${ext}`; + + await store.put(fileName, req.file.buffer, req.file.mimetype); + // Metadata-only File record. Bytes live in the ObjectStore driver; + // Phase 2 replaces File with a proper Attachment registry and removes + // the legacy `data` field entirely. const newFile = new File({ fileName, originalName: req.file.originalname, contentType: req.file.mimetype, size: req.file.size, - data: req.file.buffer, uploadedBy: req.userId, }); - await newFile.save(); const { protocol } = req; @@ -67,15 +105,30 @@ router.post('/', auth, upload.single('image'), async (req: AuthReq, res: Res) => router.get('/:fileName', async (req: AuthReq, res: Res) => { try { - const file = await File.findByFileName(req.params?.fileName); - if (!file) return res.status(404).json({ msg: 'File not found' }); + const fileName = req.params?.fileName; + if (!fileName) return res.status(400).json({ msg: 'fileName required' }); - res.set('Content-Type', file.contentType); - res.send(file.data); + const store = getObjectStore(); + const obj = await store.get(fileName); + if (obj) { + res.set('Content-Type', obj.mime); + (obj.stream as unknown as { pipe: (dst: unknown) => void }).pipe(res); + return; + } + + // Backward-compat fallback for pre-ADR-002 records with inline bytes. + // Phase 2 backfills these into the driver and removes this fallback. + const legacy = await File.findByFileName(fileName); + if (legacy && legacy.data && legacy.data.length > 0) { + res.set('Content-Type', legacy.contentType); + return res.send(legacy.data); + } + + return res.status(404).json({ msg: 'File not found' }); } catch (err) { const e = err as { message?: string }; console.error('Error retrieving file:', e.message); - res.status(500).json({ msg: 'Server Error' }); + return res.status(500).json({ msg: 'Server Error' }); } }); diff --git a/backend/services/objectStore/ObjectStore.ts b/backend/services/objectStore/ObjectStore.ts new file mode 100644 index 00000000..4f4846d4 --- /dev/null +++ b/backend/services/objectStore/ObjectStore.ts @@ -0,0 +1,52 @@ +/** + * ObjectStore — byte-storage abstraction for attachments (ADR-002). + * + * Drivers encapsulate *where the bytes live*. Metadata (uploadedBy, + * originalName, sha256, etc.) lives on the parent entity (File in Phase 1, + * Attachment/post/message in Phase 2+). A driver's job is narrow: put / get / + * delete opaque bytes keyed by a string. + * + * The interface is intentionally narrow. Capabilities that only some drivers + * support land as optional methods when a driver actually needs them — not + * speculatively. In particular, Phase 3 (GCS) will add either an optional + * `getSignedReadUrl(key, ttl)` method on this interface or a `redirectUrl` + * field on `StoredObject`; the shape is deliberately deferred until the + * driver exists and the route layer has a concrete caller to satisfy. + */ + +export interface ObjectStoreCapabilities { + /** Human-readable driver name; used in logs and /health. Open for + * extension — new drivers do not need to edit this file to register. */ + readonly name: string; + /** Upper bound on a single object's byte size. Route layer uses this to + * size the multipart limit and emit 413 before bytes are buffered. */ + readonly maxObjectBytes: number; +} + +export interface StoredObject { + /** Readable stream of the object's bytes. */ + stream: NodeJS.ReadableStream; + /** MIME type as supplied at `put` time. */ + mime: string; + /** Byte length. */ + size: number; +} + +export interface ObjectStore { + readonly capabilities: ObjectStoreCapabilities; + + /** + * Write bytes under `key`. Overwrites if the key exists. Throws on quota / + * capacity errors so the route layer can return a useful status. + */ + put(key: string, body: Buffer, mime: string): Promise; + + /** + * Read bytes by `key`. Returns `null` if the key does not exist — callers + * should treat null as a 404, not an error. + */ + get(key: string): Promise; + + /** Remove bytes by `key`. No-op if the key does not exist. */ + delete(key: string): Promise; +} diff --git a/backend/services/objectStore/drivers/mongoDriver.ts b/backend/services/objectStore/drivers/mongoDriver.ts new file mode 100644 index 00000000..76e4658b --- /dev/null +++ b/backend/services/objectStore/drivers/mongoDriver.ts @@ -0,0 +1,51 @@ +/** + * Mongo driver for the ObjectStore (ADR-002 Phase 1). + * + * Writes bytes to the `MediaObject` collection. The legacy `File.data` + * records are read by the route layer as a fallback for backward compat; + * they are not managed by this driver. + * + * Default driver when `OBJECT_STORE_DRIVER` is unset — matches current dev + * behavior and docker-compose.local.yml. Not suitable past hobbyist scale + * (bytes in Mongo share IOPS with primary store). Cloud deployments + * configure `gcs` in Phase 3. + */ + +import { Readable } from 'stream'; +import MediaObject from '../../../models/MediaObject'; +import type { ObjectStore, ObjectStoreCapabilities, StoredObject } from '../ObjectStore'; + +const DEFAULT_MAX_BYTES = 10 * 1024 * 1024; // 10 MiB + +export class MongoObjectStore implements ObjectStore { + readonly capabilities: ObjectStoreCapabilities; + + constructor(maxObjectBytes: number = DEFAULT_MAX_BYTES) { + this.capabilities = { name: 'mongo', maxObjectBytes }; + } + + async put(key: string, body: Buffer, mime: string): Promise { + await MediaObject.updateOne( + { key }, + { + $set: { data: body, mime, size: body.length }, + $setOnInsert: { key, createdAt: new Date() }, + }, + { upsert: true }, + ); + } + + async get(key: string): Promise { + const doc = await MediaObject.findByKey(key); + if (!doc) return null; + return { + stream: Readable.from(doc.data), + mime: doc.mime, + size: doc.size, + }; + } + + async delete(key: string): Promise { + await MediaObject.deleteOne({ key }); + } +} diff --git a/backend/services/objectStore/index.ts b/backend/services/objectStore/index.ts new file mode 100644 index 00000000..d05c4380 --- /dev/null +++ b/backend/services/objectStore/index.ts @@ -0,0 +1,39 @@ +/** + * ObjectStore singleton — resolves the configured driver at first access. + * + * Driver selection via env: + * OBJECT_STORE_DRIVER=mongo (default — no setup needed) + * + * Additional drivers (filesystem, gcs, s3) land in later ADR-002 phases. + */ + +import type { ObjectStore } from './ObjectStore'; +import { MongoObjectStore } from './drivers/mongoDriver'; + +export type { ObjectStore, ObjectStoreCapabilities, StoredObject } from './ObjectStore'; + +let instance: ObjectStore | null = null; + +function buildDriver(): ObjectStore { + const driverName = (process.env.OBJECT_STORE_DRIVER || 'mongo').toLowerCase(); + switch (driverName) { + case 'mongo': + return new MongoObjectStore(); + default: + throw new Error( + `OBJECT_STORE_DRIVER="${driverName}" is not supported. ` + + 'Phase 1 ships only: mongo. See ADR-002 for later phases.', + ); + } +} + +/** Returns the configured ObjectStore, building it on first call. */ +export function getObjectStore(): ObjectStore { + if (!instance) instance = buildDriver(); + return instance; +} + +/** Test-only: reset the cached instance so env changes take effect. */ +export function __resetObjectStoreForTests(): void { + instance = null; +} diff --git a/docs/adr/ADR-002-attachments-and-object-storage.md b/docs/adr/ADR-002-attachments-and-object-storage.md index af2a66f5..8cb89539 100644 --- a/docs/adr/ADR-002-attachments-and-object-storage.md +++ b/docs/adr/ADR-002-attachments-and-object-storage.md @@ -274,16 +274,30 @@ Deferred. The `POST /api/media` stream-through keeps upload simpler to reason ab ### Phase 1 — Abstraction without behavior change (one PR) -**Goal:** Introduce the driver interface, wrap the current Mongo-inline code as the `mongo` driver, no user-visible change. +**Goal:** Introduce the driver interface, route bytes through it, no user-visible change. -- [ ] `backend/services/objectStore/ObjectStore.ts` — interface. -- [ ] `backend/services/objectStore/drivers/mongo.ts` — wraps the existing `File` model. -- [ ] `backend/services/objectStore/index.ts` — singleton; selects driver from `OBJECT_STORE_DRIVER` env (defaults to `mongo`). -- [ ] Refactor `backend/routes/uploads.ts` to call into `ObjectStore` instead of `File` directly. -- [ ] **Fix the auth gap on GET** — add `auth` middleware, plus a minimal access check (`referrerUserId == file.uploadedBy || exists(Post|Message).refs this id`). Not perfect but closes the public-read hole. -- [ ] Tests: driver contract suite + mongo driver implementation. +- [x] `backend/services/objectStore/ObjectStore.ts` — interface. +- [x] `backend/services/objectStore/drivers/mongoDriver.ts` — writes bytes to a new `MediaObject` collection (decoupled from `File`, which continues to hold display/ownership metadata for Phase 1 compat). +- [x] `backend/services/objectStore/index.ts` — singleton; selects driver from `OBJECT_STORE_DRIVER` env (defaults to `mongo`). +- [x] Refactor `backend/routes/uploads.ts` to call the driver; GET falls back to legacy `File.data` for pre-ADR-002 records; size cap driven by driver capability (multer `limits.fileSize`). +- [x] Make `File.data` optional so new records are metadata-only; legacy records remain readable. +- [x] Tests: mongo driver contract, env-based selection, and route GET/POST (driver path, legacy fallback, 404, metadata-only save). -**No migration needed** — schema unchanged, route paths unchanged, existing data unchanged. +**Scope discipline:** `filesystem` and `gcs`/`s3` drivers are deferred to their respective phases. Shipping `filesystem` in Phase 1 without a Helm caller would be half of Phase 5 dressed as Phase 1 (REVIEW.md §Over-engineering: *"no abstraction for <3 current users"*). One production driver is enough to validate the interface; contract tests cover behavioral parity. + +**No data migration needed** — schema change is additive (`File.data` optional; `MediaObject` is a new collection), route paths unchanged, existing records still readable via the route's fallback. + +### Phase 1b — Close the GET authorization gap (followup PR) + +**Split out of Phase 1** because the real fix requires coordinated frontend + backend changes: adding plain `auth` middleware on `GET /api/uploads/:fileName` breaks every `` in the app (browsers do not attach `Authorization` headers to image requests), and cookie auth isn't currently wired for the API. Phase 1 therefore leaves GET's existing public-read behavior untouched. + +- [ ] Backend: issue short-TTL (5 min) signed tokens scoped to `fileName + viewerUserId`; GET accepts `?t=` alongside header auth. +- [ ] Backend: per-request ACL check before minting the token — requester must own the file or have read on a pod/post that references it. Phase 1 references are URL substrings (slow scan); Phase 2's structured `attachments` makes this an indexed lookup. +- [ ] Frontend: `rewriteAttachmentUrl(url)` helper at render time that calls `GET /api/media/:id/url` and caches the signed URL until expiry. +- [ ] Rate limiting on the token-mint endpoint. +- [ ] Audit log entry on each mint (`file_id, viewer_id, ip`). + +**Ships before:** any production-scale launch. The ADR-002 invariant in REVIEW.md §Attachments is not satisfied until Phase 1b is live. ### Phase 2 — Structured `attachments` model (one PR)