adobe · rarescheseli · Mar 6, 2026 · Mar 5, 2026 · Mar 5, 2026 · Mar 6, 2026
diff --git a/packages/spacecat-shared-drs-client/README.md b/packages/spacecat-shared-drs-client/README.md
@@ -0,0 +1,159 @@
+# Spacecat Shared - DRS Client
+
+A JavaScript client for the Data Retrieval Service (DRS) API, part of the SpaceCat Shared library. It supports job submission (prompt generation, web scraping), scrape result lookups, and brand detection triggers.
+
+## Installation
+
+Install the package using npm:
+
+```bash
+npm install @adobe/spacecat-shared-drs-client
+```
+
+## Configuration
+
+Set the following environment variables:
+
+- `DRS_API_URL` — Base URL of the DRS API
+- `DRS_API_KEY` — API key for authentication
+
+## Usage
+
+### Creating an instance from Helix UniversalContext
+
+```js
+import DrsClient from '@adobe/spacecat-shared-drs-client';
+
+const client = DrsClient.createFrom(context);
+```
+
+### Constructor
+
+```js
+import DrsClient from '@adobe/spacecat-shared-drs-client';
+
+const client = new DrsClient({
+  apiBaseUrl: '<DRS_API_URL>',
+  apiKey: '<DRS_API_KEY>',
+}, log);
+```
+
+### Methods
+
+#### submitScrapeJob(params)
+
+Submits a web scraping job via the Bright Data provider.
+
+```js
+import { SCRAPE_DATASET_IDS } from '@adobe/spacecat-shared-drs-client';
+
+const result = await client.submitScrapeJob({
+  datasetId: SCRAPE_DATASET_IDS.YOUTUBE_VIDEOS,
+  siteId: 'site-uuid',
+  urls: ['https://www.youtube.com/watch?v=abc123'],
+  priority: 'HIGH', // optional: 'HIGH' (default) or 'LOW'
+});
+// Returns: { job_id: '...', ... }
+```
+
+Valid `datasetId` values (available via `SCRAPE_DATASET_IDS`):
+- `youtube_videos`
+- `youtube_comments`
+- `reddit_posts`
+- `reddit_comments`
+- `wikipedia`
+
+#### lookupScrapeResults(params)
+
+Looks up the scrape status of each URL in an array. Each result is reported as `available`, `scraping`, or `not_found`.
+
+```js
+const lookup = await client.lookupScrapeResults({
+  datasetId: SCRAPE_DATASET_IDS.REDDIT_POSTS,
+  siteId: 'site-uuid',
+  urls: ['https://www.reddit.com/r/technology/comments/abc123/post_title/'],
+});
+// Returns:
+// {
+//   results: [
+//     { url: '...', status: 'available', presigned_url: '...', scraped_at: '...', expires_in: 3600 },
+//     { url: '...', status: 'scraping', job_id: '...', message: '...' },
+//     { url: '...', status: 'not_found', message: '...' },
+//   ],
+//   summary: { total: 3, available: 1, scraping: 1, not_found: 1 }
+// }
+```
+
+#### submitPromptGenerationJob(params)
+
+Submits a prompt generation job.
+
+```js
+const result = await client.submitPromptGenerationJob({
+  baseUrl: 'https://example.com',
+  brandName: 'Example',
+  audience: 'consumers',
+  siteId: 'site-uuid',
+  imsOrgId: 'org-uuid',
+  region: 'US',       // optional, defaults to 'US'
+  numPrompts: 50,     // optional, defaults to 50
+  source: 'onboarding', // optional, defaults to 'onboarding'
+});
+```
+
+#### triggerBrandDetection(siteId, options?)
+
+Triggers brand detection re-analysis for a site.
+
+```js
+await client.triggerBrandDetection('site-uuid', { batchId: 'batch-abc', priority: 'HIGH' });
+```
+
+#### getJob(jobId)
+
+Retrieves job status and details.
+
+```js
+const job = await client.getJob('job-uuid');
+```
+
+#### submitJob(params)
+
+Submits a generic job to DRS. Used internally by the higher-level methods, but available for custom job types.
+
+```js
+const result = await client.submitJob({
+  provider_id: 'custom-provider',
+  parameters: { /* ... */ },
+});
+```
+
+## Testing
+
+To run tests:
+
+```bash
+npm test
+```
+
+## Linting
+
+To lint the code:
+
+```bash
+npm run lint
+```
+
+## Cleaning
+
+To remove `node_modules` and `package-lock.json`:
+
+```bash
+npm run clean
+```
+
+## Additional Information
+
+- **Repository**: [GitHub](https://github.com/adobe/spacecat-shared.git)
+- **Issue Tracking**: [GitHub Issues](https://github.com/adobe/spacecat-shared/issues)
+- **License**: Apache-2.0
diff --git a/packages/spacecat-shared-drs-client/src/index.d.ts b/packages/spacecat-shared-drs-client/src/index.d.ts
@@ -26,6 +26,45 @@ interface PromptGenerationParams {
   imsOrgId: string;
 }
 
+export type ScrapeDatasetId = typeof SCRAPE_DATASET_IDS[keyof typeof SCRAPE_DATASET_IDS]; // union of the SCRAPE_DATASET_IDS values
+
+interface ScrapeJobParams { // argument object of DrsClient.submitScrapeJob
+  datasetId: ScrapeDatasetId; // dataset to scrape
+  siteId: string; // SpaceCat site ID
+  urls: string[]; // URLs to scrape
+  priority?: 'HIGH' | 'LOW'; // job priority; index.js defaults it to 'HIGH'
+}
+
+interface ScrapeLookupParams { // argument object of DrsClient.lookupScrapeResults
+  datasetId: ScrapeDatasetId; // dataset the URLs belong to
+  siteId: string; // SpaceCat site ID
+  urls: string[]; // URLs to look up
+}
+
+export type ScrapeLookupStatus = 'available' | 'scraping' | 'not_found'; // status reported per result
+
+export interface ScrapeLookupResult { // one entry of ScrapeLookupResponse.results
+  url: string; // presumably the URL that was looked up — TODO confirm
+  status: ScrapeLookupStatus;
+  scraped_at?: string; // README example: shown on an 'available' result
+  presigned_url?: string; // README example: shown on an 'available' result
+  expires_in?: number; // README example: 3600 on an 'available' result; unit not stated — TODO confirm
+  job_id?: string; // README example: shown on a 'scraping' result
+  message?: string; // README example: shown on 'scraping' and 'not_found' results
+}
+
+export interface ScrapeLookupSummary { // per-status counts; README example: total 3 = 1 + 1 + 1
+  total: number;
+  available: number;
+  scraping: number;
+  not_found: number;
+}
+
+export interface ScrapeLookupResponse { // non-null result type of DrsClient.lookupScrapeResults
+  results: ScrapeLookupResult[];
+  summary: ScrapeLookupSummary;
+}
+
 interface BrandDetectionOptions {
   batchId?: string;
   priority?: string;
@@ -42,8 +81,18 @@ declare class DrsClient {
   isConfigured(): boolean;
   submitJob(params: Record<string, unknown>): Promise<DrsJobResult>;
   submitPromptGenerationJob(params: PromptGenerationParams): Promise<DrsJobResult>;
+  submitScrapeJob(params: ScrapeJobParams): Promise<DrsJobResult>;
+  lookupScrapeResults(params: ScrapeLookupParams): Promise<ScrapeLookupResponse | null>;
   triggerBrandDetection(siteId: string, options?: BrandDetectionOptions): Promise<Record<string, unknown> | null>;
   getJob(jobId: string): Promise<Record<string, unknown>>;
 }
 
+export declare const SCRAPE_DATASET_IDS: Readonly<{ // literal types mirror the frozen object in index.js
+  YOUTUBE_VIDEOS: 'youtube_videos';
+  YOUTUBE_COMMENTS: 'youtube_comments';
+  REDDIT_POSTS: 'reddit_posts';
+  REDDIT_COMMENTS: 'reddit_comments';
+  WIKIPEDIA: 'wikipedia';
+}>;
+
 export default DrsClient;
diff --git a/packages/spacecat-shared-drs-client/src/index.js b/packages/spacecat-shared-drs-client/src/index.js
@@ -12,6 +12,16 @@
 
 import { hasText, tracingFetch as fetch } from '@adobe/spacecat-shared-utils';
 
+export const SCRAPE_DATASET_IDS = Object.freeze({ // dataset IDs accepted by the scrape methods
+  YOUTUBE_VIDEOS: 'youtube_videos',
+  YOUTUBE_COMMENTS: 'youtube_comments',
+  REDDIT_POSTS: 'reddit_posts',
+  REDDIT_COMMENTS: 'reddit_comments',
+  WIKIPEDIA: 'wikipedia',
+});
+
+const VALID_SCRAPE_DATASET_IDS = new Set(Object.values(SCRAPE_DATASET_IDS)); // validation lookup
+
 export default class DrsClient {
   /**
    * Creates a DrsClient from a universal context object.
@@ -139,6 +149,72 @@ export default class DrsClient {
     });
   }
 
+  /**
+   * Submits a scrape job to DRS via the Bright Data provider.
+   * @param {object} params
+   * @param {string} params.datasetId - One of SCRAPE_DATASET_IDS values
+   * @param {string} params.siteId - SpaceCat site ID
+   * @param {string[]} params.urls - URLs to scrape
+   * @param {string} [params.priority='HIGH'] - Job priority (HIGH or LOW)
+   * @returns {Promise<object>} Job result with job_id
+   */
+  async submitScrapeJob({
+    datasetId,
+    siteId,
+    urls,
+    priority = 'HIGH',
+  }) {
+    if (!VALID_SCRAPE_DATASET_IDS.has(datasetId)) {
+      throw new Error(`Invalid dataset_id "${datasetId}". Must be one of: ${[...VALID_SCRAPE_DATASET_IDS].join(', ')}`);
+    }
+    if (!Array.isArray(urls) || urls.length === 0) {
+      throw new Error('urls must be a non-empty array of strings');
+    }
+    if (!hasText(siteId)) {
+      throw new Error('siteId is required');
+    }
+
+    this.log.info(`Submitting DRS scrape job for dataset ${datasetId}`, { datasetId, siteId, urlCount: urls.length });
+
+    return this.submitJob({
+      provider_id: 'brightdata',
+      priority,
+      parameters: {
+        dataset_id: datasetId,
+        site_id: siteId,
+        urls,
+      },
+    });
+  }
+
+  /**
+   * Looks up scraping results for an array of URLs.
+   * @param {object} params
+   * @param {string} params.datasetId - One of SCRAPE_DATASET_IDS values
+   * @param {string} params.siteId - SpaceCat site ID
+   * @param {string[]} params.urls - URLs to look up
+   * @returns {Promise<object>} Lookup results
+   */
+  async lookupScrapeResults({ datasetId, siteId, urls }) {
+    if (!VALID_SCRAPE_DATASET_IDS.has(datasetId)) {
+      throw new Error(`Invalid dataset_id "${datasetId}". Must be one of: ${[...VALID_SCRAPE_DATASET_IDS].join(', ')}`);
+    }
+    if (!Array.isArray(urls) || urls.length === 0) {
+      throw new Error('urls must be a non-empty array of strings');
+    }
+    if (!hasText(siteId)) {
+      throw new Error('siteId is required');
+    }
+
+    this.log.info(`Looking up scrape results for dataset ${datasetId}`, { datasetId, siteId, urlCount: urls.length });
+
+    return this.#request('POST', '/url-lookup', {
+      dataset_id: datasetId,
+      site_id: siteId,
+      urls,
+    });
+  }
+
   /**
    * Triggers brand detection re-analysis on existing data for a site.
    * @param {string} siteId - SpaceCat site ID