Full Command Tree (WIP)
Legend:
✅ implemented with real API call
🔌 API endpoint exists, not yet wired up in CLI
🏠 local operation, no API needed
❌ no public API endpoint (placeholder / future)
sodacli
│
│ Global flags (all commands):
│ --output table|json|csv # auto-detects TTY
│ --profile <name> # override active profile
│ --no-color
│ --quiet
│ --verbose
│ --no-interactive # for CI/CD and AI agents
│ --version
│
├── version # print version info 🏠
│
├── auth
│ ├── login [--host <host>] # default: cloud.soda.io; use cloud.us.soda.io for US region ✅
│ │ [--api-key-id <id>]
│ │ [--api-key-secret <secret>]
│ │ [--profile <name>]
│ │ # wizard → host → API key ID + secret → test → save to ~/.soda/credentials
│ │ # generate API keys: https://docs.soda.io/reference/generate-api-keys
│ ├── logout [--profile <name>] 🏠
│ ├── status # active profile, api key id, connection health ✅
│ └── switch <profile> 🏠
│
├── init # scaffold soda.yml + configs/ + contracts/ 🏠
│
├── dashboard # org dashboard: datasets, results, incidents, recent jobs ❌ [NOT YET IMPLEMENTABLE - API work required]
│
├── datasource (alias: ds)
│ ├── onboard <config-file-or-datasource-id> # guided setup: create or connect to datasource + configure all datasets ✅
│ │ # pass a YAML config file → creates a new datasource
│ │ # pass a datasource ID → skips creation, runs onboard flow on existing datasource
│ │ [--runner <name>] # route through a Soda Runner (only when creating; auto-detects if only one)
│ │ [--monitoring] # enable default metric monitors ✅
│ │ [--no-monitoring] # skip monitoring setup
│ │ [--profiling] # enable dataset profiling ✅
│ │ [--no-profiling] # skip profiling setup
│ │ [--contracts copilot|skeleton|none] # generate contracts for all datasets (default: none) 🔌 [API wired, backend may not persist]
│ │ # when all action flags provided: fully non-interactive, onboards all datasets automatically
│ │ # interactive wizard: runner → discovery poll → dataset select → monitoring → profiling → contracts → verify
│ │ # after datasource create, suggests: sodacli datasource onboard <id>
│ │
│ ├── create <config-file> # register datasource from a YAML connection config ✅
│ │ [--runner <runner-id>] # route through a Soda Runner (auto-detects if only one)
│ │ # config file must have: type, name, connection details
│ │ # returns datasource ID + discovery scan ID
│ │ # after success, suggests next steps: sodacli datasource onboard <id> or sodacli dataset onboard <dataset-id>
│ │
│ ├── test-connection <config-file> # test connection via Soda Runner (async poll) ✅
│ │ [--runner <runner-id>] # route through a specific runner (auto-detects if only one)
│ │
│ ├── list # list datasources ✅
│ │
│ ├── get <id> # show datasource details ✅
│ │
│ ├── update <id> # update a datasource ✅
│ │ [--label <text>] # new label
│ │ [--runner <runner-id>] # change runner
│ │ [--config <file>] # update connection config
│ │
│   ├── diagnostics <id>                  # view or configure diagnostics warehouse ✅
│   │     [--enable|--disable]            # toggle diagnostics warehouse on/off
│   │     [--warehouse same|<config-file>]  # reuse datasource connection or provide a separate config
│   │     [--table-template <tpl>]        # table name template (e.g. {dataset_name})
│   │     [--collect-results|--no-collect-results]  # store check results and scan history
│   │     [--collect-failed-rows|--no-collect-failed-rows]  # store failed rows
│   │     [--expose-failed-rows-query|--no-expose-failed-rows-query]
│   │     [--max-failed-rows <n>]         # max failed rows to store
│   │     [--failed-rows-location <text>] # message about where failed rows can be found
│   │     [--failed-rows-cta|--no-failed-rows-cta]  # enable/disable CTA button in Cloud
│   │     [--failed-rows-cta-title <text>]  # CTA button title
│   │     [--failed-rows-cta-url <url>]   # CTA button URL
│   │     [--failed-rows-strategy <type>] # useDefaultMaxRowCount|absolute|percentage
│   │     [--failed-rows-threshold <n>]   # threshold value (required for absolute|percentage, >= 1)
│   │     [--failed-rows-threshold-condition <cond>]  # greaterThan|lessThan (default: greaterThan)
│   │     # uses read-modify-write: fetches current config, applies changes, posts back
│   │     └── test-connection <id>        # test the diagnostics warehouse connection ❌
│   │
│   └── delete <id> ✅
│
├── contract
│ ├── list # list all contracts in Soda Cloud ✅
│ ├── create --dataset <ds/db/schema/table> 🔌 [API wired, backend may not persist yet]
│ │ --mode skeleton|copilot # default: skeleton
│ │ --output <file>
│ │ [--no-wait] # start generation and return immediately (copilot mode only)
│ │ # skeleton → async: POST /contracts/actions/createSkeleton → poll status → fetch contract
│ │ # copilot → async: POST /contracts/actions/generate → poll status → fetch contract (requires license)
│ ├── lint (alias: validate) [<file...>] # validate contract YAML against JSON schema 🏠
│ │ # validates structure, properties, and types against embedded contract schema
│ │ # supports multiple files and glob patterns; defaults to contracts/*.yml
│ │ # no auth or network required
│ ├── push [<file>] # push contract definition to cloud (upsert) ✅
│ │ # reads 'dataset:' field from file to find/create the contract
│ ├── pull <identifier> # pull contract from cloud → <table>.yml ✅
│ │ # identifier is the dataset qualified name: datasource/db/schema/table
│ ├── diff [<file>] # local vs cloud diff (dataset read from file) ✅
│ ├── copilot [<file>] [<prompt>] [--dataset <fqn>] ❌
│ │ # no args → wizard: generate or improve?
│ │ # file, no prompt → wizard: what to improve?
│ │ # --dataset only → generate from scratch, no prompt needed
│ │ # file + prompt → improve existing contract
│ │ # --no-interactive → fails with clear error describing what's missing
│ │ --output <file>
│ ├── verify <file|dqn> # verify contract checks against data
│ │ [--datasource <file>] # datasource config file (required with --local)
│ │ [--local] # run locally via soda-core 🔌 [requires soda-core on PATH]
│ │ [--push] # push results to Soda Cloud (useful with --local)
│ │ [--no-wait] # start verification and return immediately (cloud mode only) ✅
│ │ [--set key=value] # runtime variable overrides (repeatable)
│ │ # default: push contract → POST /contracts/{id}/verify → poll GET /scans/{id} → results ✅
│ │ # --local: shell out to soda-core, requires --datasource, prompts install if missing
│ │ # exit codes: 0=pass 1=checks failed 2=error 3=auth error
│ └── proposal # PR flow for published contracts ❌ [NOT YET IMPLEMENTABLE - API work required]
│ ├── list [--status open|done|all]
│ ├── pull <id> [--revision <n>] # download a proposal locally
│ ├── push <id> [<file>] [--message] # submit changes
│ └── close <id> [--status done|wontdo]
│
├── job (alias: scan)
│ ├── list ❌ [NOT YET IMPLEMENTABLE - API work required]
│ │ [--datasource <id>]
│ │ [--dataset <id>]
│ │ [--type contract|monitor|all]
│ │ [--status passing|failing|running|error]
│ │
│ │ # ID DATASOURCE DATASET TYPE STATUS DATE
│ │ # sc_abc123 pg_prod orders contract ✗ failing 2026-03-05 08:12
│ │ # sc_def456 pg_prod users monitor ✓ passing 2026-03-05 06:45
│ │
│ ├── status <id> # show scan/job status, checks, timing ✅
│ ├── logs <id> [--follow] ✅
│ └── cancel <id> # cancel a running scan 🔌 [CLI wired, API returns 404]
│
├── results # contract checks + monitor alerts in one place ✅ (checks only; monitors pending API)
│ └── list ✅
│ [--dataset <id>] # filter by dataset ID (server-side)
│ [--dataset-name <pattern>] # filter by qualified name substring, case-insensitive (client-side)
│ [--status passing|failing|error] # client-side filter (API does not support server-side filtering)
│ [--type check|monitor|all] # monitor returns graceful "not available" message
│ [--limit <n>] # default: 10
│ [--sort dataset|name|column|status|date] # default: date
│ [--order asc|desc] # default: desc
│ [--from <date>] # YYYY-MM-DD or ISO8601; show results on or after this date
│ [--until <date>] # YYYY-MM-DD or ISO8601; show results on or before this date (inclusive)
│
│ # DATASET ID DATASET TYPE NAME COLUMN STATUS DATE
│ # ds_abc123 snowflakeproduct/SODA_PRODUCT/PUBLIC/ORDERS check row_count > 0 ✓ passing 2026-03-05 08:12
│ # ds_abc123 snowflakeproduct/SODA_PRODUCT/PUBLIC/ORDERS check no_nulls order_id ✗ failing 2026-03-05 08:12
│ # ds_def456 pg/prod/users check No missing values email ✓ passing 2026-03-05 06:45
│
├── dataset
│ ├── list [--filter <query>] [--datasource <name>] [--tag <tag>] ✅
│ │ # shows onboarded datasets from ListDatasets + discovered (not yet onboarded) from ListDiscoveredDatasets
│ │ # columns: id, name, datasource, status (onboarded|not onboarded), checks, monitors, updated
│ ├── get <id> # show dataset details ✅
│ │ # name, qualified name, datasource, DQ status, checks, incidents, partition column, tags, cloud URL
│ ├── update <id> [--owner <user-id>] [--tag <tag>] ✅
│ │ # --tag is repeatable and replaces all existing tags
│ │ # --owner takes a user ID (use `sodacli iam user list` to find it)
│ ├── delete <id> ✅
│ │
│ ├── attributes <id> # list dataset attributes 🔌 [CLI wired, API documented but returns HTML on dev]
│ │
│ ├── time-partition <id> --column <col> ✅
│ │ # affects both profiling and metric monitoring (anomaly detection window)
│ │ # no --column → view current partition column (via GET /datasets/{id})
│ │
│ ├── profiling <id> [--enable|--disable] ✅
│ │ [--schedule <cron>] [--timezone <tz>]
│ │ [--sampling-rows <n>]
│ │ # no flags → show current profiling data + column stats
│ │ └── refresh <id> # trigger a new profiling run ❌
│ │
│ ├── diagnostics <id> # view or configure diagnostics warehouse overrides ✅
│ │ # no flags → show current settings (GET /api/v1/datasets/{id}/diagnosticsWarehouse)
│ │ [--collect-results|--no-collect-results] # store check results and scan history ✅
│ │ [--collect-failed-rows|--no-collect-failed-rows] ✅
│ │
│ ├── onboard <id> # guided setup for an existing dataset ✅
│ │ [--monitoring] # enable default metric monitors ✅
│ │ [--no-monitoring] # skip monitoring setup
│ │ [--profiling] # enable dataset profiling ✅
│ │ [--no-profiling] # skip profiling setup
│ │ [--contracts copilot|skeleton|none] # generate contract (default: none) 🔌 [API wired, backend may not persist]
│ │ # when all action flags provided: fully non-interactive
│ │ # interactive wizard: monitoring → profiling → contracts → verify
│ │ # works with datasets already in Soda Cloud
│ │
│ └── permissions ✅
│ ├── list <id> ✅
│ ├── assign <id> --role <role-id> --user <user-id>|--group <group-id> ✅
│ │ # read-modify-write on responsibilities array
│ └── revoke <id> --role <role-id> --user <user-id>|--group <group-id> ✅
│ # read-modify-write on responsibilities array
│
├── monitor
│ ├── list --dataset <id> ✅
│ │ [--type column|custom|dataset]
│ │ # --dataset is required (no global monitor list endpoint in public API)
│ │
│ ├── config <dataset-id> [--enable|--disable] ✅
│ │ [--schedule <cron>] [--timezone <tz>]
│ │ # no flags → show current config (enabled, schedule, monitor count) ✅
│ │ # --enable/--disable/--schedule update via POST /metricMonitoring
│ │
│ ├── add --dataset <id> --type column|custom|dataset
│ │
│ │ # --type column ✅
│ │ # --column <col> (required)
│ │ # --metric <metric-type> (required; tab-completion available)
│ │ # count, missing-pct, duplicate-pct, distinct-count,
│ │ # min, max, avg, sum, std-dev, variance, q1, median, q3,
│ │ # min-length, max-length, avg-length, freshness
│ │ # [--group-by <col>] (repeatable; partitions monitor by column values)
│ │ # [--exclude-values <col=v1,v2>] (repeatable; excludes values from a --group-by column)
│ │
│ │ # --type dataset ❌ [no write endpoint in public API]
│ │ # --metric <metric-type>
│ │ # row-count, freshness, schema, rows-inserted, row-count-change, timeliness
│ │ # dataset monitors exist by default but enabling them requires a write endpoint
│ │
│ │ # --type custom ✅
│ │ # --name <name> (required)
│ │ # --sql <query> | --sql-file <path> (required)
│ │ # --result-metric <col> (required)
│ │ # [--column <col>]
│ │
│ ├── update <id> --dataset <id> ✅
│ │ [--enable|--disable] # toggle monitor on/off
│ │ [--sql <query>] # update SQL (custom monitors only)
│ │ [--name <name>] # update name (custom monitors only)
│ │ [--result-metric <col>] # update result metric (custom monitors only)
│ │ # auto-detects monitor type (column vs custom) from dataset config
│ └── delete <id> --dataset <id> ✅
│ # looks up monitor type from dataset config, then calls appropriate DELETE
│
├── incident
│ ├── list [--status reported|investigating|fixing|resolved] [--dataset <id>] 🔌 [CLI wired, API documented but returns HTML on dev]
│ ├── get <id> 🔌
│ └── update <id> 🔌
│ [--title <text>]
│ [--severity minor|major|critical]
│ [--description <text>]
│ [--assigned-to <email>]
│ [--status reported|investigating|fixing|resolved]
│
├── notification
│ ├── rule
│ │ ├── list ❌ [NOT YET IMPLEMENTABLE - API work required]
│ │ ├── add --name <n> ❌
│ │ │ --source check|monitor|all
│ │ │ --alert warn-fail|fail-only|anomaly
│ │ │ # scope filters (optional — omit = applies to all of that source):
│ │ │ [--datasource <label>]
│ │ │ [--dataset <label>]
│ │ │ [--dataset-owner <email>]
│ │ │ [--dataset-tag <tag>]
│ │ │ # check-only filters:
│ │ │ [--check-name <val>] # supports "contains:value"
│ │ │ [--check-owner <email>]
│ │ │ # monitor-only filters:
│ │ │ [--monitor-type <type>]
│ │ │ # recipients:
│ │ │ --notify <email|role-id> (repeatable)
│ │ │ # options:
│ │ │ [--granular-results]
│ │ │ [--message <text>]
│ │ ├── update <id> [same flags as add] ❌
│ │ └── delete <id> ❌
│ │
│ └── integration
│ ├── list ❌
│ ├── add slack|teams|webhook [integration-specific flags] ❌
│ ├── test <id> ❌
│ └── delete <id> ❌
│
├── iam
│ ├── role
│ │ ├── list [--scope global|dataset] ✅ (dataset-scoped roles only; global roles not in public API)
│ │ ├── create --name <n> --scope global|dataset ❌
│ │ │ [--description <text>]
│ │ │ [--permission <permission>] (repeatable)
│ │ │ # permissions: create-api-keys|create-datasets|manage-attributes|
│ │ │ # manage-datasources|manage-notification-rules|
│ │ │ # manage-org-settings|manage-scan-definitions
│ │ ├── delete <id> ❌
│ │ └── show <id> # list permissions in this role ❌
│ │
│ ├── user
│ │ ├── list ✅
│ │ ├── invite --email <email> # invite user to org (repeatable, max 10) ✅
│ │ ├── remove <user-id> # remove user from org ❌
│ │ ├── assign <user-id> --role <role-id> ❌
│ │ └── revoke <user-id> --role <role-id> ❌
│ │
│ ├── group
│ │ ├── list ✅
│ │ ├── create --name <n> [--member <email>] (repeatable) ✅
│ │ ├── update <id> [--name <n>] ✅
│ │ │ [--add-member <email>] (repeatable)
│ │ │ [--remove-member <email>] (repeatable)
│ │ ├── delete <id> ✅
│ │ ├── assign <group-id> --role <role-id> ❌
│ │ └── revoke <group-id> --role <role-id> ❌
│ │
│ └── service-account
│ ├── list ❌ [NOT YET IMPLEMENTABLE - API work required]
│ ├── create --name <n> --email <email> ❌
│ └── delete <id> ❌
│
├── runner
│ ├── list ✅
│ ├── get <runner-id> ✅
│ ├── create --name <n> # returns API key credentials (shown once) ✅
│ └── delete <runner-id> ✅
│
├── secret
│ ├── list # list all secrets ✅
│ ├── get <id> # show secret details ✅
│ ├── create --name <n> [--value <v>] ✅
│ │ # value encrypted client-side (AES-256-GCM + RSA-OAEP) before sending
│ │ # --value omitted → masked interactive prompt (or pipe via stdin)
│ │ # reference in configs: ${secret.NAME}
│ ├── update <id> [--value <v>] ✅
│ │ # same input methods as create: --value flag, stdin pipe, or masked prompt
│ └── delete <id> ✅
│
└── completion bash|zsh|fish 🏠
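
Example: datasource connection config
The tree above says `datasource create <config-file>` needs type, name, and
connection details, and that configs can reference `${secret.NAME}`. A minimal
sketch of such a file is below; the connection field names (host, port,
database, username, password) and the secret name are illustrative assumptions,
not the authoritative schema.

```shell
# Write a hypothetical postgres connection config, then register it.
# The heredoc is single-quoted so ${secret....} is kept literal for the server.
cat > postgres.yml <<'EOF'
type: postgres
name: pg_prod
connection:
  host: db.internal.example.com
  port: 5432
  database: prod
  username: soda_reader
  password: ${secret.PG_PROD_PASSWORD}   # resolved from `sodacli secret`
EOF
# then:
#   sodacli datasource create postgres.yml
#   sodacli datasource onboard <returned-id>
```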
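
Example: handling `contract verify` exit codes in CI
`contract verify` documents exit codes 0=pass, 1=checks failed, 2=error,
3=auth error. A sketch of a CI wrapper that branches on them (the contract
path is a placeholder):

```shell
# Map the documented exit codes of `sodacli contract verify` to messages.
# Assumes sodacli is on PATH; --no-interactive suits CI per the global flags.
verify_contract() {
  sodacli contract verify "$1" --no-interactive
  case $? in
    0) echo "pass" ;;
    1) echo "checks failed"; return 1 ;;
    2) echo "scan error"; return 2 ;;
    3) echo "auth error - run: sodacli auth login"; return 3 ;;
    *) echo "unexpected exit"; return 2 ;;
  esac
}
# usage: verify_contract contracts/orders.yml
```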
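
Example: fire-and-forget verification with `--no-wait`
Cloud-mode verification supports `--no-wait` plus `job status <id>` for
polling. The loop below sketches that flow; matching the word "running" in
the status output is an assumption about the JSON shape, so treat the parsing
as pseudocode to adapt.

```shell
# Poll `sodacli job status` until the job leaves the running state.
# $1 is a job/scan ID placeholder (e.g. sc_abc123 from the tree's examples).
wait_for_job() {
  while :; do
    out=$(sodacli job status "$1" --output json) || return 2
    case $out in
      *running*) sleep 10 ;;          # still in flight; poll again
      *) printf '%s\n' "$out"; return 0 ;;
    esac
  done
}
# usage: sodacli contract verify contracts/orders.yml --no-wait
#        wait_for_job <printed-job-id>
```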