Skip to content

Commit dae0d2e

Browse files
refactor: cli and fixing errors
1 parent ebf8141 commit dae0d2e

19 files changed

Lines changed: 2856 additions & 737 deletions

File tree

flowyml/cli/evals.py

Lines changed: 62 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,8 @@
77
import json
88
import logging
99

10-
import click
10+
import rich_click as click
11+
from flowyml.cli.rich_utils import recho
1112

1213
logger = logging.getLogger(__name__)
1314

@@ -57,7 +58,7 @@ def run_eval(data, scorers, experiment, threshold, output, fmt):
5758
else:
5859
eval_ds = EvalDataset(name="cli_dataset", data=raw_data)
5960
else:
60-
click.echo(f"Unsupported data format: {data}. Use .csv or .json")
61+
recho(f"[red]❌Unsupported data format: {data}. Use .csv or .json")
6162
raise SystemExit(1)
6263

6364
# Build scorers
@@ -67,14 +68,14 @@ def run_eval(data, scorers, experiment, threshold, output, fmt):
6768
scorer = get_scorer(s_name, threshold=threshold)
6869
scorer_list.append(scorer)
6970
except ValueError as e:
70-
click.echo(f"{e}")
71+
recho(f"[red]❌{e}")
7172
raise SystemExit(1)
7273

7374
if not scorer_list:
74-
click.echo("❌ No scorers specified. Use --scorers <name>")
75+
recho("[red]❌No scorers specified. Use --scorers <name>")
7576
raise SystemExit(1)
7677

77-
click.echo(f"🔄 Running {len(scorer_list)} scorer(s) on {eval_ds.num_examples} examples...")
78+
recho(f"🔄 Running {len(scorer_list)} scorer(s) on {eval_ds.num_examples} examples...")
7879

7980
# Run evaluation
8081
result = evaluate(
@@ -86,39 +87,39 @@ def run_eval(data, scorers, experiment, threshold, output, fmt):
8687

8788
# Display results
8889
if fmt == "json":
89-
click.echo(json.dumps(result.to_dict(), indent=2, default=str))
90+
recho(json.dumps(result.to_dict(), indent=2, default=str))
9091
elif fmt == "summary":
91-
click.echo(f"\n📊 Evaluation Summary (ID: {result.eval_id[:8]})")
92-
click.echo(f" Dataset: {result.dataset_name} ({eval_ds.num_examples} examples)")
93-
click.echo(f" Passed: {'✅' if result.passed else '❌'}")
94-
click.echo(f" Pass Rate: {result.pass_rate:.1%}")
95-
click.echo("\n Scores:")
92+
recho(f"\n📊 Evaluation Summary (ID: {result.eval_id[:8]})")
93+
recho(f" Dataset: {result.dataset_name} ({eval_ds.num_examples} examples)")
94+
recho(f" Passed: {'✅' if result.passed else '❌'}")
95+
recho(f" Pass Rate: {result.pass_rate:.1%}")
96+
recho("\n Scores:")
9697
for name, value in result.summary.items():
9798
status = "✅" if result.scores.get(name, [{}])[0].passed is not False else "❌"
98-
click.echo(f" {status} {name}: {value:.4f}")
99+
recho(f" {status} {name}: {value:.4f}")
99100
else:
100101
# Table format
101-
click.echo(f"\n{'─' * 60}")
102-
click.echo(f" 📊 Evaluation Results | ID: {result.eval_id[:8]}")
103-
click.echo(f"{'─' * 60}")
104-
click.echo(f" {'Scorer':<25} {'Score':>10} {'Status':>8}")
105-
click.echo(f" {'─' * 45}")
102+
recho(f"\n{'─' * 60}")
103+
recho(f" 📊 Evaluation Results | ID: {result.eval_id[:8]}")
104+
recho(f"{'─' * 60}")
105+
recho(f" {'Scorer':<25} {'Score':>10} {'Status':>8}")
106+
recho(f" {'─' * 45}")
106107
for name, value in result.summary.items():
107108
feedbacks = result.scores.get(name, [])
108109
passed = feedbacks[0].passed if feedbacks else None
109110
status = "✅" if passed is True else ("❌" if passed is False else "—")
110-
click.echo(f" {name:<25} {value:>10.4f} {status:>8}")
111-
click.echo(f"{'─' * 60}")
112-
click.echo(
111+
recho(f" {name:<25} {value:>10.4f} {status:>8}")
112+
recho(f"{'─' * 60}")
113+
recho(
113114
f" Overall: {'✅ PASSED' if result.passed else '❌ FAILED'} | Pass Rate: {result.pass_rate:.1%}",
114115
)
115-
click.echo(f"{'─' * 60}")
116+
recho(f"{'─' * 60}")
116117

117118
# Save output
118119
if output:
119120
with open(output, "w") as f:
120121
json.dump(result.to_dict(), f, indent=2, default=str)
121-
click.echo(f"\n💾 Results saved to {output}")
122+
recho(f"\n💾 Results saved to {output}")
122123

123124

124125
@eval_cli.command("list")
@@ -132,10 +133,10 @@ def list_evals(experiment, limit):
132133
133134
flowyml eval list -e my_experiment -n 10
134135
"""
135-
click.echo("📋 Recent Evaluation Runs")
136-
click.echo(f"{'─' * 70}")
137-
click.echo(f" {'ID':<10} {'Experiment':<20} {'Status':<12} {'Scorers':<20} {'Date'}")
138-
click.echo(f" {'─' * 65}")
136+
recho("📋 Recent Evaluation Runs")
137+
recho(f"{'─' * 70}")
138+
recho(f" {'ID':<10} {'Experiment':<20} {'Status':<12} {'Scorers':<20} {'Date'}")
139+
recho(f" {'─' * 65}")
139140

140141
try:
141142
from flowyml.storage.sql import SQLMetadataStore
@@ -148,7 +149,7 @@ def list_evals(experiment, limit):
148149
eval_runs = [r for r in eval_runs if experiment in r.get("pipeline_name", "")]
149150

150151
if not eval_runs:
151-
click.echo(" No evaluation runs found.")
152+
recho(" No evaluation runs found.")
152153
else:
153154
for run in eval_runs[:limit]:
154155
run_id = run.get("run_id", "")[:8]
@@ -159,12 +160,12 @@ def list_evals(experiment, limit):
159160
if len(scorer_names) > 3:
160161
scorers_str += f" +{len(scorer_names)-3}"
161162
date = run.get("start_time", "—")[:16]
162-
click.echo(f" {run_id:<10} {exp:<20} {status:<12} {scorers_str:<20} {date}")
163+
recho(f" {run_id:<10} {exp:<20} {status:<12} {scorers_str:<20} {date}")
163164

164165
except Exception as e:
165-
click.echo(f" ⚠️ Could not load runs: {e}")
166+
recho(f" ⚠️ Could not load runs: {e}")
166167

167-
click.echo(f"{'─' * 70}")
168+
recho(f"{'─' * 70}")
168169

169170

170171
@eval_cli.command("show")
@@ -181,16 +182,16 @@ def show_eval(eval_id):
181182
store = SQLMetadataStore()
182183
run = store.load_run(eval_id)
183184
if not run:
184-
click.echo(f"Evaluation '{eval_id}' not found")
185+
recho(f"[red]❌Evaluation '{eval_id}' not found")
185186
raise SystemExit(1)
186187

187-
click.echo(f"\n📊 Evaluation: {eval_id}")
188-
click.echo(json.dumps(run, indent=2, default=str))
188+
recho(f"\n📊 Evaluation: {eval_id}")
189+
recho(json.dumps(run, indent=2, default=str))
189190

190191
except SystemExit:
191192
raise
192193
except Exception as e:
193-
click.echo(f"Error: {e}")
194+
recho(f"[red]❌Error: {e}")
194195

195196

196197
@eval_cli.command("compare")
@@ -203,11 +204,11 @@ def compare_evals(eval_ids, threshold):
203204
flowyml eval compare abc12345 def67890
204205
"""
205206
if len(eval_ids) < 2:
206-
click.echo("❌ Need at least 2 evaluation IDs to compare")
207+
recho("[red]❌Need at least 2 evaluation IDs to compare")
207208
raise SystemExit(1)
208209

209-
click.echo(f"\n📊 Comparing {len(eval_ids)} Evaluations")
210-
click.echo(f"{'─' * 70}")
210+
recho(f"\n📊 Comparing {len(eval_ids)} Evaluations")
211+
recho(f"{'─' * 70}")
211212

212213
try:
213214
from flowyml.storage.sql import SQLMetadataStore
@@ -219,34 +220,34 @@ def compare_evals(eval_ids, threshold):
219220
if run:
220221
runs.append(run)
221222
else:
222-
click.echo(f" ⚠️ Could not load: {eid}")
223+
recho(f" ⚠️ Could not load: {eid}")
223224

224225
if len(runs) >= 2:
225226
metrics_a = runs[0].get("metrics", {})
226227
metrics_b = runs[1].get("metrics", {})
227228
all_metrics = set(metrics_a.keys()) | set(metrics_b.keys())
228229

229-
click.echo(
230+
recho(
230231
f" {'Metric':<20} {eval_ids[0][:8]:>10} {eval_ids[1][:8]:>10} {'Delta':>10} {'Status':>8}",
231232
)
232-
click.echo(f" {'─' * 60}")
233+
recho(f" {'─' * 60}")
233234

234235
for metric in sorted(all_metrics):
235236
val_a = metrics_a.get(metric, "—")
236237
val_b = metrics_b.get(metric, "—")
237238
if isinstance(val_a, (int, float)) and isinstance(val_b, (int, float)):
238239
delta = val_a - val_b
239240
status = "⬆️" if delta > threshold else ("⬇️" if delta < -threshold else "➡️")
240-
click.echo(
241+
recho(
241242
f" {metric:<20} {val_a:>10.4f} {val_b:>10.4f} {delta:>+10.4f} {status}",
242243
)
243244
else:
244-
click.echo(f" {metric:<20} {str(val_a):>10} {str(val_b):>10}")
245+
recho(f" {metric:<20} {str(val_a):>10} {str(val_b):>10}")
245246

246247
except Exception as e:
247-
click.echo(f"Error: {e}")
248+
recho(f"[red]❌Error: {e}")
248249

249-
click.echo(f"{'─' * 70}")
250+
recho(f"{'─' * 70}")
250251

251252

252253
@eval_cli.command("scorers")
@@ -268,18 +269,18 @@ def list_available_scorers(scorer_type):
268269

269270
scorers = list_scorers(scorer_type)
270271

271-
click.echo("\n🎯 Available Scorers")
272+
recho("\n🎯 Available Scorers")
272273
if scorer_type:
273-
click.echo(f" (filtered: {scorer_type})")
274-
click.echo(f"{'─' * 70}")
275-
click.echo(f" {'Name':<25} {'Type':<18} {'Description'}")
276-
click.echo(f" {'─' * 65}")
274+
recho(f" (filtered: {scorer_type})")
275+
recho(f"{'─' * 70}")
276+
recho(f" {'Name':<25} {'Type':<18} {'Description'}")
277+
recho(f" {'─' * 65}")
277278

278279
for s in scorers:
279-
click.echo(f" {s['name']:<25} {s['type']:<18} {s['description'][:30]}")
280+
recho(f" {s['name']:<25} {s['type']:<18} {s['description'][:30]}")
280281

281-
click.echo(f"{'─' * 70}")
282-
click.echo(f" Total: {len(scorers)} scorer(s)")
282+
recho(f"{'─' * 70}")
283+
recho(f" Total: {len(scorers)} scorer(s)")
283284

284285

285286
@eval_cli.command("assert")
@@ -320,7 +321,7 @@ def assert_eval(data, scorers, min_score, max_score, pass_rate, fail_on_error):
320321
else:
321322
eval_ds = EvalDataset(name="assert_dataset", data=raw_data)
322323
else:
323-
click.echo(f"Unsupported data format: {data}")
324+
recho(f"[red]❌Unsupported data format: {data}")
324325
raise SystemExit(1)
325326

326327
# Build scorers
@@ -329,7 +330,7 @@ def assert_eval(data, scorers, min_score, max_score, pass_rate, fail_on_error):
329330
try:
330331
scorer_list.append(get_scorer(s_name))
331332
except ValueError as e:
332-
click.echo(f"{e}")
333+
recho(f"[red]❌{e}")
333334
raise SystemExit(1)
334335

335336
# Run evaluation
@@ -351,19 +352,19 @@ def assert_eval(data, scorers, min_score, max_score, pass_rate, fail_on_error):
351352
all_passed = False
352353

353354
# Display results
354-
click.echo(f"\n{'─' * 60}")
355-
click.echo(" 🔍 Assertion Results")
356-
click.echo(f"{'─' * 60}")
355+
recho(f"\n{'─' * 60}")
356+
recho(" 🔍 Assertion Results")
357+
recho(f"{'─' * 60}")
357358

358359
for a in assertions.results:
359360
status = "✅" if a.passed else "❌"
360-
click.echo(f" {status} {a.name}: {a.message}")
361+
recho(f" {status} {a.name}: {a.message}")
361362

362-
click.echo(f"{'─' * 60}")
363+
recho(f"{'─' * 60}")
363364

364365
if all_passed:
365-
click.echo(" ✅ All assertions PASSED")
366+
recho(" ✅ All assertions PASSED")
366367
else:
367-
click.echo(" ❌ Some assertions FAILED")
368+
recho(" ❌ Some assertions FAILED")
368369
if fail_on_error:
369370
raise SystemExit(1)

0 commit comments

Comments (0)