77import json
88import logging
99
10- import click
10+ import rich_click as click
11+ from flowyml .cli .rich_utils import recho
1112
1213logger = logging .getLogger (__name__ )
1314
@@ -57,7 +58,7 @@ def run_eval(data, scorers, experiment, threshold, output, fmt):
5758 else :
5859 eval_ds = EvalDataset (name = "cli_dataset" , data = raw_data )
5960 else :
60- click . echo (f"❌ Unsupported data format: { data } . Use .csv or .json" )
61+ recho (f"[red]❌ Unsupported data format: { data } . Use .csv or .json" )
6162 raise SystemExit (1 )
6263
6364 # Build scorers
@@ -67,14 +68,14 @@ def run_eval(data, scorers, experiment, threshold, output, fmt):
6768 scorer = get_scorer (s_name , threshold = threshold )
6869 scorer_list .append (scorer )
6970 except ValueError as e :
70- click . echo (f"❌ { e } " )
71+ recho (f"[red]❌ { e } " )
7172 raise SystemExit (1 )
7273
7374 if not scorer_list :
74- click . echo ( "❌ No scorers specified. Use --scorers <name>" )
75+ recho ( "[red]❌ No scorers specified. Use --scorers <name>" )
7576 raise SystemExit (1 )
7677
77- click . echo (f"🔄 Running { len (scorer_list )} scorer(s) on { eval_ds .num_examples } examples..." )
78+ recho (f"🔄 Running { len (scorer_list )} scorer(s) on { eval_ds .num_examples } examples..." )
7879
7980 # Run evaluation
8081 result = evaluate (
@@ -86,39 +87,39 @@ def run_eval(data, scorers, experiment, threshold, output, fmt):
8687
8788 # Display results
8889 if fmt == "json" :
89- click . echo (json .dumps (result .to_dict (), indent = 2 , default = str ))
90+ recho (json .dumps (result .to_dict (), indent = 2 , default = str ))
9091 elif fmt == "summary" :
91- click . echo (f"\n 📊 Evaluation Summary (ID: { result .eval_id [:8 ]} )" )
92- click . echo (f" Dataset: { result .dataset_name } ({ eval_ds .num_examples } examples)" )
93- click . echo (f" Passed: { '✅' if result .passed else '❌' } " )
94- click . echo (f" Pass Rate: { result .pass_rate :.1%} " )
95- click . echo ("\n Scores:" )
92+ recho (f"\n 📊 Evaluation Summary (ID: { result .eval_id [:8 ]} )" )
93+ recho (f" Dataset: { result .dataset_name } ({ eval_ds .num_examples } examples)" )
94+ recho (f" Passed: { '✅' if result .passed else '❌' } " )
95+ recho (f" Pass Rate: { result .pass_rate :.1%} " )
96+ recho ("\n Scores:" )
9697 for name , value in result .summary .items ():
9798 status = "✅" if result .scores .get (name , [{}])[0 ].passed is not False else "❌"
98- click . echo (f" { status } { name } : { value :.4f} " )
99+ recho (f" { status } { name } : { value :.4f} " )
99100 else :
100101 # Table format
101- click . echo (f"\n { '─' * 60 } " )
102- click . echo (f" 📊 Evaluation Results | ID: { result .eval_id [:8 ]} " )
103- click . echo (f"{ '─' * 60 } " )
104- click . echo (f" { 'Scorer' :<25} { 'Score' :>10} { 'Status' :>8} " )
105- click . echo (f" { '─' * 45 } " )
102+ recho (f"\n { '─' * 60 } " )
103+ recho (f" 📊 Evaluation Results | ID: { result .eval_id [:8 ]} " )
104+ recho (f"{ '─' * 60 } " )
105+ recho (f" { 'Scorer' :<25} { 'Score' :>10} { 'Status' :>8} " )
106+ recho (f" { '─' * 45 } " )
106107 for name , value in result .summary .items ():
107108 feedbacks = result .scores .get (name , [])
108109 passed = feedbacks [0 ].passed if feedbacks else None
109110 status = "✅" if passed is True else ("❌" if passed is False else "—" )
110- click . echo (f" { name :<25} { value :>10.4f} { status :>8} " )
111- click . echo (f"{ '─' * 60 } " )
112- click . echo (
111+ recho (f" { name :<25} { value :>10.4f} { status :>8} " )
112+ recho (f"{ '─' * 60 } " )
113+ recho (
113114 f" Overall: { '✅ PASSED' if result .passed else '❌ FAILED' } | Pass Rate: { result .pass_rate :.1%} " ,
114115 )
115- click . echo (f"{ '─' * 60 } " )
116+ recho (f"{ '─' * 60 } " )
116117
117118 # Save output
118119 if output :
119120 with open (output , "w" ) as f :
120121 json .dump (result .to_dict (), f , indent = 2 , default = str )
121- click . echo (f"\n 💾 Results saved to { output } " )
122+ recho (f"\n 💾 Results saved to { output } " )
122123
123124
124125@eval_cli .command ("list" )
@@ -132,10 +133,10 @@ def list_evals(experiment, limit):
132133
133134 flowyml eval list -e my_experiment -n 10
134135 """
135- click . echo ("📋 Recent Evaluation Runs" )
136- click . echo (f"{ '─' * 70 } " )
137- click . echo (f" { 'ID' :<10} { 'Experiment' :<20} { 'Status' :<12} { 'Scorers' :<20} { 'Date' } " )
138- click . echo (f" { '─' * 65 } " )
136+ recho ("📋 Recent Evaluation Runs" )
137+ recho (f"{ '─' * 70 } " )
138+ recho (f" { 'ID' :<10} { 'Experiment' :<20} { 'Status' :<12} { 'Scorers' :<20} { 'Date' } " )
139+ recho (f" { '─' * 65 } " )
139140
140141 try :
141142 from flowyml .storage .sql import SQLMetadataStore
@@ -148,7 +149,7 @@ def list_evals(experiment, limit):
148149 eval_runs = [r for r in eval_runs if experiment in r .get ("pipeline_name" , "" )]
149150
150151 if not eval_runs :
151- click . echo (" No evaluation runs found." )
152+ recho (" No evaluation runs found." )
152153 else :
153154 for run in eval_runs [:limit ]:
154155 run_id = run .get ("run_id" , "" )[:8 ]
@@ -159,12 +160,12 @@ def list_evals(experiment, limit):
159160 if len (scorer_names ) > 3 :
160161 scorers_str += f" +{ len (scorer_names )- 3 } "
161162 date = run .get ("start_time" , "—" )[:16 ]
162- click . echo (f" { run_id :<10} { exp :<20} { status :<12} { scorers_str :<20} { date } " )
163+ recho (f" { run_id :<10} { exp :<20} { status :<12} { scorers_str :<20} { date } " )
163164
164165 except Exception as e :
165- click . echo (f" ⚠️ Could not load runs: { e } " )
166+ recho (f" ⚠️ Could not load runs: { e } " )
166167
167- click . echo (f"{ '─' * 70 } " )
168+ recho (f"{ '─' * 70 } " )
168169
169170
170171@eval_cli .command ("show" )
@@ -181,16 +182,16 @@ def show_eval(eval_id):
181182 store = SQLMetadataStore ()
182183 run = store .load_run (eval_id )
183184 if not run :
184- click . echo (f"❌ Evaluation '{ eval_id } ' not found" )
185+ recho (f"[red]❌ Evaluation '{ eval_id } ' not found" )
185186 raise SystemExit (1 )
186187
187- click . echo (f"\n 📊 Evaluation: { eval_id } " )
188- click . echo (json .dumps (run , indent = 2 , default = str ))
188+ recho (f"\n 📊 Evaluation: { eval_id } " )
189+ recho (json .dumps (run , indent = 2 , default = str ))
189190
190191 except SystemExit :
191192 raise
192193 except Exception as e :
193- click . echo (f"❌ Error: { e } " )
194+ recho (f"[red]❌ Error: { e } " )
194195
195196
196197@eval_cli .command ("compare" )
@@ -203,11 +204,11 @@ def compare_evals(eval_ids, threshold):
203204 flowyml eval compare abc12345 def67890
204205 """
205206 if len (eval_ids ) < 2 :
206- click . echo ( "❌ Need at least 2 evaluation IDs to compare" )
207+ recho ( "[red]❌ Need at least 2 evaluation IDs to compare" )
207208 raise SystemExit (1 )
208209
209- click . echo (f"\n 📊 Comparing { len (eval_ids )} Evaluations" )
210- click . echo (f"{ '─' * 70 } " )
210+ recho (f"\n 📊 Comparing { len (eval_ids )} Evaluations" )
211+ recho (f"{ '─' * 70 } " )
211212
212213 try :
213214 from flowyml .storage .sql import SQLMetadataStore
@@ -219,34 +220,34 @@ def compare_evals(eval_ids, threshold):
219220 if run :
220221 runs .append (run )
221222 else :
222- click . echo (f" ⚠️ Could not load: { eid } " )
223+ recho (f" ⚠️ Could not load: { eid } " )
223224
224225 if len (runs ) >= 2 :
225226 metrics_a = runs [0 ].get ("metrics" , {})
226227 metrics_b = runs [1 ].get ("metrics" , {})
227228 all_metrics = set (metrics_a .keys ()) | set (metrics_b .keys ())
228229
229- click . echo (
230+ recho (
230231 f" { 'Metric' :<20} { eval_ids [0 ][:8 ]:>10} { eval_ids [1 ][:8 ]:>10} { 'Delta' :>10} { 'Status' :>8} " ,
231232 )
232- click . echo (f" { '─' * 60 } " )
233+ recho (f" { '─' * 60 } " )
233234
234235 for metric in sorted (all_metrics ):
235236 val_a = metrics_a .get (metric , "—" )
236237 val_b = metrics_b .get (metric , "—" )
237238 if isinstance (val_a , (int , float )) and isinstance (val_b , (int , float )):
238239 delta = val_a - val_b
239240 status = "⬆️" if delta > threshold else ("⬇️" if delta < - threshold else "➡️" )
240- click . echo (
241+ recho (
241242 f" { metric :<20} { val_a :>10.4f} { val_b :>10.4f} { delta :>+10.4f} { status } " ,
242243 )
243244 else :
244- click . echo (f" { metric :<20} { str (val_a ):>10} { str (val_b ):>10} " )
245+ recho (f" { metric :<20} { str (val_a ):>10} { str (val_b ):>10} " )
245246
246247 except Exception as e :
247- click . echo (f"❌ Error: { e } " )
248+ recho (f"[red]❌ Error: { e } " )
248249
249- click . echo (f"{ '─' * 70 } " )
250+ recho (f"{ '─' * 70 } " )
250251
251252
252253@eval_cli .command ("scorers" )
@@ -268,18 +269,18 @@ def list_available_scorers(scorer_type):
268269
269270 scorers = list_scorers (scorer_type )
270271
271- click . echo ("\n 🎯 Available Scorers" )
272+ recho ("\n 🎯 Available Scorers" )
272273 if scorer_type :
273- click . echo (f" (filtered: { scorer_type } )" )
274- click . echo (f"{ '─' * 70 } " )
275- click . echo (f" { 'Name' :<25} { 'Type' :<18} { 'Description' } " )
276- click . echo (f" { '─' * 65 } " )
274+ recho (f" (filtered: { scorer_type } )" )
275+ recho (f"{ '─' * 70 } " )
276+ recho (f" { 'Name' :<25} { 'Type' :<18} { 'Description' } " )
277+ recho (f" { '─' * 65 } " )
277278
278279 for s in scorers :
279- click . echo (f" { s ['name' ]:<25} { s ['type' ]:<18} { s ['description' ][:30 ]} " )
280+ recho (f" { s ['name' ]:<25} { s ['type' ]:<18} { s ['description' ][:30 ]} " )
280281
281- click . echo (f"{ '─' * 70 } " )
282- click . echo (f" Total: { len (scorers )} scorer(s)" )
282+ recho (f"{ '─' * 70 } " )
283+ recho (f" Total: { len (scorers )} scorer(s)" )
283284
284285
285286@eval_cli .command ("assert" )
@@ -320,7 +321,7 @@ def assert_eval(data, scorers, min_score, max_score, pass_rate, fail_on_error):
320321 else :
321322 eval_ds = EvalDataset (name = "assert_dataset" , data = raw_data )
322323 else :
323- click . echo (f"❌ Unsupported data format: { data } " )
324+ recho (f"[red]❌ Unsupported data format: { data } " )
324325 raise SystemExit (1 )
325326
326327 # Build scorers
@@ -329,7 +330,7 @@ def assert_eval(data, scorers, min_score, max_score, pass_rate, fail_on_error):
329330 try :
330331 scorer_list .append (get_scorer (s_name ))
331332 except ValueError as e :
332- click . echo (f"❌ { e } " )
333+ recho (f"[red]❌ { e } " )
333334 raise SystemExit (1 )
334335
335336 # Run evaluation
@@ -351,19 +352,19 @@ def assert_eval(data, scorers, min_score, max_score, pass_rate, fail_on_error):
351352 all_passed = False
352353
353354 # Display results
354- click . echo (f"\n { '─' * 60 } " )
355- click . echo (" 🔍 Assertion Results" )
356- click . echo (f"{ '─' * 60 } " )
355+ recho (f"\n { '─' * 60 } " )
356+ recho (" 🔍 Assertion Results" )
357+ recho (f"{ '─' * 60 } " )
357358
358359 for a in assertions .results :
359360 status = "✅" if a .passed else "❌"
360- click . echo (f" { status } { a .name } : { a .message } " )
361+ recho (f" { status } { a .name } : { a .message } " )
361362
362- click . echo (f"{ '─' * 60 } " )
363+ recho (f"{ '─' * 60 } " )
363364
364365 if all_passed :
365- click . echo (" ✅ All assertions PASSED" )
366+ recho (" ✅ All assertions PASSED" )
366367 else :
367- click . echo (" ❌ Some assertions FAILED" )
368+ recho (" ❌ Some assertions FAILED" )
368369 if fail_on_error :
369370 raise SystemExit (1 )
0 commit comments