-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathload_iris.py
More file actions
38 lines (27 loc) · 1.12 KB
/
load_iris.py
File metadata and controls
38 lines (27 loc) · 1.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import argparse
import os
from pathlib import Path
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
def main(argv=None) -> None:
    """Materialize the scaled Iris dataset as two CSV files.

    Loads the Iris dataset from scikit-learn as data frames, fits a
    ``StandardScaler`` on the full feature table and applies it, then writes
    two files into the output directory:

    * ``<name>.features.csv`` -- the scaled features, original column names.
    * ``<name>.labels.csv`` -- the targets in a single ``label`` column.

    Both files store the row index under an ``id`` column.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None`` (the default), argparse reads them from
            ``sys.argv``, so running the file as a script is unchanged.

    Command-line options:
        --output_dir: Directory to write into (default: current working
            directory). Created, including parents, if it does not exist.
        --name: Prefix of the generated file names (default: ``iris``).
    """
    parser = argparse.ArgumentParser(description='Materialize dataset files.')
    parser.add_argument(
        '--output_dir',
        type=str,
        help='output directory where dataset files will be saved.',
        default=os.getcwd(),
    )
    parser.add_argument('--name', type=str, help='name of the dataset', default='iris')
    args = parser.parse_args(argv)

    # to_csv does not create missing parent directories, so make sure the
    # target exists before doing any work that would otherwise be wasted.
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    iris = datasets.load_iris(as_frame=True)

    # Prepare features: the scaler returns a plain array, so rebuild a frame
    # with the original column names for a readable CSV header.
    features = iris.data
    features_scaled = StandardScaler().fit_transform(features)
    features_scaled_df = pd.DataFrame(features_scaled, columns=features.columns)

    # Prepare labels
    labels_df = iris.target.to_frame(name='label')

    # Write to disk
    features_scaled_df.to_csv(output_dir / f'{args.name}.features.csv', index_label='id')
    labels_df.to_csv(output_dir / f'{args.name}.labels.csv', index_label='id')
# Script entry point: run the export only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()