# Source file: the yellow-taxi trip CSV named for 2019-01 under /databricks-datasets.
data_path = "dbfs:/databricks-datasets/nyctaxi/tripdata/yellow/yellow_tripdata_2019-01.csv.gz"

# header=True takes column names from the first row; inferSchema=True asks Spark
# to derive column types from the data instead of reading every column as a string.
df = spark.read.csv(data_path, header=True, inferSchema=True)

# Persist the data as Parquet; "overwrite" replaces anything already at the target path.
df.write.mode("overwrite").parquet("/mnt/sample-data/nyc-taxi")

# Preview the first 10 rows (display is assumed to be the notebook-provided helper).
display(df.limit(10))
# NOTE(review): these statements repeat the load / write / preview sequence found
# at the top of this file — confirm whether the second run is intentional.

# Source file: the yellow-taxi trip CSV named for 2019-01 under /databricks-datasets.
data_path = "dbfs:/databricks-datasets/nyctaxi/tripdata/yellow/yellow_tripdata_2019-01.csv.gz"

# Read with the first row as column names and let Spark infer the column types.
df = spark.read.csv(data_path, header=True, inferSchema=True)

# Write as Parquet, replacing any existing output at the target path.
df.write.mode("overwrite").parquet("/mnt/sample-data/nyc-taxi")

# Show the first 10 rows (display is assumed to be the notebook-provided helper).
display(df.limit(10))