From 79c68ee9e7a4a9e4715369487e1408200623b4ae Mon Sep 17 00:00:00 2001 From: Liu Haixin Date: Thu, 12 Mar 2026 00:07:34 -0400 Subject: [PATCH] "Add full interactive dashboard with proposal, KPI cards, and visualizations" --- streamlit_app.py | 560 +++++++++++++++++++++++++++++++++++++++++++++-- utils.py | 201 ++++++++++++++--- 2 files changed, 720 insertions(+), 41 deletions(-) diff --git a/streamlit_app.py b/streamlit_app.py index 4e225ba..bfc5a1c 100644 --- a/streamlit_app.py +++ b/streamlit_app.py @@ -1,19 +1,551 @@ +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go import streamlit as st +from utils import ( + TRANSIT_MODES, + get_holiday_df, + get_latest_recovery, + get_weekday_weekend_comparison, + load_mta_data, +) + st.set_page_config(page_title="MTA Ridership Dashboard", layout="wide") st.title("🚇 MTA Ridership Recovery Dashboard") -st.markdown(""" -This dashboard explores MTA ridership trends and COVID-19 recovery patterns -across different transit services in New York City. -""") - -st.subheader("Research Questions") -st.markdown(""" -1. How do weekday vs. weekend travel patterns differ across MTA services? -2. How have holidays impacted ridership? -3. What are the recovery rates across different MTA services since COVID-19? -""") - -st.markdown("---") -st.markdown("**Team bouncing-penguin:** Haixin & Hanghai Li") +st.markdown( + "Exploring MTA ridership trends and COVID-19 recovery patterns " + "across transit services in New York City." +) + +# --------------------------------------------------------------------------- +# Tabs: Dashboard vs Proposal +# --------------------------------------------------------------------------- +tab_dashboard, tab_proposal = st.tabs(["📊 Dashboard", "📝 Proposal"]) + +# =========================== +# DATA LOADING (cached) +# =========================== + + +@st.cache_data(ttl=3600) +def fetch_data(): + return load_mta_data() + + +try: + df = fetch_data() + data_loaded = True +except Exception as e: + data_loaded = False + st.error(f"Failed to load data: {e}") + +# =========================== +# DASHBOARD TAB +# =========================== +with tab_dashboard: + if not data_loaded: + st.warning("Could not load MTA data. Please try again later.") + st.stop() + + # -- Sidebar filters -- + st.sidebar.header("Filters") + + min_date = df["date"].min().date() + max_date = df["date"].max().date() + date_range = st.sidebar.date_input( + "Date range", + value=(min_date, max_date), + min_value=min_date, + max_value=max_date, + ) + if len(date_range) == 2: + start_date, end_date = date_range + mask = (df["date"].dt.date >= start_date) & (df["date"].dt.date <= end_date) + filtered = df[mask].copy() + else: + filtered = df.copy() + + selected_modes = st.sidebar.multiselect( + "Transit modes", + options=list(TRANSIT_MODES.keys()), + default=["Subway", "Bus", "LIRR", "Metro-North"], + ) + + rolling_window = st.sidebar.slider( + "Rolling average (days)", min_value=1, max_value=60, value=7 + ) + + # ------------------------------------------------------- + # Section 1: KPI Cards + # ------------------------------------------------------- + st.subheader("Current Recovery Snapshot") + st.caption("Average recovery rate over the most recent 30 days in the dataset") + + recovery = get_latest_recovery(filtered, days=30) + kpi_cols = st.columns(len(recovery)) + for i, (mode, rate) in enumerate(recovery.items()): + with kpi_cols[i]: + st.metric( + label=mode, + value=f"{rate:.0%}", + delta=None, + ) + + st.markdown("---") + + # ------------------------------------------------------- + # Section 2: Recovery Trend (interactive plotly) + # ------------------------------------------------------- + st.subheader("Recovery Trend Over Time") + + fig_recovery = go.Figure() + for mode in selected_modes: + col = TRANSIT_MODES[mode]["recovery"] + if col not in filtered.columns: + continue + series = filtered.set_index("date")[col].rolling(rolling_window).mean() + fig_recovery.add_trace( + go.Scatter( + x=series.index, + y=series.values, + mode="lines", + name=mode, + ) + ) + + # Baseline + fig_recovery.add_hline( + y=1.0, + line_dash="dash", + line_color="gray", + annotation_text="Pre-pandemic baseline (100%)", + ) + + fig_recovery.update_layout( + yaxis_title="% of Pre-Pandemic Ridership", + xaxis_title="Date", + hovermode="x unified", + legend=dict(orientation="h", y=-0.15), + height=500, + yaxis_tickformat=".0%", + ) + st.plotly_chart(fig_recovery, use_container_width=True) + + # ------------------------------------------------------- + # Section 3: Total Ridership Trend + # ------------------------------------------------------- + st.subheader("Total Daily Ridership") + + fig_total = go.Figure() + for mode in selected_modes: + col = TRANSIT_MODES[mode]["ridership"] + if col not in filtered.columns: + continue + series = filtered.set_index("date")[col].rolling(rolling_window).mean() + fig_total.add_trace( + go.Scatter( + x=series.index, + y=series.values, + mode="lines", + name=mode, + ) + ) + + fig_total.update_layout( + yaxis_title="Daily Ridership", + xaxis_title="Date", + hovermode="x unified", + legend=dict(orientation="h", y=-0.15), + height=500, + ) + st.plotly_chart(fig_total, use_container_width=True) + + # ------------------------------------------------------- + # Section 4: Weekday vs Weekend + # ------------------------------------------------------- + st.subheader("Weekday vs Weekend Recovery") + + available_years = sorted(filtered["year"].unique()) + selected_year = st.selectbox( + "Select year for comparison", + options=["All Years"] + available_years, + index=0, + ) + + year_val = None if selected_year == "All Years" else int(selected_year) + comparison = get_weekday_weekend_comparison(filtered, year=year_val) + + if not comparison.empty: + # Grouped bar chart + comp_melted = comparison.melt( + id_vars="Transit Mode", + value_vars=["Weekday Avg Recovery", "Weekend Avg Recovery"], + var_name="Day Type", + value_name="Recovery Rate", + ) + comp_melted["Day Type"] = comp_melted["Day Type"].str.replace( + " Avg Recovery", "" + ) + + fig_wkday = px.bar( + comp_melted, + x="Transit Mode", + y="Recovery Rate", + color="Day Type", + barmode="group", + color_discrete_map={"Weekday": "#636EFA", "Weekend": "#EF553B"}, + ) + fig_wkday.update_layout( + yaxis_tickformat=".0%", + yaxis_title="Avg Recovery Rate (% of Pre-Pandemic)", + height=450, + ) + st.plotly_chart(fig_wkday, use_container_width=True) + + # Weekday vs Weekend gap over time (monthly) + st.markdown("**Monthly Weekday-Weekend Gap (Subway)**") + subway_col = TRANSIT_MODES["Subway"]["recovery"] + if subway_col in filtered.columns: + monthly = ( + filtered.groupby(["year_month", "is_weekend"])[subway_col] + .mean() + .unstack() + .rename(columns={False: "Weekday", True: "Weekend"}) + ) + monthly["Gap"] = monthly["Weekend"] - monthly["Weekday"] + monthly = monthly.reset_index() + + fig_gap = px.bar( + monthly, + x="year_month", + y="Gap", + color="Gap", + color_continuous_scale=["#EF553B", "#CCCCCC", "#636EFA"], + color_continuous_midpoint=0, + ) + fig_gap.update_layout( + xaxis_title="Month", + yaxis_title="Weekend - Weekday Recovery Gap", + yaxis_tickformat=".0%", + height=350, + showlegend=False, + xaxis=dict(tickangle=-45, dtick=3), + ) + st.plotly_chart(fig_gap, use_container_width=True) + st.caption( + "Positive values mean weekend recovery is higher than weekday. " + "This is consistent with reduced weekday commuting due to remote work." + ) + + # ------------------------------------------------------- + # Section 5: Holiday Impact + # ------------------------------------------------------- + st.subheader("Holiday & Event Impact on Ridership") + + holidays_df = get_holiday_df() + holiday_names = sorted(holidays_df["holiday"].unique()) + selected_holidays = st.multiselect( + "Select holidays/events to highlight", + options=holiday_names, + default=["Thanksgiving", "Christmas", "Congestion Pricing Launch"], + ) + + if selected_holidays: + fig_holiday = go.Figure() + + # Plot subway recovery as the background line + subway_col = TRANSIT_MODES["Subway"]["recovery"] + if subway_col in filtered.columns: + series = filtered.set_index("date")[subway_col].rolling(7).mean() + fig_holiday.add_trace( + go.Scatter( + x=series.index, + y=series.values, + mode="lines", + name="Subway (7-day avg)", + line=dict(color="#636EFA"), + ) + ) + + # Add vertical lines for selected holidays + colors = px.colors.qualitative.Set2 + sel_holidays = holidays_df[holidays_df["holiday"].isin(selected_holidays)] + for i, holiday in enumerate(selected_holidays): + dates = sel_holidays[sel_holidays["holiday"] == holiday]["date"] + color = colors[i % len(colors)] + for j, d in enumerate(dates): + if filtered["date"].min() <= d <= filtered["date"].max(): + fig_holiday.add_vline( + x=d, + line_dash="dot", + line_color=color, + annotation_text=holiday if j == 0 else None, + annotation_position="top left", + ) + + fig_holiday.update_layout( + yaxis_title="Subway Recovery (% of Pre-Pandemic)", + xaxis_title="Date", + yaxis_tickformat=".0%", + hovermode="x unified", + height=500, + ) + st.plotly_chart(fig_holiday, use_container_width=True) + + # Holiday impact table + st.markdown("**Average Subway Ridership Around Holidays**") + impact_rows = [] + for _, row in sel_holidays.iterrows(): + h_date = row["date"] + # 3-day window around the holiday + window = filtered[ + (filtered["date"] >= h_date - pd.Timedelta(days=1)) + & (filtered["date"] <= h_date + pd.Timedelta(days=1)) + ] + # Surrounding week for comparison + baseline = filtered[ + (filtered["date"] >= h_date - pd.Timedelta(days=8)) + & (filtered["date"] < h_date - pd.Timedelta(days=1)) + ] + if not window.empty and not baseline.empty: + h_avg = window[subway_col].mean() + b_avg = baseline[subway_col].mean() + impact_rows.append( + { + "Holiday": row["holiday"], + "Date": h_date.strftime("%Y-%m-%d"), + "Holiday Recovery": f"{h_avg:.0%}", + "Prior Week Recovery": f"{b_avg:.0%}", + "Change": f"{h_avg - b_avg:+.0%}", + } + ) + if impact_rows: + st.dataframe( + pd.DataFrame(impact_rows), + use_container_width=True, + hide_index=True, + ) + + # ------------------------------------------------------- + # Section 6: Year-over-Year Recovery + # ------------------------------------------------------- + st.subheader("Year-over-Year Recovery by Transit Mode") + + yearly_rows = [] + for year in sorted(filtered["year"].unique()): + year_data = filtered[filtered["year"] == year] + for mode, cols in TRANSIT_MODES.items(): + col = cols["recovery"] + if col in year_data.columns: + avg = year_data[col].mean() + yearly_rows.append( + { + "Year": str(year), + "Transit Mode": mode, + "Avg Recovery": avg, + } + ) + + if yearly_rows: + yearly_df = pd.DataFrame(yearly_rows) + fig_yearly = px.bar( + yearly_df, + x="Year", + y="Avg Recovery", + color="Transit Mode", + barmode="group", + ) + fig_yearly.update_layout( + yaxis_title="Avg Recovery Rate (% of Pre-Pandemic)", + yaxis_tickformat=".0%", + height=450, + ) + st.plotly_chart(fig_yearly, use_container_width=True) + + # ------------------------------------------------------- + # Section 7: Day-of-Week Heatmap + # ------------------------------------------------------- + st.subheader("Ridership by Day of Week") + + heatmap_mode = st.selectbox( + "Select transit mode for heatmap", + options=list(TRANSIT_MODES.keys()), + index=0, + ) + heatmap_col = TRANSIT_MODES[heatmap_mode]["recovery"] + if heatmap_col in filtered.columns: + pivot = filtered.groupby(["year", "day_name"])[heatmap_col].mean().reset_index() + day_order = [ + "Monday", + "Tuesday", + "Wednesday", + "Thursday", + "Friday", + "Saturday", + "Sunday", + ] + pivot["day_name"] = pd.Categorical( + pivot["day_name"], categories=day_order, ordered=True + ) + pivot = pivot.sort_values(["year", "day_name"]) + pivot_wide = pivot.pivot(index="day_name", columns="year", values=heatmap_col) + + fig_heat = px.imshow( + pivot_wide, + color_continuous_scale="RdYlGn", + aspect="auto", + labels=dict(x="Year", y="Day of Week", color="Recovery %"), + zmin=0, + zmax=1.2, + ) + fig_heat.update_layout(height=350) + st.plotly_chart(fig_heat, use_container_width=True) + + # ------------------------------------------------------- + # Footer + # ------------------------------------------------------- + st.markdown("---") + st.caption( + "Data source: [MTA Daily Ridership Data](https://data.ny.gov/Transportation/" + "MTA-Daily-Ridership-Data-Beginning-2020/vxuj-8kew) | " + "Team bouncing-penguin: Haixin Liu & Hanghai Li" + ) + +# =========================== +# PROPOSAL TAB +# =========================== +with tab_proposal: + st.header("Project Proposal") + + st.subheader("Background and Motivation") + st.markdown(""" + The COVID-19 pandemic caused an unprecedented drop in public transit ridership across + New York City. At its lowest point in April 2020, subway ridership fell to roughly + 10% of normal levels, and other MTA services experienced similar declines. Since then, + ridership has been gradually recovering, but the pace and pattern of that recovery + has varied significantly depending on the transit mode, time of week, and even + specific events or holidays. + + As of late 2025, subway ridership has climbed back to about 85% of pre-pandemic levels, + while the Long Island Rail Road (LIRR) has reached 92% and Metro-North sits at around 88%. + Bridges and tunnels traffic has actually exceeded pre-pandemic levels at roughly 105%, + suggesting a shift in how New Yorkers choose to commute. Paratransit ridership has surged + to 161% of pre-pandemic levels, pointing to growing demand for accessible transit options. + These differences raise interesting questions about what drives recovery in different parts + of the transit system and whether these patterns will continue. + + Understanding these recovery dynamics matters not just for transit planning but also + for broader urban policy. Transit ridership affects fare revenue, congestion, air quality, + and economic activity across the region. + """) + + st.subheader("Research Questions") + st.markdown(""" + We started this project with three main research questions. After working through the + data over the past few weeks, we've refined them based on what we've actually been + able to observe: + + **1. How do weekday vs. weekend ridership patterns differ across MTA services, and + has that gap changed over time?** + + Our original question was simply about weekday vs. weekend differences, but we've found + that the more interesting story is how that gap has evolved. Early in the pandemic, + weekend ridership actually recovered faster than weekday ridership for subways and + commuter rail, likely due to remote work reducing weekday commuting. We want to explore + whether that trend has continued or whether weekday ridership is catching up as + return-to-office policies have become more common. + + **2. How do holidays and major events affect ridership across different transit modes?** + + We initially framed this broadly, but we're now focusing on specific events: major holidays + (Thanksgiving, Christmas, July 4th, New Year's), large-scale events (marathon, Times Square + NYE), and policy changes like the launch of congestion pricing in early 2025. The congestion + pricing angle is particularly interesting because it directly connects transit policy to + ridership behavior. + + **3. Which transit modes have recovered fastest, and what factors explain the differences?** + + This remains our core question, but we've added more nuance. Rather than just looking at + which mode recovered fastest, we're now also examining the rate of recovery over time. + For example, LIRR's recovery accelerated after Grand Central Madison opened, and bus + ridership got a boost from the Queens Bus Network Redesign. We want to see whether these + service improvements show up clearly in the data. + """) + + st.subheader("Dataset") + st.markdown(""" + We're using the **MTA Daily Ridership Data** from the New York State Open Data portal + (data.ny.gov), which is updated daily and covers all major MTA services starting from + March 2020. + + The dataset includes daily total ridership estimates and the percentage of comparable + pre-pandemic day ridership for each transit mode: Subways, Buses, LIRR, Metro-North, + Access-A-Ride, and Bridges & Tunnels. This gives us both absolute numbers and a built-in + recovery metric (the pre-pandemic percentage), which makes cross-mode comparison + straightforward. + + One limitation we've noticed is that the "comparable pre-pandemic day" metric can be noisy + around holidays, since the comparison day may not perfectly match the current day's + conditions. We handle this by using rolling averages for trend analysis instead of relying + on individual daily values. + + We pull the data directly from the NYC Open Data API so the dashboard always reflects + the most recent available data without needing manual updates. + """) + + st.subheader("Methodology") + st.markdown(""" + Our analysis approach includes the following: + + - **Recovery trend analysis:** We track the pre-pandemic percentage for each transit mode + over time, using 7-day and 30-day rolling averages to smooth out daily fluctuations. + This helps us identify the overall trajectory and any inflection points. + + - **Weekday vs. weekend comparison:** We categorize each day as weekday or weekend, + then compare average ridership and recovery rates for each transit mode across these + two groups. We also look at how this gap has changed year over year. + + - **Holiday and event impact:** We flag known holidays and major events in the data + and examine ridership patterns in the days surrounding them. We compare holiday + ridership to the surrounding week's average to quantify the impact. + + - **Cross-mode comparison:** We rank transit modes by their recovery rate and visualize + them side by side. We also look at whether modes that serve different geographic areas + or rider demographics have recovered differently. + + All visualizations use Plotly for interactivity, allowing users to zoom in on specific + time periods, toggle transit modes on and off, and hover over data points for details. + """) + + st.subheader("Preliminary Findings") + st.markdown(""" + Based on our analysis so far, here are the key patterns we've identified: + + - **Commuter rail is recovering faster than subway and bus.** LIRR leads at 92% recovery, + followed by Metro-North at 88%, while subway sits at 85%. This may reflect the + return-to-office trend among suburban commuters and service improvements like Grand + Central Madison. + + - **Bridges and tunnels traffic has fully recovered and then some**, currently at 105% of + pre-pandemic levels. This suggests some riders may have permanently shifted from transit + to driving, or that overall regional travel volume has increased. + + - **Weekend ridership recovery has been proportionally stronger than weekday ridership** + for subway and bus, consistent with the shift toward remote and hybrid work reducing + traditional weekday commuting. + + - **Paratransit demand has surged well beyond pre-pandemic levels** (161%), indicating + growing need for accessible transit services that goes beyond simple pandemic recovery. + + - **Recovery is not linear.** There are clear seasonal dips (winter holidays, summer), + and specific events like congestion pricing launch appear to have boosted transit + ridership in early 2025. + + These findings are preliminary and will be refined as we build out the full dashboard + with interactive visualizations. + """) + + st.markdown("---") + st.markdown("**Team bouncing-penguin:** Haixin Liu & Hanghai Li") diff --git a/utils.py b/utils.py index 75a4eff..5e7a5d6 100644 --- a/utils.py +++ b/utils.py @@ -1,5 +1,13 @@ -import pandas as pd import matplotlib.pyplot as plt +import pandas as pd + + +def load_mta_data() -> pd.DataFrame: + """Load MTA ridership data from NYC Open Data API.""" + url = "https://data.ny.gov/resource/vxuj-8kew.csv?$limit=50000" + df = pd.read_csv(url) + df = clean_mta_df(df) + return df def clean_mta_df(df: pd.DataFrame) -> pd.DataFrame: @@ -11,9 +19,163 @@ def clean_mta_df(df: pd.DataFrame) -> pd.DataFrame: out["date"] = pd.to_datetime(out["date"]) out = out.sort_values("date").reset_index(drop=True) + # Normalize column names: API may return _of_ format, + # we standardize to _pct_of_ to match our tests and code + rename_map = {} + for col in out.columns: + if "_of_comparable_pre_pandemic_day" in col and "_pct_of_" not in col: + new_col = col.replace( + "_of_comparable_pre_pandemic_day", + "_pct_of_comparable_pre_pandemic_day", + ) + rename_map[col] = new_col + if rename_map: + out = out.rename(columns=rename_map) + + # Make sure numeric columns are actually numeric + numeric_cols = [ + "subways_total_estimated_ridership", + "subways_pct_of_comparable_pre_pandemic_day", + "buses_total_estimated_ridership", + "buses_pct_of_comparable_pre_pandemic_day", + "lirr_total_estimated_ridership", + "lirr_pct_of_comparable_pre_pandemic_day", + "metro_north_total_estimated_ridership", + "metro_north_pct_of_comparable_pre_pandemic_day", + "bridges_and_tunnels_total_traffic", + "bridges_and_tunnels_pct_of_comparable_pre_pandemic_day", + ] + for col in numeric_cols: + if col in out.columns: + out[col] = pd.to_numeric(out[col], errors="coerce") + + # Add useful time columns + out["day_of_week"] = out["date"].dt.dayofweek + out["day_name"] = out["date"].dt.day_name() + out["is_weekend"] = out["day_of_week"] >= 5 + out["year"] = out["date"].dt.year + out["month"] = out["date"].dt.month + out["year_month"] = out["date"].dt.to_period("M").astype(str) + return out +# Mapping from friendly names to column names +TRANSIT_MODES = { + "Subway": { + "ridership": "subways_total_estimated_ridership", + "recovery": "subways_pct_of_comparable_pre_pandemic_day", + }, + "Bus": { + "ridership": "buses_total_estimated_ridership", + "recovery": "buses_pct_of_comparable_pre_pandemic_day", + }, + "LIRR": { + "ridership": "lirr_total_estimated_ridership", + "recovery": "lirr_pct_of_comparable_pre_pandemic_day", + }, + "Metro-North": { + "ridership": "metro_north_total_estimated_ridership", + "recovery": "metro_north_pct_of_comparable_pre_pandemic_day", + }, + "Bridges & Tunnels": { + "ridership": "bridges_and_tunnels_total_traffic", + "recovery": "bridges_and_tunnels_pct_of_comparable_pre_pandemic_day", + }, +} + + +# US federal holidays and NYC-relevant events +HOLIDAYS = { + "New Year's Day": [ + "2020-01-01", + "2021-01-01", + "2022-01-01", + "2023-01-01", + "2024-01-01", + "2025-01-01", + "2026-01-01", + ], + "Independence Day": [ + "2020-07-04", + "2021-07-04", + "2022-07-04", + "2023-07-04", + "2024-07-04", + "2025-07-04", + ], + "Thanksgiving": [ + "2020-11-26", + "2021-11-25", + "2022-11-24", + "2023-11-23", + "2024-11-28", + "2025-11-27", + ], + "Christmas": [ + "2020-12-25", + "2021-12-25", + "2022-12-25", + "2023-12-25", + "2024-12-25", + "2025-12-25", + ], + "NYC Marathon": [ + "2021-11-07", + "2022-11-06", + "2023-11-05", + "2024-11-03", + "2025-11-02", + ], + "Congestion Pricing Launch": ["2025-01-05"], +} + + +def get_holiday_df() -> pd.DataFrame: + """Return a dataframe of holiday dates and names.""" + rows = [] + for name, dates in HOLIDAYS.items(): + for d in dates: + rows.append({"date": pd.to_datetime(d), "holiday": name}) + return pd.DataFrame(rows) + + +def get_latest_recovery(df: pd.DataFrame, days: int = 30) -> dict: + """Get the average recovery rate for each transit mode over the last N days.""" + recent = df.sort_values("date").tail(days) + result = {} + for mode, cols in TRANSIT_MODES.items(): + col = cols["recovery"] + if col in recent.columns: + val = recent[col].mean() + result[mode] = val + return result + + +def get_weekday_weekend_comparison(df: pd.DataFrame, year: int = None) -> pd.DataFrame: + """Compare weekday vs weekend recovery rates by transit mode.""" + data = df.copy() + if year: + data = data[data["year"] == year] + + rows = [] + for mode, cols in TRANSIT_MODES.items(): + col = cols["recovery"] + if col not in data.columns: + continue + weekday_avg = data[~data["is_weekend"]][col].mean() + weekend_avg = data[data["is_weekend"]][col].mean() + rows.append( + { + "Transit Mode": mode, + "Weekday Avg Recovery": weekday_avg, + "Weekend Avg Recovery": weekend_avg, + "Gap (Weekend - Weekday)": weekend_avg - weekday_avg, + } + ) + return pd.DataFrame(rows) + + def plot_ridership_recovery(df: pd.DataFrame) -> plt.Figure: """Plot MTA ridership recovery by transit mode as % of pre-pandemic levels.""" required_cols = [ @@ -23,58 +185,43 @@ def plot_ridership_recovery(df: pd.DataFrame) -> plt.Figure: "lirr_pct_of_comparable_pre_pandemic_day", "metro_north_pct_of_comparable_pre_pandemic_day", ] - missing = [c for c in required_cols if c not in df.columns] if missing: raise KeyError(f"Missing required columns: {missing}") - plot_df = df.copy() - plot_df["subways_pct_of_comparable_pre_pandemic_day"] = ( - plot_df["subways_pct_of_comparable_pre_pandemic_day"] * 100 - ) - plot_df["buses_pct_of_comparable_pre_pandemic_day"] = ( - plot_df["buses_pct_of_comparable_pre_pandemic_day"] * 100 - ) - plot_df["lirr_pct_of_comparable_pre_pandemic_day"] = ( - plot_df["lirr_pct_of_comparable_pre_pandemic_day"] * 100 - ) - plot_df["metro_north_pct_of_comparable_pre_pandemic_day"] = ( - plot_df["metro_north_pct_of_comparable_pre_pandemic_day"] * 100 - ) - fig, ax = plt.subplots(figsize=(14, 7)) ax.plot( - plot_df["date"], - plot_df["subways_pct_of_comparable_pre_pandemic_day"], + df["date"], + df["subways_pct_of_comparable_pre_pandemic_day"], label="Subway", alpha=0.8, linewidth=1.2, ) ax.plot( - plot_df["date"], - plot_df["buses_pct_of_comparable_pre_pandemic_day"], + df["date"], + df["buses_pct_of_comparable_pre_pandemic_day"], label="Bus", alpha=0.8, linewidth=1.2, ) ax.plot( - plot_df["date"], - plot_df["lirr_pct_of_comparable_pre_pandemic_day"], + df["date"], + df["lirr_pct_of_comparable_pre_pandemic_day"], label="LIRR", alpha=0.8, linewidth=1.2, ) ax.plot( - plot_df["date"], - plot_df["metro_north_pct_of_comparable_pre_pandemic_day"], + df["date"], + df["metro_north_pct_of_comparable_pre_pandemic_day"], label="Metro-North", alpha=0.8, linewidth=1.2, ) ax.axhline( - y=100, + y=1.0, color="gray", linestyle="--", linewidth=1.5, @@ -90,7 +237,7 @@ def plot_ridership_recovery(df: pd.DataFrame) -> plt.Figure: ) ax.legend(loc="lower right", fontsize=10) ax.grid(True, alpha=0.3) - ax.set_ylim(0, 150) + ax.set_ylim(0, 1.5) fig.tight_layout() - return fig \ No newline at end of file + return fig