From 58556f8b2987f46b655d01287408c96af5325e94 Mon Sep 17 00:00:00 2001 From: Eisuke Kobayashi Date: Fri, 27 Mar 2026 14:16:20 -0400 Subject: [PATCH 1/3] add md --- Homepage.py | 243 ++++++++++++++++++++++++++++++++++++++++++++++ dataloading.md | 4 + research.ipynb | 2 +- test_eia_part3.py | 115 ++++++++++++++++++++++ 4 files changed, 363 insertions(+), 1 deletion(-) create mode 100644 Homepage.py create mode 100644 dataloading.md create mode 100644 test_eia_part3.py diff --git a/Homepage.py b/Homepage.py new file mode 100644 index 0000000..61cb4bd --- /dev/null +++ b/Homepage.py @@ -0,0 +1,243 @@ +import matplotlib.pyplot as plt +import pandas as pd +import streamlit as st +from google.cloud import bigquery +from google.oauth2 import service_account + +from tests.eia_part3 import latest_value + +st.set_page_config(page_title="Weekly U.S. Petroleum Supply", layout="wide") +st.title("The Correlation between Weekly U.S. Petroleum Product Supplied and WTI Crude Oil Price") +st.subheader("Team Members: Irina, Indra") +st.caption("Source: U.S. Energy Information Administration (EIA)") + +# ========================= +# Project Proposal +# ========================= +with st.expander("Project Proposal", expanded=False): + st.subheader("Project Overview") + st.write( + """ + This project analyzes weekly U.S. petroleum product supplied data and + WTI crude oil spot price data using the EIA API. Our goal is to explore + how petroleum supply and crude oil prices evolve over time and whether + they exhibit similar patterns during major economic or energy market events. + """ + ) + + st.subheader("Datasets") + st.markdown( + """ + - **Weekly U.S. Petroleum Product Supplied** + https://www.eia.gov/opendata/browser/petroleum/cons/wpsup + + - **Weekly WTI Crude Oil Spot Price (RWTC)** + https://www.eia.gov/opendata/browser/petroleum/pri/spt + """ + ) + + st.subheader("Research Questions") + st.markdown( + """ + 1. How has U.S. petroleum product supplied changed since 2012? + 2. How has WTI crude oil price changed over the same period? + 3. Do petroleum supply and crude oil prices show similar patterns over time? + 4. Are there noticeable disruptions during major events such as the COVID-19 period? + """ + ) + + st.subheader("Link to the notebook") + st.markdown( + "[Project Notebook](https://github.com/advanced-computing/giggling-wombat/blob/main/project.ipynb)" + ) + + st.subheader("Target Visualization") + st.markdown( + """ + - Weekly time-series line chart of U.S. petroleum product supplied + - Weekly time-series line chart of WTI crude oil price + - Visual comparison of trends between the two series + """ + ) + + st.subheader("Known Unknowns and Challenges") + st.markdown( + """ + - Petroleum product supplied is a proxy for demand rather than a direct measure + - Weekly data can be noisy and may obscure long-term trends + - Oil prices and supply may react to different economic forces + - The project depends on API data retrieval instead of downloadable CSV files + """ + ) + +st.divider() + +PROJECT_ID = "sipa-adv-c-giggling-wombat" +TABLE_ID = f"{PROJECT_ID}.petroleum_supply.weekly_supply" + + +@st.cache_resource +def get_bq_client(): + credentials = service_account.Credentials.from_service_account_info( + st.secrets["gcp_service_account"] + ) + return bigquery.Client( + credentials=credentials, + project=credentials.project_id, + ) + + +@st.cache_data(ttl=60 * 60) +def load_supply_data() -> pd.DataFrame: + client = get_bq_client() + query = f""" + SELECT week, total_product_supplied + FROM `{TABLE_ID}` + ORDER BY week + """ + df = client.query(query).to_dataframe() + df["week"] = pd.to_datetime(df["week"]) + df["total_product_supplied"] = pd.to_numeric(df["total_product_supplied"], errors="coerce") + df = df.dropna(subset=["week", "total_product_supplied"]) + return df + + +@st.cache_data(ttl=60 * 60) +def load_supply_product_data() -> pd.DataFrame: + client = get_bq_client() + query = """ + SELECT week, product_name, product_supplied + FROM `sipa-adv-c-giggling-wombat.petroleum_supply.weekly_supply_by_product` + ORDER BY week + """ + df = client.query(query).to_dataframe() + df["week"] = pd.to_datetime(df["week"]) + df["product_supplied"] = pd.to_numeric(df["product_supplied"], errors="coerce") + df = df.dropna(subset=["week", "product_name", "product_supplied"]) + return df + + +try: + weekly_total = load_supply_data() +except Exception as e: + st.error(f"Failed to load supply data from BigQuery: {e}") + st.stop() + +if weekly_total.empty: + st.error("No supply data found in BigQuery.") + st.stop() + +# ========================= +# Interactive Filters +# ========================= +st.sidebar.header("Filters") + +min_week = weekly_total["week"].min().date() +max_week = weekly_total["week"].max().date() + +start_week = st.sidebar.date_input( + "Start week", + value=min_week, + min_value=min_week, + max_value=max_week, + key="supply_start_week", +) + +end_week = st.sidebar.date_input( + "End week", + value=max_week, + min_value=min_week, + max_value=max_week, + key="supply_end_week", +) + +if start_week > end_week: + st.error("Start week must be earlier than or equal to end week.") + st.stop() + +filtered_total = weekly_total[ + (weekly_total["week"] >= pd.to_datetime(start_week)) + & (weekly_total["week"] <= pd.to_datetime(end_week)) +].copy() + +if filtered_total.empty: + st.warning("No data available for the selected date range.") + st.stop() + +weekly_by_product = load_supply_product_data() + +filtered_product = weekly_by_product[ + (weekly_by_product["week"] >= pd.to_datetime(start_week)) + & (weekly_by_product["week"] <= pd.to_datetime(end_week)) +].copy() + +product_options = sorted(filtered_product["product_name"].dropna().unique().tolist()) + +selected_products = st.sidebar.multiselect( + "Select product(s)", + options=product_options, + default=product_options[:3] if len(product_options) >= 3 else product_options, # noqa: PLR2004 + key="product_filter", +) + +try: + latest_total = latest_value( + filtered_total, + date_col="week", + value_col="total_product_supplied", + ) +except Exception: + latest_total = None + +c1, c2 = st.columns(2) +c1.metric("Weeks in selected range", f"{filtered_total.shape[0]:,}") +c2.metric( + "Latest total (sum of products)", + f"{latest_total:,.0f}" if latest_total is not None else "—", +) + +st.divider() +st.subheader("Total Product Supplied (Weekly, All Products Summed)") + +fig, ax = plt.subplots() +ax.plot(filtered_total["week"], filtered_total["total_product_supplied"]) +ax.set_xlabel("Week") +ax.set_ylabel("Total Product Supplied") +st.pyplot(fig) + +with st.expander("Show total supply data table"): + st.dataframe( + filtered_total.sort_values("week", ascending=False), + use_container_width=True, + ) + +st.caption( + "Note: 'Product supplied' is often used as a proxy for consumption. " + "This visualization is descriptive (not causal)." +) + +st.divider() +st.subheader("Product-Level Weekly Supply") + +if not selected_products: + st.warning("Please select at least one product from the sidebar.") +else: + product_plot_df = filtered_product[ + filtered_product["product_name"].isin(selected_products) + ].copy() + + fig2, ax2 = plt.subplots() + for product in selected_products: + temp = product_plot_df[product_plot_df["product_name"] == product] + ax2.plot(temp["week"], temp["product_supplied"], label=product) + + ax2.set_xlabel("Week") + ax2.set_ylabel("Product Supplied") + ax2.legend() + st.pyplot(fig2) + + with st.expander("Show product-level data table"): + st.dataframe( + product_plot_df.sort_values(["product_name", "week"], ascending=[True, False]), + use_container_width=True, + ) diff --git a/dataloading.md b/dataloading.md new file mode 100644 index 0000000..4ba1f35 --- /dev/null +++ b/dataloading.md @@ -0,0 +1,4 @@ +# Electricity Price Data +* For each month from 2017/01 to the latest available month, fetch the zipfile data from the source site via API, extract all csv files inside the zipfile, and concatenate them. +* Create the table in Big Query from the first month(2017/01) only. +* Append data to the table from 2017/02 up to the latest available month. \ No newline at end of file diff --git a/research.ipynb b/research.ipynb index ebab691..7d65b2a 100644 --- a/research.ipynb +++ b/research.ipynb @@ -629,7 +629,7 @@ ], "metadata": { "kernelspec": { - "display_name": ".venv", + "display_name": ".venv (3.13.12)", "language": "python", "name": "python3" }, diff --git a/test_eia_part3.py b/test_eia_part3.py new file mode 100644 index 0000000..768516c --- /dev/null +++ b/test_eia_part3.py @@ -0,0 +1,115 @@ +import pandas as pd +import pytest +from eia_part3 import ( + add_week_ending_friday_column, + build_df_from_eia_data, + coerce_numeric_and_dropna, + filter_since, + latest_value, + sum_by_week, + validate_required_columns, +) + +# Constants used in tests to satisfy Ruff PLR2004 (no "magic numbers" in comparisons) +EXPECTED_FIRST_VALUE = 100 +EXPECTED_FILTERED_VALUE = 2 +EXPECTED_LATEST_VALUE = 500.0 +EXPECTED_WEEK_COUNT = 2 +EXPECTED_WEEK1_SUM = 17 +EXPECTED_WEEK2_SUM = 3 +EXPECTED_SINGLE_VALUE = 10 + + +def test_build_df_from_eia_data_parses_and_drops_bad_rows(): + # includes: valid row, invalid date, invalid value + data = [ + {"period": "2012-01-06", "value": "100"}, + {"period": "not-a-date", "value": "200"}, + {"period": "2012-01-13", "value": "not-a-number"}, + ] + df = build_df_from_eia_data(data) + + # only the first row should survive + assert len(df) == 1 + assert pd.api.types.is_datetime64_any_dtype(df["week"]) + assert pd.api.types.is_numeric_dtype(df["value"]) + assert df["week"].iloc[0] == pd.to_datetime("2012-01-06") + assert df["value"].iloc[0] == EXPECTED_FIRST_VALUE + + +def test_filter_since_keeps_2012_and_after(): + data = [ + {"period": "2011-12-30", "value": "1"}, + {"period": "2012-01-06", "value": "2"}, + ] + df = build_df_from_eia_data(data) + df2 = filter_since(df, date_col="week", start_date="2012-01-01") + + assert len(df2) == 1 + assert df2["week"].iloc[0] == pd.to_datetime("2012-01-06") + assert df2["value"].iloc[0] == EXPECTED_FILTERED_VALUE + + +def test_latest_value_returns_value_of_most_recent_date_even_if_unsorted(): + # intentionally unsorted + data = [ + {"period": "2012-01-13", "value": "300"}, + {"period": "2012-01-06", "value": "100"}, + {"period": "2012-01-20", "value": "500"}, + ] + df = build_df_from_eia_data(data) + v = latest_value(df, date_col="week", value_col="value") + + assert v == EXPECTED_LATEST_VALUE + + +def test_latest_value_raises_on_empty_df(): + with pytest.raises(ValueError): + latest_value(pd.DataFrame(), date_col="week", value_col="value") + + +def test_sum_by_week_sums_duplicates(): + data = [ + {"period": "2012-01-06", "value": "10"}, + {"period": "2012-01-06", "value": "7"}, + {"period": "2012-01-13", "value": "3"}, + ] + df = build_df_from_eia_data(data) + out = sum_by_week(df, date_col="week", value_col="value") + + assert list(out.columns) == ["week", "value"] + assert len(out) == EXPECTED_WEEK_COUNT + assert ( + out.loc[out["week"] == pd.to_datetime("2012-01-06"), "value"].iloc[0] == EXPECTED_WEEK1_SUM + ) + assert ( + out.loc[out["week"] == pd.to_datetime("2012-01-13"), "value"].iloc[0] == EXPECTED_WEEK2_SUM + ) + + +def test_validate_required_columns_passes_when_present(): + df = pd.DataFrame({"week": [pd.to_datetime("2012-01-06")], "value": [1]}) + validate_required_columns(df, ["week", "value"]) # should not raise + + +def test_validate_required_columns_raises_when_missing(): + df = pd.DataFrame({"week": [pd.to_datetime("2012-01-06")]}) + with pytest.raises(ValueError): + validate_required_columns(df, ["week", "value"]) + + +def test_add_week_ending_friday_column_creates_expected_week_ending(): + # Pick a date that is not Friday to make the test obvious + df = pd.DataFrame({"week": pd.to_datetime(["2012-01-03"])}) # Tuesday + out = add_week_ending_friday_column(df, date_col="week", new_col="week_ending") + + # Week ending Friday should be 2012-01-06 at midnight + assert out["week_ending"].iloc[0] == pd.Timestamp("2012-01-06") + + +def test_coerce_numeric_and_dropna_drops_invalid_values(): + df = pd.DataFrame({"value": ["10", "not-a-number", None]}) + out = coerce_numeric_and_dropna(df, value_col="value") + + assert len(out) == 1 + assert out["value"].iloc[0] == EXPECTED_SINGLE_VALUE From f0685d568d0f248c155b66e4d64a3bc5a6d912e6 Mon Sep 17 00:00:00 2001 From: Eisuke Kobayashi Date: Fri, 27 Mar 2026 22:38:47 -0400 Subject: [PATCH 2/3] change the design of our website --- market_analysis.py | 56 +++++++++------------- streamlit_app.py | 7 +++ test_eia_part3.py | 115 --------------------------------------------- 3 files changed, 29 insertions(+), 149 deletions(-) delete mode 100644 test_eia_part3.py diff --git a/market_analysis.py b/market_analysis.py index f108ddb..e553a81 100644 --- a/market_analysis.py +++ b/market_analysis.py @@ -28,6 +28,7 @@ # ------ API loaders ------- @st.cache_data(ttl=3600) def load_nyiso_realtime(selected_month) -> any: + start_date = datetime.datetime.strptime(selected_month, "%Y-%m-%d") if start_date.month == 12: @@ -42,7 +43,6 @@ def load_nyiso_realtime(selected_month) -> any: AND Time_Stamp < '{end_date.strftime("%Y-%m-%d")}' """ df = pandas_gbq.read_gbq(sql, credentials=credentials) - return df @@ -158,7 +158,7 @@ def render_intro() -> None: st.divider() -def render_electricity_section(realtime_df: pd.DataFrame) -> None: +def render_electricity_section() -> None: st.header("Electricity Market Overview") st.write( @@ -168,9 +168,27 @@ def render_electricity_section(realtime_df: pd.DataFrame) -> None: """ ) + # input month + year = st.selectbox("Year", range(2017, 2027), index=9) + month = st.selectbox("Month", range(1, 13)) + selected_month = datetime.date(year, month, 1) + selected_month_str = selected_month.strftime("%Y-%m-%d") + + if selected_month > datetime.date.today(): + st.error("No data available.") + st.stop() + + try: + realtime_df = load_nyiso_realtime(selected_month_str) + except Exception as exc: + st.error( + f"Failed to load NYISO electricity data from online public source: {exc}" + ) + return + + # input zones zones = sorted(realtime_df["Name"].dropna().unique().tolist()) default_zone = "N.Y.C." if "N.Y.C." in zones else zones[0] - zone = st.selectbox("Select a NYISO zone", zones, index=zones.index(default_zone)) zone_df = realtime_df.loc[realtime_df["Name"] == zone].copy() zone_df = zone_df.sort_values("Time_Stamp") @@ -294,38 +312,8 @@ def render_comparison_section(gas_available: bool) -> None: # ------ Main ------ def main() -> None: render_sidebar() - - st.sidebar.subheader("Electricity Data Controls") - nyiso_month = st.sidebar.text_input( - "NYISO month (YYYYMM)", - value="202602", - help="Example: 202602 for February 2026", - ) - try: - nyiso_month_datetime = datetime.datetime.strptime(nyiso_month, "%Y%m") - - if nyiso_month_datetime < datetime.datetime(2017, 1, 1): - st.error("No data available. Please fill months after 2017") - st.stop() - - selected_month = nyiso_month_datetime.strftime("%Y-%m-%d") - - except ValueError: - st.error("Invalid form. Please write in YYYYMM") - st.stop() - render_intro() - - try: - realtime_df = load_nyiso_realtime(selected_month) - except Exception as exc: - st.error( - f"Failed to load NYISO electricity data from online public source: {exc}" - ) - return - - # Electricity always renders if available - render_electricity_section(realtime_df) + render_electricity_section() gas_available = False try: diff --git a/streamlit_app.py b/streamlit_app.py index 5084720..2a6367a 100644 --- a/streamlit_app.py +++ b/streamlit_app.py @@ -1,7 +1,14 @@ import streamlit as st +import time + +start_time = time.time() + page_proposal = st.Page("proposal.py", title="Our Proposal") page_market = st.Page("market_analysis.py", title="Energy Market Dashboard") pg = st.navigation([page_proposal, page_market]) pg.run() + +elapsed = time.time() - start_time +st.caption(f"Page loaded in {elapsed:.2f} seconds") diff --git a/test_eia_part3.py b/test_eia_part3.py deleted file mode 100644 index 768516c..0000000 --- a/test_eia_part3.py +++ /dev/null @@ -1,115 +0,0 @@ -import pandas as pd -import pytest -from eia_part3 import ( - add_week_ending_friday_column, - build_df_from_eia_data, - coerce_numeric_and_dropna, - filter_since, - latest_value, - sum_by_week, - validate_required_columns, -) - -# Constants used in tests to satisfy Ruff PLR2004 (no "magic numbers" in comparisons) -EXPECTED_FIRST_VALUE = 100 -EXPECTED_FILTERED_VALUE = 2 -EXPECTED_LATEST_VALUE = 500.0 -EXPECTED_WEEK_COUNT = 2 -EXPECTED_WEEK1_SUM = 17 -EXPECTED_WEEK2_SUM = 3 -EXPECTED_SINGLE_VALUE = 10 - - -def test_build_df_from_eia_data_parses_and_drops_bad_rows(): - # includes: valid row, invalid date, invalid value - data = [ - {"period": "2012-01-06", "value": "100"}, - {"period": "not-a-date", "value": "200"}, - {"period": "2012-01-13", "value": "not-a-number"}, - ] - df = build_df_from_eia_data(data) - - # only the first row should survive - assert len(df) == 1 - assert pd.api.types.is_datetime64_any_dtype(df["week"]) - assert pd.api.types.is_numeric_dtype(df["value"]) - assert df["week"].iloc[0] == pd.to_datetime("2012-01-06") - assert df["value"].iloc[0] == EXPECTED_FIRST_VALUE - - -def test_filter_since_keeps_2012_and_after(): - data = [ - {"period": "2011-12-30", "value": "1"}, - {"period": "2012-01-06", "value": "2"}, - ] - df = build_df_from_eia_data(data) - df2 = filter_since(df, date_col="week", start_date="2012-01-01") - - assert len(df2) == 1 - assert df2["week"].iloc[0] == pd.to_datetime("2012-01-06") - assert df2["value"].iloc[0] == EXPECTED_FILTERED_VALUE - - -def test_latest_value_returns_value_of_most_recent_date_even_if_unsorted(): - # intentionally unsorted - data = [ - {"period": "2012-01-13", "value": "300"}, - {"period": "2012-01-06", "value": "100"}, - {"period": "2012-01-20", "value": "500"}, - ] - df = build_df_from_eia_data(data) - v = latest_value(df, date_col="week", value_col="value") - - assert v == EXPECTED_LATEST_VALUE - - -def test_latest_value_raises_on_empty_df(): - with pytest.raises(ValueError): - latest_value(pd.DataFrame(), date_col="week", value_col="value") - - -def test_sum_by_week_sums_duplicates(): - data = [ - {"period": "2012-01-06", "value": "10"}, - {"period": "2012-01-06", "value": "7"}, - {"period": "2012-01-13", "value": "3"}, - ] - df = build_df_from_eia_data(data) - out = sum_by_week(df, date_col="week", value_col="value") - - assert list(out.columns) == ["week", "value"] - assert len(out) == EXPECTED_WEEK_COUNT - assert ( - out.loc[out["week"] == pd.to_datetime("2012-01-06"), "value"].iloc[0] == EXPECTED_WEEK1_SUM - ) - assert ( - out.loc[out["week"] == pd.to_datetime("2012-01-13"), "value"].iloc[0] == EXPECTED_WEEK2_SUM - ) - - -def test_validate_required_columns_passes_when_present(): - df = pd.DataFrame({"week": [pd.to_datetime("2012-01-06")], "value": [1]}) - validate_required_columns(df, ["week", "value"]) # should not raise - - -def test_validate_required_columns_raises_when_missing(): - df = pd.DataFrame({"week": [pd.to_datetime("2012-01-06")]}) - with pytest.raises(ValueError): - validate_required_columns(df, ["week", "value"]) - - -def test_add_week_ending_friday_column_creates_expected_week_ending(): - # Pick a date that is not Friday to make the test obvious - df = pd.DataFrame({"week": pd.to_datetime(["2012-01-03"])}) # Tuesday - out = add_week_ending_friday_column(df, date_col="week", new_col="week_ending") - - # Week ending Friday should be 2012-01-06 at midnight - assert out["week_ending"].iloc[0] == pd.Timestamp("2012-01-06") - - -def test_coerce_numeric_and_dropna_drops_invalid_values(): - df = pd.DataFrame({"value": ["10", "not-a-number", None]}) - out = coerce_numeric_and_dropna(df, value_col="value") - - assert len(out) == 1 - assert out["value"].iloc[0] == EXPECTED_SINGLE_VALUE From d3501cad8a300ef2e9aa7e928aea1fa158a583bb Mon Sep 17 00:00:00 2001 From: Eisuke Kobayashi Date: Fri, 27 Mar 2026 22:41:16 -0400 Subject: [PATCH 3/3] delete unused file --- Homepage.py | 243 ---------------------------------------------------- 1 file changed, 243 deletions(-) delete mode 100644 Homepage.py diff --git a/Homepage.py b/Homepage.py deleted file mode 100644 index 61cb4bd..0000000 --- a/Homepage.py +++ /dev/null @@ -1,243 +0,0 @@ -import matplotlib.pyplot as plt -import pandas as pd -import streamlit as st -from google.cloud import bigquery -from google.oauth2 import service_account - -from tests.eia_part3 import latest_value - -st.set_page_config(page_title="Weekly U.S. Petroleum Supply", layout="wide") -st.title("The Correlation between Weekly U.S. Petroleum Product Supplied and WTI Crude Oil Price") -st.subheader("Team Members: Irina, Indra") -st.caption("Source: U.S. Energy Information Administration (EIA)") - -# ========================= -# Project Proposal -# ========================= -with st.expander("Project Proposal", expanded=False): - st.subheader("Project Overview") - st.write( - """ - This project analyzes weekly U.S. petroleum product supplied data and - WTI crude oil spot price data using the EIA API. Our goal is to explore - how petroleum supply and crude oil prices evolve over time and whether - they exhibit similar patterns during major economic or energy market events. - """ - ) - - st.subheader("Datasets") - st.markdown( - """ - - **Weekly U.S. Petroleum Product Supplied** - https://www.eia.gov/opendata/browser/petroleum/cons/wpsup - - - **Weekly WTI Crude Oil Spot Price (RWTC)** - https://www.eia.gov/opendata/browser/petroleum/pri/spt - """ - ) - - st.subheader("Research Questions") - st.markdown( - """ - 1. How has U.S. petroleum product supplied changed since 2012? - 2. How has WTI crude oil price changed over the same period? - 3. Do petroleum supply and crude oil prices show similar patterns over time? - 4. Are there noticeable disruptions during major events such as the COVID-19 period? - """ - ) - - st.subheader("Link to the notebook") - st.markdown( - "[Project Notebook](https://github.com/advanced-computing/giggling-wombat/blob/main/project.ipynb)" - ) - - st.subheader("Target Visualization") - st.markdown( - """ - - Weekly time-series line chart of U.S. petroleum product supplied - - Weekly time-series line chart of WTI crude oil price - - Visual comparison of trends between the two series - """ - ) - - st.subheader("Known Unknowns and Challenges") - st.markdown( - """ - - Petroleum product supplied is a proxy for demand rather than a direct measure - - Weekly data can be noisy and may obscure long-term trends - - Oil prices and supply may react to different economic forces - - The project depends on API data retrieval instead of downloadable CSV files - """ - ) - -st.divider() - -PROJECT_ID = "sipa-adv-c-giggling-wombat" -TABLE_ID = f"{PROJECT_ID}.petroleum_supply.weekly_supply" - - -@st.cache_resource -def get_bq_client(): - credentials = service_account.Credentials.from_service_account_info( - st.secrets["gcp_service_account"] - ) - return bigquery.Client( - credentials=credentials, - project=credentials.project_id, - ) - - -@st.cache_data(ttl=60 * 60) -def load_supply_data() -> pd.DataFrame: - client = get_bq_client() - query = f""" - SELECT week, total_product_supplied - FROM `{TABLE_ID}` - ORDER BY week - """ - df = client.query(query).to_dataframe() - df["week"] = pd.to_datetime(df["week"]) - df["total_product_supplied"] = pd.to_numeric(df["total_product_supplied"], errors="coerce") - df = df.dropna(subset=["week", "total_product_supplied"]) - return df - - -@st.cache_data(ttl=60 * 60) -def load_supply_product_data() -> pd.DataFrame: - client = get_bq_client() - query = """ - SELECT week, product_name, product_supplied - FROM `sipa-adv-c-giggling-wombat.petroleum_supply.weekly_supply_by_product` - ORDER BY week - """ - df = client.query(query).to_dataframe() - df["week"] = pd.to_datetime(df["week"]) - df["product_supplied"] = pd.to_numeric(df["product_supplied"], errors="coerce") - df = df.dropna(subset=["week", "product_name", "product_supplied"]) - return df - - -try: - weekly_total = load_supply_data() -except Exception as e: - st.error(f"Failed to load supply data from BigQuery: {e}") - st.stop() - -if weekly_total.empty: - st.error("No supply data found in BigQuery.") - st.stop() - -# ========================= -# Interactive Filters -# ========================= -st.sidebar.header("Filters") - -min_week = weekly_total["week"].min().date() -max_week = weekly_total["week"].max().date() - -start_week = st.sidebar.date_input( - "Start week", - value=min_week, - min_value=min_week, - max_value=max_week, - key="supply_start_week", -) - -end_week = st.sidebar.date_input( - "End week", - value=max_week, - min_value=min_week, - max_value=max_week, - key="supply_end_week", -) - -if start_week > end_week: - st.error("Start week must be earlier than or equal to end week.") - st.stop() - -filtered_total = weekly_total[ - (weekly_total["week"] >= pd.to_datetime(start_week)) - & (weekly_total["week"] <= pd.to_datetime(end_week)) -].copy() - -if filtered_total.empty: - st.warning("No data available for the selected date range.") - st.stop() - -weekly_by_product = load_supply_product_data() - -filtered_product = weekly_by_product[ - (weekly_by_product["week"] >= pd.to_datetime(start_week)) - & (weekly_by_product["week"] <= pd.to_datetime(end_week)) -].copy() - -product_options = sorted(filtered_product["product_name"].dropna().unique().tolist()) - -selected_products = st.sidebar.multiselect( - "Select product(s)", - options=product_options, - default=product_options[:3] if len(product_options) >= 3 else product_options, # noqa: PLR2004 - key="product_filter", -) - -try: - latest_total = latest_value( - filtered_total, - date_col="week", - value_col="total_product_supplied", - ) -except Exception: - latest_total = None - -c1, c2 = st.columns(2) -c1.metric("Weeks in selected range", f"{filtered_total.shape[0]:,}") -c2.metric( - "Latest total (sum of products)", - f"{latest_total:,.0f}" if latest_total is not None else "—", -) - -st.divider() -st.subheader("Total Product Supplied (Weekly, All Products Summed)") - -fig, ax = plt.subplots() -ax.plot(filtered_total["week"], filtered_total["total_product_supplied"]) -ax.set_xlabel("Week") -ax.set_ylabel("Total Product Supplied") -st.pyplot(fig) - -with st.expander("Show total supply data table"): - st.dataframe( - filtered_total.sort_values("week", ascending=False), - use_container_width=True, - ) - -st.caption( - "Note: 'Product supplied' is often used as a proxy for consumption. " - "This visualization is descriptive (not causal)." -) - -st.divider() -st.subheader("Product-Level Weekly Supply") - -if not selected_products: - st.warning("Please select at least one product from the sidebar.") -else: - product_plot_df = filtered_product[ - filtered_product["product_name"].isin(selected_products) - ].copy() - - fig2, ax2 = plt.subplots() - for product in selected_products: - temp = product_plot_df[product_plot_df["product_name"] == product] - ax2.plot(temp["week"], temp["product_supplied"], label=product) - - ax2.set_xlabel("Week") - ax2.set_ylabel("Product Supplied") - ax2.legend() - st.pyplot(fig2) - - with st.expander("Show product-level data table"): - st.dataframe( - product_plot_df.sort_values(["product_name", "week"], ascending=[True, False]), - use_container_width=True, - )