From 58556f8b2987f46b655d01287408c96af5325e94 Mon Sep 17 00:00:00 2001
From: Eisuke Kobayashi <eisuke7296@gmail.com>
Date: Fri, 27 Mar 2026 14:16:20 -0400
Subject: [PATCH 1/3] add md

---
 Homepage.py       | 243 ++++++++++++++++++++++++++++++++++++++++++++++
 dataloading.md    |   4 +
 research.ipynb    |   2 +-
 test_eia_part3.py | 115 ++++++++++++++++++++++
 4 files changed, 363 insertions(+), 1 deletion(-)
 create mode 100644 Homepage.py
 create mode 100644 dataloading.md
 create mode 100644 test_eia_part3.py

diff --git a/Homepage.py b/Homepage.py
new file mode 100644
index 0000000..61cb4bd
--- /dev/null
+++ b/Homepage.py
@@ -0,0 +1,243 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+import streamlit as st
+from google.cloud import bigquery
+from google.oauth2 import service_account
+
+from tests.eia_part3 import latest_value
+
+st.set_page_config(page_title="Weekly U.S. Petroleum Supply", layout="wide")
+st.title("The Correlation between Weekly U.S. Petroleum Product Supplied and WTI Crude Oil Price")
+st.subheader("Team Members: Irina, Indra")
+st.caption("Source: U.S. Energy Information Administration (EIA)")
+
+# =========================
+# Project Proposal
+# =========================
+with st.expander("Project Proposal", expanded=False):
+    st.subheader("Project Overview")
+    st.write(
+        """
+        This project analyzes weekly U.S. petroleum product supplied data and
+        WTI crude oil spot price data using the EIA API. Our goal is to explore
+        how petroleum supply and crude oil prices evolve over time and whether
+        they exhibit similar patterns during major economic or energy market events.
+        """
+    )
+
+    st.subheader("Datasets")
+    st.markdown(
+        """
+        - **Weekly U.S. Petroleum Product Supplied**
+          https://www.eia.gov/opendata/browser/petroleum/cons/wpsup
+
+        - **Weekly WTI Crude Oil Spot Price (RWTC)**
+          https://www.eia.gov/opendata/browser/petroleum/pri/spt
+        """
+    )
+
+    st.subheader("Research Questions")
+    st.markdown(
+        """
+        1. How has U.S. petroleum product supplied changed since 2012?
+        2. How has WTI crude oil price changed over the same period?
+        3. Do petroleum supply and crude oil prices show similar patterns over time?
+        4. Are there noticeable disruptions during major events such as the COVID-19 period?
+        """
+    )
+
+    st.subheader("Link to the notebook")
+    st.markdown(
+        "[Project Notebook](https://github.com/advanced-computing/giggling-wombat/blob/main/project.ipynb)"
+    )
+
+    st.subheader("Target Visualization")
+    st.markdown(
+        """
+        - Weekly time-series line chart of U.S. petroleum product supplied
+        - Weekly time-series line chart of WTI crude oil price
+        - Visual comparison of trends between the two series
+        """
+    )
+
+    st.subheader("Known Unknowns and Challenges")
+    st.markdown(
+        """
+        - Petroleum product supplied is a proxy for demand rather than a direct measure
+        - Weekly data can be noisy and may obscure long-term trends
+        - Oil prices and supply may react to different economic forces
+        - The project depends on API data retrieval instead of downloadable CSV files
+        """
+    )
+
+st.divider()
+
+PROJECT_ID = "sipa-adv-c-giggling-wombat"
+TABLE_ID = f"{PROJECT_ID}.petroleum_supply.weekly_supply"
+
+
+@st.cache_resource
+def get_bq_client():
+    credentials = service_account.Credentials.from_service_account_info(
+        st.secrets["gcp_service_account"]
+    )
+    return bigquery.Client(
+        credentials=credentials,
+        project=credentials.project_id,
+    )
+
+
+@st.cache_data(ttl=60 * 60)
+def load_supply_data() -> pd.DataFrame:
+    client = get_bq_client()
+    query = f"""
+        SELECT week, total_product_supplied
+        FROM `{TABLE_ID}`
+        ORDER BY week
+    """
+    df = client.query(query).to_dataframe()
+    df["week"] = pd.to_datetime(df["week"])
+    df["total_product_supplied"] = pd.to_numeric(df["total_product_supplied"], errors="coerce")
+    df = df.dropna(subset=["week", "total_product_supplied"])
+    return df
+
+
+@st.cache_data(ttl=60 * 60)
+def load_supply_product_data() -> pd.DataFrame:
+    client = get_bq_client()
+    query = """
+        SELECT week, product_name, product_supplied
+        FROM `sipa-adv-c-giggling-wombat.petroleum_supply.weekly_supply_by_product`
+        ORDER BY week
+    """
+    df = client.query(query).to_dataframe()
+    df["week"] = pd.to_datetime(df["week"])
+    df["product_supplied"] = pd.to_numeric(df["product_supplied"], errors="coerce")
+    df = df.dropna(subset=["week", "product_name", "product_supplied"])
+    return df
+
+
+try:
+    weekly_total = load_supply_data()
+except Exception as e:
+    st.error(f"Failed to load supply data from BigQuery: {e}")
+    st.stop()
+
+if weekly_total.empty:
+    st.error("No supply data found in BigQuery.")
+    st.stop()
+
+# =========================
+# Interactive Filters
+# =========================
+st.sidebar.header("Filters")
+
+min_week = weekly_total["week"].min().date()
+max_week = weekly_total["week"].max().date()
+
+start_week = st.sidebar.date_input(
+    "Start week",
+    value=min_week,
+    min_value=min_week,
+    max_value=max_week,
+    key="supply_start_week",
+)
+
+end_week = st.sidebar.date_input(
+    "End week",
+    value=max_week,
+    min_value=min_week,
+    max_value=max_week,
+    key="supply_end_week",
+)
+
+if start_week > end_week:
+    st.error("Start week must be earlier than or equal to end week.")
+    st.stop()
+
+filtered_total = weekly_total[
+    (weekly_total["week"] >= pd.to_datetime(start_week))
+    & (weekly_total["week"] <= pd.to_datetime(end_week))
+].copy()
+
+if filtered_total.empty:
+    st.warning("No data available for the selected date range.")
+    st.stop()
+
+weekly_by_product = load_supply_product_data()
+
+filtered_product = weekly_by_product[
+    (weekly_by_product["week"] >= pd.to_datetime(start_week))
+    & (weekly_by_product["week"] <= pd.to_datetime(end_week))
+].copy()
+
+product_options = sorted(filtered_product["product_name"].dropna().unique().tolist())
+
+selected_products = st.sidebar.multiselect(
+    "Select product(s)",
+    options=product_options,
+    default=product_options[:3] if len(product_options) >= 3 else product_options,  # noqa: PLR2004
+    key="product_filter",
+)
+
+try:
+    latest_total = latest_value(
+        filtered_total,
+        date_col="week",
+        value_col="total_product_supplied",
+    )
+except Exception:
+    latest_total = None
+
+c1, c2 = st.columns(2)
+c1.metric("Weeks in selected range", f"{filtered_total.shape[0]:,}")
+c2.metric(
+    "Latest total (sum of products)",
+    f"{latest_total:,.0f}" if latest_total is not None else "—",
+)
+
+st.divider()
+st.subheader("Total Product Supplied (Weekly, All Products Summed)")
+
+fig, ax = plt.subplots()
+ax.plot(filtered_total["week"], filtered_total["total_product_supplied"])
+ax.set_xlabel("Week")
+ax.set_ylabel("Total Product Supplied")
+st.pyplot(fig)
+
+with st.expander("Show total supply data table"):
+    st.dataframe(
+        filtered_total.sort_values("week", ascending=False),
+        use_container_width=True,
+    )
+
+st.caption(
+    "Note: 'Product supplied' is often used as a proxy for consumption. "
+    "This visualization is descriptive (not causal)."
+)
+
+st.divider()
+st.subheader("Product-Level Weekly Supply")
+
+if not selected_products:
+    st.warning("Please select at least one product from the sidebar.")
+else:
+    product_plot_df = filtered_product[
+        filtered_product["product_name"].isin(selected_products)
+    ].copy()
+
+    fig2, ax2 = plt.subplots()
+    for product in selected_products:
+        temp = product_plot_df[product_plot_df["product_name"] == product]
+        ax2.plot(temp["week"], temp["product_supplied"], label=product)
+
+    ax2.set_xlabel("Week")
+    ax2.set_ylabel("Product Supplied")
+    ax2.legend()
+    st.pyplot(fig2)
+
+    with st.expander("Show product-level data table"):
+        st.dataframe(
+            product_plot_df.sort_values(["product_name", "week"], ascending=[True, False]),
+            use_container_width=True,
+        )
diff --git a/dataloading.md b/dataloading.md
new file mode 100644
index 0000000..4ba1f35
--- /dev/null
+++ b/dataloading.md
@@ -0,0 +1,4 @@
+# Electricity Price Data
+* For each month from 2017/01 to the latest available month, fetch the zip file from the source site via the API, extract all CSV files inside it, and concatenate them.
+* Create the table in BigQuery from the first month (2017/01) only.
+* Append data to the table from 2017/02 up to the latest available month.
\ No newline at end of file
diff --git a/research.ipynb b/research.ipynb
index ebab691..7d65b2a 100644
--- a/research.ipynb
+++ b/research.ipynb
@@ -629,7 +629,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": ".venv",
+   "display_name": ".venv (3.13.12)",
    "language": "python",
    "name": "python3"
   },
diff --git a/test_eia_part3.py b/test_eia_part3.py
new file mode 100644
index 0000000..768516c
--- /dev/null
+++ b/test_eia_part3.py
@@ -0,0 +1,115 @@
+import pandas as pd
+import pytest
+from eia_part3 import (
+    add_week_ending_friday_column,
+    build_df_from_eia_data,
+    coerce_numeric_and_dropna,
+    filter_since,
+    latest_value,
+    sum_by_week,
+    validate_required_columns,
+)
+
+# Constants used in tests to satisfy Ruff PLR2004 (no "magic numbers" in comparisons)
+EXPECTED_FIRST_VALUE = 100
+EXPECTED_FILTERED_VALUE = 2
+EXPECTED_LATEST_VALUE = 500.0
+EXPECTED_WEEK_COUNT = 2
+EXPECTED_WEEK1_SUM = 17
+EXPECTED_WEEK2_SUM = 3
+EXPECTED_SINGLE_VALUE = 10
+
+
+def test_build_df_from_eia_data_parses_and_drops_bad_rows():
+    # includes: valid row, invalid date, invalid value
+    data = [
+        {"period": "2012-01-06", "value": "100"},
+        {"period": "not-a-date", "value": "200"},
+        {"period": "2012-01-13", "value": "not-a-number"},
+    ]
+    df = build_df_from_eia_data(data)
+
+    # only the first row should survive
+    assert len(df) == 1
+    assert pd.api.types.is_datetime64_any_dtype(df["week"])
+    assert pd.api.types.is_numeric_dtype(df["value"])
+    assert df["week"].iloc[0] == pd.to_datetime("2012-01-06")
+    assert df["value"].iloc[0] == EXPECTED_FIRST_VALUE
+
+
+def test_filter_since_keeps_2012_and_after():
+    data = [
+        {"period": "2011-12-30", "value": "1"},
+        {"period": "2012-01-06", "value": "2"},
+    ]
+    df = build_df_from_eia_data(data)
+    df2 = filter_since(df, date_col="week", start_date="2012-01-01")
+
+    assert len(df2) == 1
+    assert df2["week"].iloc[0] == pd.to_datetime("2012-01-06")
+    assert df2["value"].iloc[0] == EXPECTED_FILTERED_VALUE
+
+
+def test_latest_value_returns_value_of_most_recent_date_even_if_unsorted():
+    # intentionally unsorted
+    data = [
+        {"period": "2012-01-13", "value": "300"},
+        {"period": "2012-01-06", "value": "100"},
+        {"period": "2012-01-20", "value": "500"},
+    ]
+    df = build_df_from_eia_data(data)
+    v = latest_value(df, date_col="week", value_col="value")
+
+    assert v == EXPECTED_LATEST_VALUE
+
+
+def test_latest_value_raises_on_empty_df():
+    with pytest.raises(ValueError):
+        latest_value(pd.DataFrame(), date_col="week", value_col="value")
+
+
+def test_sum_by_week_sums_duplicates():
+    data = [
+        {"period": "2012-01-06", "value": "10"},
+        {"period": "2012-01-06", "value": "7"},
+        {"period": "2012-01-13", "value": "3"},
+    ]
+    df = build_df_from_eia_data(data)
+    out = sum_by_week(df, date_col="week", value_col="value")
+
+    assert list(out.columns) == ["week", "value"]
+    assert len(out) == EXPECTED_WEEK_COUNT
+    assert (
+        out.loc[out["week"] == pd.to_datetime("2012-01-06"), "value"].iloc[0] == EXPECTED_WEEK1_SUM
+    )
+    assert (
+        out.loc[out["week"] == pd.to_datetime("2012-01-13"), "value"].iloc[0] == EXPECTED_WEEK2_SUM
+    )
+
+
+def test_validate_required_columns_passes_when_present():
+    df = pd.DataFrame({"week": [pd.to_datetime("2012-01-06")], "value": [1]})
+    validate_required_columns(df, ["week", "value"])  # should not raise
+
+
+def test_validate_required_columns_raises_when_missing():
+    df = pd.DataFrame({"week": [pd.to_datetime("2012-01-06")]})
+    with pytest.raises(ValueError):
+        validate_required_columns(df, ["week", "value"])
+
+
+def test_add_week_ending_friday_column_creates_expected_week_ending():
+    # Pick a date that is not Friday to make the test obvious
+    df = pd.DataFrame({"week": pd.to_datetime(["2012-01-03"])})  # Tuesday
+    out = add_week_ending_friday_column(df, date_col="week", new_col="week_ending")
+
+    # Week ending Friday should be 2012-01-06 at midnight
+    assert out["week_ending"].iloc[0] == pd.Timestamp("2012-01-06")
+
+
+def test_coerce_numeric_and_dropna_drops_invalid_values():
+    df = pd.DataFrame({"value": ["10", "not-a-number", None]})
+    out = coerce_numeric_and_dropna(df, value_col="value")
+
+    assert len(out) == 1
+    assert out["value"].iloc[0] == EXPECTED_SINGLE_VALUE

From f0685d568d0f248c155b66e4d64a3bc5a6d912e6 Mon Sep 17 00:00:00 2001
From: Eisuke Kobayashi <eisuke7296@gmail.com>
Date: Fri, 27 Mar 2026 22:38:47 -0400
Subject: [PATCH 2/3] change the design of our website

---
 market_analysis.py |  56 +++++++++-------------
 streamlit_app.py   |   7 +++
 test_eia_part3.py  | 115 ---------------------------------------------
 3 files changed, 29 insertions(+), 149 deletions(-)
 delete mode 100644 test_eia_part3.py

diff --git a/market_analysis.py b/market_analysis.py
index f108ddb..e553a81 100644
--- a/market_analysis.py
+++ b/market_analysis.py
@@ -28,6 +28,7 @@
 # ------ API loaders -------
 @st.cache_data(ttl=3600)
 def load_nyiso_realtime(selected_month) -> any:
+
     start_date = datetime.datetime.strptime(selected_month, "%Y-%m-%d")
 
     if start_date.month == 12:
@@ -42,7 +43,6 @@ def load_nyiso_realtime(selected_month) -> any:
     AND Time_Stamp < '{end_date.strftime("%Y-%m-%d")}'
     """
     df = pandas_gbq.read_gbq(sql, credentials=credentials)
-
     return df
 
 
@@ -158,7 +158,7 @@ def render_intro() -> None:
     st.divider()
 
 
-def render_electricity_section(realtime_df: pd.DataFrame) -> None:
+def render_electricity_section() -> None:
     st.header("Electricity Market Overview")
 
     st.write(
@@ -168,9 +168,27 @@ def render_electricity_section(realtime_df: pd.DataFrame) -> None:
         """
     )
 
+    # input month
+    year = st.selectbox("Year", range(2017, 2027), index=9)
+    month = st.selectbox("Month", range(1, 13))
+    selected_month = datetime.date(year, month, 1)
+    selected_month_str = selected_month.strftime("%Y-%m-%d")
+
+    if selected_month > datetime.date.today():
+        st.error("No data available.")
+        st.stop()
+
+    try:
+        realtime_df = load_nyiso_realtime(selected_month_str)
+    except Exception as exc:
+        st.error(
+            f"Failed to load NYISO electricity data from online public source: {exc}"
+        )
+        return
+
+    # input zones
     zones = sorted(realtime_df["Name"].dropna().unique().tolist())
     default_zone = "N.Y.C." if "N.Y.C." in zones else zones[0]
-
     zone = st.selectbox("Select a NYISO zone", zones, index=zones.index(default_zone))
     zone_df = realtime_df.loc[realtime_df["Name"] == zone].copy()
     zone_df = zone_df.sort_values("Time_Stamp")
@@ -294,38 +312,8 @@ def render_comparison_section(gas_available: bool) -> None:
 # ------ Main ------
 def main() -> None:
     render_sidebar()
-
-    st.sidebar.subheader("Electricity Data Controls")
-    nyiso_month = st.sidebar.text_input(
-        "NYISO month (YYYYMM)",
-        value="202602",
-        help="Example: 202602 for February 2026",
-    )
-    try:
-        nyiso_month_datetime = datetime.datetime.strptime(nyiso_month, "%Y%m")
-
-        if nyiso_month_datetime < datetime.datetime(2017, 1, 1):
-            st.error("No data available. Please fill months after 2017")
-            st.stop()
-
-        selected_month = nyiso_month_datetime.strftime("%Y-%m-%d")
-
-    except ValueError:
-        st.error("Invalid form. Please write in YYYYMM")
-        st.stop()
-
     render_intro()
-
-    try:
-        realtime_df = load_nyiso_realtime(selected_month)
-    except Exception as exc:
-        st.error(
-            f"Failed to load NYISO electricity data from online public source: {exc}"
-        )
-        return
-
-    # Electricity always renders if available
-    render_electricity_section(realtime_df)
+    render_electricity_section()
 
     gas_available = False
     try:
diff --git a/streamlit_app.py b/streamlit_app.py
index 5084720..2a6367a 100644
--- a/streamlit_app.py
+++ b/streamlit_app.py
@@ -1,7 +1,14 @@
 import streamlit as st
 
+import time
+
+start_time = time.time()
+
 page_proposal = st.Page("proposal.py", title="Our Proposal")
 page_market = st.Page("market_analysis.py", title="Energy Market Dashboard")
 
 pg = st.navigation([page_proposal, page_market])
 pg.run()
+
+elapsed = time.time() - start_time
+st.caption(f"Page loaded in {elapsed:.2f} seconds")
diff --git a/test_eia_part3.py b/test_eia_part3.py
deleted file mode 100644
index 768516c..0000000
--- a/test_eia_part3.py
+++ /dev/null
@@ -1,115 +0,0 @@
-import pandas as pd
-import pytest
-from eia_part3 import (
-    add_week_ending_friday_column,
-    build_df_from_eia_data,
-    coerce_numeric_and_dropna,
-    filter_since,
-    latest_value,
-    sum_by_week,
-    validate_required_columns,
-)
-
-# Constants used in tests to satisfy Ruff PLR2004 (no "magic numbers" in comparisons)
-EXPECTED_FIRST_VALUE = 100
-EXPECTED_FILTERED_VALUE = 2
-EXPECTED_LATEST_VALUE = 500.0
-EXPECTED_WEEK_COUNT = 2
-EXPECTED_WEEK1_SUM = 17
-EXPECTED_WEEK2_SUM = 3
-EXPECTED_SINGLE_VALUE = 10
-
-
-def test_build_df_from_eia_data_parses_and_drops_bad_rows():
-    # includes: valid row, invalid date, invalid value
-    data = [
-        {"period": "2012-01-06", "value": "100"},
-        {"period": "not-a-date", "value": "200"},
-        {"period": "2012-01-13", "value": "not-a-number"},
-    ]
-    df = build_df_from_eia_data(data)
-
-    # only the first row should survive
-    assert len(df) == 1
-    assert pd.api.types.is_datetime64_any_dtype(df["week"])
-    assert pd.api.types.is_numeric_dtype(df["value"])
-    assert df["week"].iloc[0] == pd.to_datetime("2012-01-06")
-    assert df["value"].iloc[0] == EXPECTED_FIRST_VALUE
-
-
-def test_filter_since_keeps_2012_and_after():
-    data = [
-        {"period": "2011-12-30", "value": "1"},
-        {"period": "2012-01-06", "value": "2"},
-    ]
-    df = build_df_from_eia_data(data)
-    df2 = filter_since(df, date_col="week", start_date="2012-01-01")
-
-    assert len(df2) == 1
-    assert df2["week"].iloc[0] == pd.to_datetime("2012-01-06")
-    assert df2["value"].iloc[0] == EXPECTED_FILTERED_VALUE
-
-
-def test_latest_value_returns_value_of_most_recent_date_even_if_unsorted():
-    # intentionally unsorted
-    data = [
-        {"period": "2012-01-13", "value": "300"},
-        {"period": "2012-01-06", "value": "100"},
-        {"period": "2012-01-20", "value": "500"},
-    ]
-    df = build_df_from_eia_data(data)
-    v = latest_value(df, date_col="week", value_col="value")
-
-    assert v == EXPECTED_LATEST_VALUE
-
-
-def test_latest_value_raises_on_empty_df():
-    with pytest.raises(ValueError):
-        latest_value(pd.DataFrame(), date_col="week", value_col="value")
-
-
-def test_sum_by_week_sums_duplicates():
-    data = [
-        {"period": "2012-01-06", "value": "10"},
-        {"period": "2012-01-06", "value": "7"},
-        {"period": "2012-01-13", "value": "3"},
-    ]
-    df = build_df_from_eia_data(data)
-    out = sum_by_week(df, date_col="week", value_col="value")
-
-    assert list(out.columns) == ["week", "value"]
-    assert len(out) == EXPECTED_WEEK_COUNT
-    assert (
-        out.loc[out["week"] == pd.to_datetime("2012-01-06"), "value"].iloc[0] == EXPECTED_WEEK1_SUM
-    )
-    assert (
-        out.loc[out["week"] == pd.to_datetime("2012-01-13"), "value"].iloc[0] == EXPECTED_WEEK2_SUM
-    )
-
-
-def test_validate_required_columns_passes_when_present():
-    df = pd.DataFrame({"week": [pd.to_datetime("2012-01-06")], "value": [1]})
-    validate_required_columns(df, ["week", "value"])  # should not raise
-
-
-def test_validate_required_columns_raises_when_missing():
-    df = pd.DataFrame({"week": [pd.to_datetime("2012-01-06")]})
-    with pytest.raises(ValueError):
-        validate_required_columns(df, ["week", "value"])
-
-
-def test_add_week_ending_friday_column_creates_expected_week_ending():
-    # Pick a date that is not Friday to make the test obvious
-    df = pd.DataFrame({"week": pd.to_datetime(["2012-01-03"])})  # Tuesday
-    out = add_week_ending_friday_column(df, date_col="week", new_col="week_ending")
-
-    # Week ending Friday should be 2012-01-06 at midnight
-    assert out["week_ending"].iloc[0] == pd.Timestamp("2012-01-06")
-
-
-def test_coerce_numeric_and_dropna_drops_invalid_values():
-    df = pd.DataFrame({"value": ["10", "not-a-number", None]})
-    out = coerce_numeric_and_dropna(df, value_col="value")
-
-    assert len(out) == 1
-    assert out["value"].iloc[0] == EXPECTED_SINGLE_VALUE

From d3501cad8a300ef2e9aa7e928aea1fa158a583bb Mon Sep 17 00:00:00 2001
From: Eisuke Kobayashi <eisuke7296@gmail.com>
Date: Fri, 27 Mar 2026 22:41:16 -0400
Subject: [PATCH 3/3] delete unused file

---
 Homepage.py | 243 ----------------------------------------------------
 1 file changed, 243 deletions(-)
 delete mode 100644 Homepage.py

diff --git a/Homepage.py b/Homepage.py
deleted file mode 100644
index 61cb4bd..0000000
--- a/Homepage.py
+++ /dev/null
@@ -1,243 +0,0 @@
-import matplotlib.pyplot as plt
-import pandas as pd
-import streamlit as st
-from google.cloud import bigquery
-from google.oauth2 import service_account
-
-from tests.eia_part3 import latest_value
-
-st.set_page_config(page_title="Weekly U.S. Petroleum Supply", layout="wide")
-st.title("The Correlation between Weekly U.S. Petroleum Product Supplied and WTI Crude Oil Price")
-st.subheader("Team Members: Irina, Indra")
-st.caption("Source: U.S. Energy Information Administration (EIA)")
-
-# =========================
-# Project Proposal
-# =========================
-with st.expander("Project Proposal", expanded=False):
-    st.subheader("Project Overview")
-    st.write(
-        """
-        This project analyzes weekly U.S. petroleum product supplied data and
-        WTI crude oil spot price data using the EIA API. Our goal is to explore
-        how petroleum supply and crude oil prices evolve over time and whether
-        they exhibit similar patterns during major economic or energy market events.
-        """
-    )
-
-    st.subheader("Datasets")
-    st.markdown(
-        """
-        - **Weekly U.S. Petroleum Product Supplied**
-          https://www.eia.gov/opendata/browser/petroleum/cons/wpsup
-
-        - **Weekly WTI Crude Oil Spot Price (RWTC)**
-          https://www.eia.gov/opendata/browser/petroleum/pri/spt
-        """
-    )
-
-    st.subheader("Research Questions")
-    st.markdown(
-        """
-        1. How has U.S. petroleum product supplied changed since 2012?
-        2. How has WTI crude oil price changed over the same period?
-        3. Do petroleum supply and crude oil prices show similar patterns over time?
-        4. Are there noticeable disruptions during major events such as the COVID-19 period?
-        """
-    )
-
-    st.subheader("Link to the notebook")
-    st.markdown(
-        "[Project Notebook](https://github.com/advanced-computing/giggling-wombat/blob/main/project.ipynb)"
-    )
-
-    st.subheader("Target Visualization")
-    st.markdown(
-        """
-        - Weekly time-series line chart of U.S. petroleum product supplied
-        - Weekly time-series line chart of WTI crude oil price
-        - Visual comparison of trends between the two series
-        """
-    )
-
-    st.subheader("Known Unknowns and Challenges")
-    st.markdown(
-        """
-        - Petroleum product supplied is a proxy for demand rather than a direct measure
-        - Weekly data can be noisy and may obscure long-term trends
-        - Oil prices and supply may react to different economic forces
-        - The project depends on API data retrieval instead of downloadable CSV files
-        """
-    )
-
-st.divider()
-
-PROJECT_ID = "sipa-adv-c-giggling-wombat"
-TABLE_ID = f"{PROJECT_ID}.petroleum_supply.weekly_supply"
-
-
-@st.cache_resource
-def get_bq_client():
-    credentials = service_account.Credentials.from_service_account_info(
-        st.secrets["gcp_service_account"]
-    )
-    return bigquery.Client(
-        credentials=credentials,
-        project=credentials.project_id,
-    )
-
-
-@st.cache_data(ttl=60 * 60)
-def load_supply_data() -> pd.DataFrame:
-    client = get_bq_client()
-    query = f"""
-        SELECT week, total_product_supplied
-        FROM `{TABLE_ID}`
-        ORDER BY week
-    """
-    df = client.query(query).to_dataframe()
-    df["week"] = pd.to_datetime(df["week"])
-    df["total_product_supplied"] = pd.to_numeric(df["total_product_supplied"], errors="coerce")
-    df = df.dropna(subset=["week", "total_product_supplied"])
-    return df
-
-
-@st.cache_data(ttl=60 * 60)
-def load_supply_product_data() -> pd.DataFrame:
-    client = get_bq_client()
-    query = """
-        SELECT week, product_name, product_supplied
-        FROM `sipa-adv-c-giggling-wombat.petroleum_supply.weekly_supply_by_product`
-        ORDER BY week
-    """
-    df = client.query(query).to_dataframe()
-    df["week"] = pd.to_datetime(df["week"])
-    df["product_supplied"] = pd.to_numeric(df["product_supplied"], errors="coerce")
-    df = df.dropna(subset=["week", "product_name", "product_supplied"])
-    return df
-
-
-try:
-    weekly_total = load_supply_data()
-except Exception as e:
-    st.error(f"Failed to load supply data from BigQuery: {e}")
-    st.stop()
-
-if weekly_total.empty:
-    st.error("No supply data found in BigQuery.")
-    st.stop()
-
-# =========================
-# Interactive Filters
-# =========================
-st.sidebar.header("Filters")
-
-min_week = weekly_total["week"].min().date()
-max_week = weekly_total["week"].max().date()
-
-start_week = st.sidebar.date_input(
-    "Start week",
-    value=min_week,
-    min_value=min_week,
-    max_value=max_week,
-    key="supply_start_week",
-)
-
-end_week = st.sidebar.date_input(
-    "End week",
-    value=max_week,
-    min_value=min_week,
-    max_value=max_week,
-    key="supply_end_week",
-)
-
-if start_week > end_week:
-    st.error("Start week must be earlier than or equal to end week.")
-    st.stop()
-
-filtered_total = weekly_total[
-    (weekly_total["week"] >= pd.to_datetime(start_week))
-    & (weekly_total["week"] <= pd.to_datetime(end_week))
-].copy()
-
-if filtered_total.empty:
-    st.warning("No data available for the selected date range.")
-    st.stop()
-
-weekly_by_product = load_supply_product_data()
-
-filtered_product = weekly_by_product[
-    (weekly_by_product["week"] >= pd.to_datetime(start_week))
-    & (weekly_by_product["week"] <= pd.to_datetime(end_week))
-].copy()
-
-product_options = sorted(filtered_product["product_name"].dropna().unique().tolist())
-
-selected_products = st.sidebar.multiselect(
-    "Select product(s)",
-    options=product_options,
-    default=product_options[:3] if len(product_options) >= 3 else product_options,  # noqa: PLR2004
-    key="product_filter",
-)
-
-try:
-    latest_total = latest_value(
-        filtered_total,
-        date_col="week",
-        value_col="total_product_supplied",
-    )
-except Exception:
-    latest_total = None
-
-c1, c2 = st.columns(2)
-c1.metric("Weeks in selected range", f"{filtered_total.shape[0]:,}")
-c2.metric(
-    "Latest total (sum of products)",
-    f"{latest_total:,.0f}" if latest_total is not None else "—",
-)
-
-st.divider()
-st.subheader("Total Product Supplied (Weekly, All Products Summed)")
-
-fig, ax = plt.subplots()
-ax.plot(filtered_total["week"], filtered_total["total_product_supplied"])
-ax.set_xlabel("Week")
-ax.set_ylabel("Total Product Supplied")
-st.pyplot(fig)
-
-with st.expander("Show total supply data table"):
-    st.dataframe(
-        filtered_total.sort_values("week", ascending=False),
-        use_container_width=True,
-    )
-
-st.caption(
-    "Note: 'Product supplied' is often used as a proxy for consumption. "
-    "This visualization is descriptive (not causal)."
-)
-
-st.divider()
-st.subheader("Product-Level Weekly Supply")
-
-if not selected_products:
-    st.warning("Please select at least one product from the sidebar.")
-else:
-    product_plot_df = filtered_product[
-        filtered_product["product_name"].isin(selected_products)
-    ].copy()
-
-    fig2, ax2 = plt.subplots()
-    for product in selected_products:
-        temp = product_plot_df[product_plot_df["product_name"] == product]
-        ax2.plot(temp["week"], temp["product_supplied"], label=product)
-
-    ax2.set_xlabel("Week")
-    ax2.set_ylabel("Product Supplied")
-    ax2.legend()
-    st.pyplot(fig2)
-
-    with st.expander("Show product-level data table"):
-        st.dataframe(
-            product_plot_df.sort_values(["product_name", "week"], ascending=[True, False]),
-            use_container_width=True,
-        )