-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot_metrics.py
More file actions
86 lines (66 loc) · 2.81 KB
/
plot_metrics.py
File metadata and controls
86 lines (66 loc) · 2.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tqdm import tqdm
from joblib import Parallel, delayed
# Experiment settings. Run k (1..iterations) evaluates data starting on
# 1 January of (start_year - k + 1), so higher-numbered runs see longer histories.
start_year = 2014
iterations = 30

# Placeholder; rebound to the {iteration: [MSE, MAE]} mapping once the
# parallel runs further down have finished.
results = {}

# Download the full available daily history for the '^GSPC' symbol
# (presumably the S&P 500 index on Yahoo Finance). Performs network I/O.
ticker = yf.Ticker('^GSPC')
usa_data_full = ticker.history(period='max')
# function to compute metrics on rolling iterations
def compute_iteration(start_year, iteration, data, *, window_size=252,
                      alpha=1.0, end_date="2024-01-01", seed=None):
    """Score a rolling-window Ridge forecast on one historical slice of ``data``.

    The slice runs from 1 January of ``start_year - iteration + 1`` (inclusive)
    up to ``end_date`` (exclusive). For every position ``i`` from
    ``window_size`` to the second-to-last row, a Ridge model is fitted on rows
    ``[i - window_size, i)`` using the previous row's close and volume as
    features, and then used to forecast the close of row ``i + 1``. Each
    forecast is perturbed with Gaussian noise whose mean and standard deviation
    are those of the in-sample residuals of that window.

    Args:
        start_year: Base calendar year; the slice starts ``iteration - 1``
            years before it.
        iteration: 1-based run number. It is returned unchanged so that
            results gathered from parallel workers can be keyed by it.
        data: DataFrame with ``'Close'`` and ``'Volume'`` columns whose index
            can be compared against ``'YYYY-MM-DD'`` strings (assumed to be a
            DatetimeIndex such as the one yfinance returns -- confirm for
            other sources). It is not modified.
        window_size: Number of rows in each training window.
        alpha: Regularisation strength passed to ``Ridge``.
        end_date: Exclusive upper bound of the slice, as a date string.
        seed: Optional seed for the forecast noise. ``None`` (the default)
            draws fresh entropy, so repeated calls differ; pass an int to make
            the returned metrics reproducible.

    Returns:
        Tuple ``(iteration, mse, mae)`` comparing the noisy forecasts with the
        actual closes.

    Raises:
        ValueError: If the slice has fewer than ``window_size + 2`` usable rows
            after the lag features are built, i.e. no forecast can be made.
    """
    current_start_year = f"{start_year - iteration + 1}-01-01"
    in_range = (data.index >= current_start_year) & (data.index < end_date)

    # Work on an explicit copy: adding columns to a boolean-mask slice is
    # chained assignment and triggers pandas' SettingWithCopyWarning.
    usa_data = data.loc[in_range].copy()
    usa_data['Lagged_Close'] = usa_data['Close'].shift(1)
    usa_data['Lagged_Volume'] = usa_data['Volume'].shift(1)
    # shift(1) leaves NaNs in the first row; rows with a NaN in any column go.
    usa_data = usa_data.dropna()

    X = usa_data[['Lagged_Close', 'Lagged_Volume']].to_numpy()
    y = usa_data['Close'].to_numpy()

    n_forecasts = len(X) - window_size - 1
    if n_forecasts < 1:
        raise ValueError(
            f"Need at least {window_size + 2} rows between {current_start_year} "
            f"and {end_date} to produce a forecast, got {len(X)}."
        )

    # A generator local to this call keeps the noise independent of the
    # process-global NumPy state, which parallel workers may otherwise share.
    rng = np.random.default_rng(seed)

    predicted_prices = []
    for i in range(window_size, len(X) - 1):
        X_train = X[i - window_size:i]
        y_train = y[i - window_size:i]

        model = Ridge(alpha=alpha)
        model.fit(X_train, y_train)

        residuals = y_train - model.predict(X_train)
        mu = residuals.mean()
        sigma = residuals.std()

        # NOTE(review): row i itself is neither trained on nor forecast; the
        # target is row i + 1. Kept as in the original -- confirm this gap is
        # intended.
        X_test = X[i + 1].reshape(1, -1)
        predicted_prices.append(model.predict(X_test)[0] + mu + sigma * rng.normal())

    # Forecast k targets row window_size + 1 + k, so the two arrays line up.
    actual_prices = y[window_size + 1:]
    predicted_prices = np.asarray(predicted_prices)

    mae = mean_absolute_error(y_true=actual_prices, y_pred=predicted_prices)
    mse = mean_squared_error(y_true=actual_prices, y_pred=predicted_prices)
    return iteration, mse, mae
# Evaluate every iteration in parallel (n_jobs=-1 asks joblib for all CPUs);
# each task yields an (iteration, mse, mae) tuple.
results_list = Parallel(n_jobs=-1)(
    delayed(compute_iteration)(start_year, run, usa_data_full)
    for run in range(1, iterations + 1)
)

# Key the metrics by iteration number, then tabulate them: one row per
# iteration, one column per metric.
results = {run: [mse_value, mae_value] for run, mse_value, mae_value in results_list}
results_df = pd.DataFrame.from_dict(results, orient='index', columns=['MSE', 'MAE'])

# Draw the two metrics side by side, one panel each.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
panels = (
    ('MSE', 'Variation of MSE with Iterations', 'Mean Squared Error (MSE)', 'blue'),
    ('MAE', 'Variation of MAE with Iterations', 'Mean Absolute Error (MAE)', 'red'),
)
for ax, (metric, title, y_label, line_color) in zip(axes, panels):
    ax.plot(results_df.index, results_df[metric], marker='o', label=metric, color=line_color)
    ax.set_title(title)
    ax.set_xlabel('Iteration')
    ax.set_ylabel(y_label)
    ax.grid(True)
    ax.legend()

plt.tight_layout()
plt.show()