Skip to content

Commit eaa627e

Browse files
Hwallace/jupyter (#35)
* updates to include a notebook interface * Remove debug path * Add sklearn notebook * more notebook tuning * update basic * add pytorch * Delete notebooks/.pdf.png
1 parent b8ab3fd commit eaa627e

14 files changed

Lines changed: 3001 additions & 33 deletions

notebooks/basic_network.ipynb

Lines changed: 1207 additions & 0 deletions
Large diffs are not rendered by default.

notebooks/basic_random_forest.ipynb

Lines changed: 430 additions & 0 deletions
Large diffs are not rendered by default.

notebooks/basic_sklearn.ipynb

Lines changed: 388 additions & 0 deletions
Large diffs are not rendered by default.

notebooks/basic_torch.ipynb

Lines changed: 590 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
from MaCh3PythonUtils.file_handling.chain_handler import ChainHandler
2+
import matplotlib.pyplot as plt
3+
4+
class ChainDiagnostics:
    """Draw diagnostic plots (trace, autocorrelation, posterior) for one chain parameter.

    The chain is read from ``config_reader.ttree_array``, which is accessed
    through ``.columns`` and ``.iloc`` (presumably a pandas DataFrame with one
    column per parameter -- TODO confirm against ChainHandler).
    """

    # Upper bound on the number of lags drawn by make_autocorr_plot.
    _MAX_AUTOCORR_LAGS = 1000

    def __init__(self, config_reader: ChainHandler) -> None:
        """Store the chain handler whose ``ttree_array`` will be plotted."""
        self._chain_handler = config_reader

    def _extract_chain_information(self, parameter_name: str | int):
        """Resolve a parameter given by name or position.

        :param parameter_name: Column label (``str``) or column index (``int``).
        :return: Tuple ``(chain, name)`` where ``chain`` is the selected column
            of ``ttree_array`` and ``name`` is its column label.
        :raises TypeError: If ``parameter_name`` is neither ``str`` nor ``int``.
        """
        columns = self._chain_handler.ttree_array.columns

        if isinstance(parameter_name, str):
            parameter_id = columns.get_loc(parameter_name)
        elif isinstance(parameter_name, int):
            parameter_id = parameter_name
            parameter_name = columns[parameter_id]
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on parameter_id.
            raise TypeError(
                f"parameter_name must be str or int, got {type(parameter_name).__name__}"
            )

        return self._chain_handler.ttree_array.iloc[:, parameter_id], parameter_name

    def __make_plot(self, fig, axs):
        """Return a usable ``(fig, axs)`` pair, creating whatever is missing.

        :param fig: Existing figure or ``None``.
        :param axs: Existing axes or ``None``.
        :return: ``(fig, axs)``; caller-supplied objects are always reused.
        """
        if axs is not None:
            # Honour caller-supplied axes even when no figure was passed,
            # instead of silently drawing on a brand new figure.
            if fig is None:
                fig = axs.figure
            return fig, axs

        if fig is None:
            return plt.subplots(1, 1, figsize=(10, 5))

        return fig, fig.add_subplot(1, 1, 1)

    def make_trace_plot(self, parameter_name: str | int, axs=None, fig=None):
        """Plot the parameter value against step number.

        :return: ``(fig, axs)`` that were drawn on.
        """
        fig, axs = self.__make_plot(fig, axs)

        chain, _ = self._extract_chain_information(parameter_name)
        axs.plot(chain, linewidth=0.5, color='darkorange')

        return fig, axs

    def make_autocorr_plot(self, parameter_name: str | int, axs=None, fig=None):
        """Plot the autocorrelation of the parameter's chain.

        :return: ``(fig, axs)`` that were drawn on.
        """
        fig, axs = self.__make_plot(fig, axs)

        chain, _ = self._extract_chain_information(parameter_name)
        # Axes.acorr rejects maxlags >= len(x), so clamp for short chains.
        max_lags = min(self._MAX_AUTOCORR_LAGS, len(chain) - 1)
        axs.acorr(chain, maxlags=max_lags, linewidth=0.5, color='darkorange')

        return fig, axs

    def make_posterior_hist_plot(self, parameter_name: str | int, axs=None, fig=None, is_horizontal=False):
        """Plot a 50-bin, density-normalised histogram of the parameter's chain.

        :param is_horizontal: Draw the bars horizontally instead of vertically.
        :return: ``(fig, axs)`` that were drawn on.
        """
        fig, axs = self.__make_plot(fig, axs)

        orientation = 'horizontal' if is_horizontal else 'vertical'

        chain, _ = self._extract_chain_information(parameter_name)
        axs.hist(chain, bins=50, density=True, linewidth=0.5, color='darkorange', alpha=0.5, orientation=orientation)

        return fig, axs

    def __call__(self, parameter_name: str):
        """Draw the trace plot with a horizontal posterior histogram beside it.

        :return: ``(fig, axs)`` where ``axs`` is the 2x2 axes array; the
            bottom-row axes have been removed from the figure.
        """
        fig, axs = plt.subplots(2, 2, figsize=(15, 5))
        # Only the top row is drawn on; drop the bottom row from the figure.
        axs[1][1].remove()
        axs[1][0].remove()

        fig, axs[0][0] = self.make_trace_plot(parameter_name, axs=axs[0][0], fig=fig)
        fig, axs[0][1] = self.make_posterior_hist_plot(parameter_name, axs=axs[0][1], fig=fig, is_horizontal=True)

        # Hide the histogram's y tick labels and close the horizontal gap so
        # it sits flush against the trace plot.
        plt.setp(axs[0][1].get_yticklabels(), visible=False)
        fig.subplots_adjust(wspace=.0)

        return fig, axs

src/MaCh3PythonUtils/machine_learning/file_ml_interface.py

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import matplotlib.pyplot as plt
2121
from matplotlib.backends.backend_pdf import PdfPages
2222

23+
from rich import print
24+
2325
class FileMLInterface(ABC):
2426
white_viridis = LinearSegmentedColormap.from_list('white_viridis', [
2527
(0, '#ffffff'),
@@ -59,7 +61,7 @@ def __init__(self, chain: ChainHandler, prediction_variable: str, fit_name: str)
5961
self._scaler = StandardScaler()
6062
# self._pca_matrix = PCA(n_components=0.95)
6163

62-
self._label_scaler = MinMaxScaler(feature_range=(0, 1))
64+
self._label_scaler = StandardScaler()
6365

6466

6567

@@ -86,19 +88,19 @@ def set_training_test_set(self, test_size: float):
8688
self._training_data, self._test_data, self._training_labels, self._test_labels = train_test_split(features, labels, test_size=test_size)
8789

8890
# Fit scaling pre-processors. These get applied properly when scale_data is called
89-
_= self._scaler.fit_transform(self._training_data)
90-
self._label_scaler.fit_transform(self._training_labels)
91+
self._scaler.fit(self._training_data)
92+
self._label_scaler.fit(self._training_labels)
9193

9294
# self._pca_matrix.fit(scaled_training)
9395

9496
def scale_data(self, input_data):
9597
# Applies transformations to data set
9698
scale_data = self._scaler.transform(input_data)
97-
# scale_data = self._pca_matrix.transform(scale_data)
9899
return scale_data
99100

100101
def scale_labels(self, labels):
    """Apply the fitted label scaler to ``labels`` and return the result."""
    label_scaler = self._label_scaler
    scaled_labels = label_scaler.transform(labels)
    return scaled_labels
102104

103105
def invert_scaling(self, input_data):
104106
# Inverts transform
@@ -193,7 +195,7 @@ def load_model(self, input_model: str):
193195
:param input_file: Pickled Model
194196
:type input_file: str
195197
"""
196-
print(f"Attempting to load file from {input_file}")
198+
print(f"[spring_green1]Attempting to load file from[/spring_green1][bold red3] {input_file}")
197199
with open(input_model, 'r') as f:
198200
self._model = pickle.load(f)
199201

@@ -216,14 +218,18 @@ def test_model(self):
216218
train_as_numpy = self.scale_labels(self._training_labels).T[0]
217219
self.evaluate_model(train_prediction, train_as_numpy, "train_qq_plot.pdf")
218220

219-
print("=====\n\n")
221+
print("=====")
220222
print("Testing Results!")
221223

222224
test_prediction = self.model_predict(self._test_data)
223225
test_as_numpy = self.scale_labels(self._test_labels).T[0]
224226

225227
self.evaluate_model(test_prediction, test_as_numpy, outfile=f"{self._fit_name}")
226-
print("=====\n\n")
228+
print("=====")
229+
230+
231+
def print_model_summary(self):
    """Print the model summary header line."""
    summary_header = "Model Summary"
    print(summary_header)
227233

228234
def model_predict_single_sample(self, sample):
229235
sample_shaped = sample.reshape(1,-1)
@@ -232,7 +238,7 @@ def model_predict_single_sample(self, sample):
232238
def get_maxlikelihood(self)->OptimizeResult:
233239
init_vals = self.training_data.iloc[[1]].to_numpy()[0]
234240

235-
print("Calculating max LLH")
241+
print("[bold purple]Calculating max LLH")
236242
maximal_likelihood = minimize(self.model_predict_single_sample, init_vals, bounds=zip(self._chain.lower_bounds[:-1], self._chain.upper_bounds[:-1]), method="L-BFGS-B", options={"disp": True})
237243
return maximal_likelihood
238244

@@ -245,9 +251,9 @@ def run_likelihood_scan(self, n_divisions: int = 500):
245251

246252
errors = np.sqrt(np.diag(maximal_likelihood.hess_inv(np.identity(self.chain.ndim-1))))
247253

248-
print("Maximal Pars :")
254+
print("[bold red3]Maximal Pars :")
249255
for i in range(self.chain.ndim-1):
250-
print(f"Param : {self.chain.plot_branches[i]} : {maximal_likelihood.x[i]}±{errors[i]}")
256+
print(f"[bold red3]Param :[/bold red3] [yellow3]{self.chain.plot_branches[i]} : {maximal_likelihood.x[i]}±{errors[i]}")
251257

252258

253259
with PdfPages("llh_scan.pdf") as pdf:
@@ -285,13 +291,14 @@ def evaluate_model(self, predicted_values: Iterable, true_values: Iterable, outf
285291
:type outfile: str, optional
286292
"""
287293

288-
print(predicted_values)
289-
print(f"Mean Absolute Error : {metrics.mean_absolute_error(predicted_values,true_values)}")
290-
294+
print(f"[bold red3]Mean Absolute Error :[/bold red3] [yellow3]{metrics.mean_absolute_error(predicted_values,true_values)}")
291295

296+
outfile_name = outfile.split(".")[0]
297+
outfile = f"{outfile_name}.pdf"
298+
warnings.filterwarnings("ignore", message="Polyfit may be poorly conditioned")
292299
lobf = np.poly1d(np.polyfit(predicted_values, true_values, 1))
293300

294-
print(f"Line of best fit : y={lobf.c[0]}x + {lobf.c[1]}")
301+
print(f"[bold purple]Line of best fit :[/bold purple] [dodger_blue1]y={lobf.c[0]}x + {lobf.c[1]}")
295302

296303
fig = plt.figure()
297304

@@ -322,11 +329,21 @@ def evaluate_model(self, predicted_values: Iterable, true_values: Iterable, outf
322329
ax.set_ylabel("True Log Likelihood")
323330

324331
fig.legend()
332+
325333
if outfile=="": outfile = f"evaluated_model_qq_tf.pdf"
326334

327-
print(f"Saving QQ to {outfile}")
335+
print(f"[bold spring_green1]Saving QQ to[/bold spring_green1][dodger_blue1] {outfile}")
328336

329337
fig.savefig(outfile)
338+
339+
try:
340+
is_notebook = self.is_notebook()
341+
if is_notebook:
342+
plt.show()
343+
except Exception:
344+
...
345+
346+
330347
plt.close()
331348

332349
# Gonna draw a hist
@@ -335,4 +352,18 @@ def evaluate_model(self, predicted_values: Iterable, true_values: Iterable, outf
335352
plt.hist(difs, bins=100, density=True, range=(np.std(difs)*-5, np.std(difs)*5))
336353
plt.xlabel("True - Pred")
337354
plt.savefig(f"diffs_5sigma_range_{outfile}")
338-
plt.close()
355+
356+
plt.close()
357+
358+
@classmethod
def is_notebook(cls) -> bool:
    """Report whether the code is running under a ZMQ-based IPython shell.

    :return: ``True`` only when ``get_ipython()`` exists and its class is
        named ``ZMQInteractiveShell`` (Jupyter notebook or qtconsole);
        ``False`` for a terminal IPython shell, any other shell type, or
        when ``get_ipython`` is not defined at all.
    """
    try:
        shell_name = get_ipython().__class__.__name__
    except NameError:
        # get_ipython is not defined: probably the standard Python interpreter.
        return False

    return shell_name == 'ZMQInteractiveShell'

src/MaCh3PythonUtils/machine_learning/ml_factory.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@
1111
from MaCh3PythonUtils.machine_learning.tensorflow.tf_manual_interface import TfManualLayeredInterface
1212
from MaCh3PythonUtils.machine_learning.tensorflow.tf_interface import TfInterface
1313

14-
from MaCh3PythonUtils.file_handling.chain_handler import ChainHandler
14+
from MaCh3PythonUtils.machine_learning.torch.torch_interface import TorchInterface
1515

16+
from MaCh3PythonUtils.file_handling.chain_handler import ChainHandler
1617
import sklearn.ensemble as ske
1718
import tensorflow.keras as tfk
1819

@@ -32,6 +33,9 @@ class MLFactory:
3233
"normalizing_flow": TfNormalizingFlowModel,
3334
"autotune": TfAutotuneInterface
3435
},
36+
"torch": {
37+
"sequential": TorchInterface
38+
}
3539
}
3640

3741
def __init__(self, input_chain: ChainHandler, prediction_variable: str, plot_name: str):
@@ -92,7 +96,7 @@ def __make_scikit_model(self, algorithm: str, **kwargs)->SciKitInterface:
9296
def __make_tensorflow_layered_model(self, interface: TfManualLayeredInterface, layers: dict)->TfManualLayeredInterface:
9397
for layer in layers:
9498
layer_id = list(layer.keys())[0]
95-
interface.add_layer(layer_id, layer[layer_id])
99+
interface.add_layer(layer_id, layer[layer_id].copy())
96100

97101
return interface
98102

@@ -106,14 +110,28 @@ def __make_tensorflow_model(self, algorithm: str, **kwargs)->TfInterface:
106110

107111
# Ugh
108112
if algorithm=="sequential" or algorithm=="residual":
109-
print("HERE")
110113
model = self.__make_tensorflow_layered_model(model, kwargs["Layers"])
111114
model.set_training_settings(kwargs.get("FitSettings"))
112115

113116

114117
model.build_model(**kwargs["BuildSettings"])
115118

116119
return model
120+
121+
def __make_torch_model(self, algorithm: str, **kwargs)->TorchInterface:
    """Build a torch interface for ``algorithm`` from config-style kwargs.

    :param algorithm: Key into the "torch" table of implemented algorithms
        (looked up case-insensitively).
    :param kwargs: Must contain "Layers" (an iterable of single-entry dicts
        mapping a layer id to its argument dict), "BuildSettings" and
        "FitSettings" (both unpacked into ``build_model``).
    :return: The interface after all layers are added and the model is built.
    :raises Exception: If ``algorithm`` is not in the torch table.
    """
    torch_algorithms = self.__IMPLEMENTED_ALGORITHMS["torch"]
    interface_cls = torch_algorithms.get(algorithm.lower(), None)
    if interface_cls is None:
        raise Exception(f"Cannot find {algorithm}")

    model: TorchInterface = interface_cls(self._chain, self._prediction_variable, self._plot_name)

    for layer_spec in kwargs["Layers"]:
        # Each entry is keyed by its layer id; only the first key is used.
        layer_id = list(layer_spec)[0]
        # Hand over a copy so the caller's layer arguments are never mutated.
        layer_args = layer_spec[layer_id].copy()
        model.add_layer(layer_id=layer_id, layer_args=layer_args)

    model.build_model(**kwargs["BuildSettings"], **kwargs["FitSettings"])
    return model
117135

118136
def make_interface(self, interface_type: str, algorithm: str, **kwargs):
119137
interface_type = interface_type.lower()
@@ -122,5 +140,9 @@ def make_interface(self, interface_type: str, algorithm: str, **kwargs):
122140
return self.__make_scikit_model(algorithm, **kwargs)
123141
case "tensorflow":
124142
return self.__make_tensorflow_model(algorithm, **kwargs)
143+
case "torch":
144+
return self.__make_torch_model(algorithm, **kwargs)
145+
125146
case _:
126-
raise Exception(f"{interface_type} not implemented!")
147+
raise Exception(f"{interface_type} not implemented!")
148+

src/MaCh3PythonUtils/machine_learning/scikit/scikit_interface.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from pandas import DataFrame
22
from MaCh3PythonUtils.machine_learning.file_ml_interface import FileMLInterface
3+
from tqdm import tqdm
34

45
"""
56
TODO:

src/MaCh3PythonUtils/machine_learning/tensorflow/tf_interface.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,4 +103,5 @@ def evaluate_model(self, predicted_values: Iterable, true_values: Iterable, outf
103103

104104
# CODE TO DO TF SPECIFIC PLOTS GOES HERE
105105

106-
return super().evaluate_model(predicted_values, true_values, outfile)
106+
return super().evaluate_model(predicted_values, true_values, outfile)
107+

src/MaCh3PythonUtils/machine_learning/tensorflow/tf_manual_interface.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,8 @@ def train_model(self):
2222
scaled_data = self.scale_data(self._training_data)
2323
scaled_labels = self.scale_labels(self._training_labels)
2424

25-
lr_schedule = tfk.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=10, factor=0.5, min_lr=1e-8, verbose=1)
25+
lr_schedule = tfk.callbacks.ReduceLROnPlateau(monitor="val_loss", patience=10, factor=0.1, min_lr=1e-9, verbose=1)
2626
stop_early = tfk.callbacks.EarlyStopping(monitor='val_loss', patience=20)
27-
2827
self._model.fit(scaled_data, scaled_labels, **self._training_settings, callbacks=[lr_schedule, stop_early])
2928

3029

@@ -53,12 +52,6 @@ def add_layer(self, layer_id: str, layer_args: dict):
5352
# Hacky, swaps string value of regularliser for proper one
5453
layer_args["kernel_regularizer"] = tfk.regularizers.L2(layer_args["kernel_regularizer"])
5554

55+
5656
self._layers.append(self.__TF_LAYER_IMPLEMENTATIONS[layer_id.lower()](**layer_args))
5757

58-
59-
60-
61-
62-
63-
64-

0 commit comments

Comments
 (0)