-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathage_wise_segregation.py
More file actions
202 lines (159 loc) · 7.07 KB
/
age_wise_segregation.py
File metadata and controls
202 lines (159 loc) · 7.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import pandas as pd
import scipy.io
from scipy.signal import argrelextrema, butter, filtfilt
import peakutils
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.io
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
def compute_features(ppg_data):
# Function to downsample a signal
def downsample(signal, current_sampling_rate, target_sampling_rate):
downsampling_factor = current_sampling_rate // target_sampling_rate
downsampled_signal = signal[::downsampling_factor]
return downsampled_signal
# Downsample PPG data
downsampled_signal = downsample(ppg_data, 125, 30)
# Filter the downsampled signal
cutoff_frequency = 0.3638
nyquist_frequency = 30 / 2.0
cutoff_fraction = cutoff_frequency / nyquist_frequency
b, a = butter(5, cutoff_fraction, btype='high')
filtered_ppg_data = filtfilt(b, a, downsampled_signal)
# Find peaks of the filtered signal
indices = peakutils.indexes(filtered_ppg_data, thres=0.02/max(filtered_ppg_data), min_dist=30/2.5)
# Identify preceding trough for each peak
comparator = np.less
trough_indices = argrelextrema(filtered_ppg_data, comparator)[0]
preceding_troughs = np.searchsorted(trough_indices, indices, side='right') - 1
preceding_troughs[preceding_troughs < 0] = 0
# Select only those troughs which precede a peak
trough_indices = trough_indices[preceding_troughs]
peak_amplitudes = filtered_ppg_data[indices]
trough_amplitudes = filtered_ppg_data[trough_indices]
# Compute difference between peaks and preceding troughs
differences = peak_amplitudes - trough_amplitudes
# Calculate mean of differences for Baseline Wander (BW)
BW_list = np.mean(differences)
# Calculate differences between peaks and previous troughs
amplitude_differences = []
for i in range(len(indices)):
if indices[i] > 0:
prev_trough_index = trough_indices[i] - 1
difference = filtered_ppg_data[indices[i]] - filtered_ppg_data[prev_trough_index]
amplitude_differences.append(difference)
# Calculate inter-peak intervals
time_ppg_downsampled = np.arange(0, len(downsampled_signal)/30, 1/30)
peak_times = time_ppg_downsampled[indices]
inter_peak_intervals = np.diff(peak_times)
# Compute the features
fn1 = np.std(amplitude_differences) / np.mean(amplitude_differences)
fn2 = np.mean(amplitude_differences)
trough_times = time_ppg_downsampled[trough_indices]
time_diff_troughs = np.diff(trough_times)
fn3 = np.std(time_diff_troughs) / np.mean(time_diff_troughs)
all_maxima_indices = argrelextrema(filtered_ppg_data, np.greater)[0]
all_minima_indices = argrelextrema(filtered_ppg_data, np.less)[0]
ratio_true_to_all_maxima = len(indices) / len(all_maxima_indices)
fn4 = ratio_true_to_all_maxima
return fn1, fn2, fn3, fn4
def flatten_rr_data(rr_data_list):
rr = []
for i in range(len(rr_data_list)):
q = rr_data_list[i][0][0]
q = np.array(q).flatten()
rr.append(q)
return np.array(rr)
def load_and_preprocess_data(start_index, end_index, exclude_rows=None):
rr_data_list = []
ppg_data = []
# Ensure exclude_rows is a list to avoid issues later
if exclude_rows is None:
exclude_rows = []
# Loop through the struct and read RR data and PPG data
for i in range(start_index, end_index+1):
if i in exclude_rows:
continue
# Read RR data
rr_data = scipy.io.loadmat(f'bidmc_data.mat')['data']['ref'][0, i-1]['params'][0, 0]['rr'][0, 0]['v']
rr_data_list.append(rr_data)
# Read PPG data
filename = f'bidmc_csv/bidmc_{str(i).zfill(2)}_Signals.csv'
data = pd.read_csv(filename)
ppg_data.append(data[' PLETH'])
return rr_data_list, ppg_data
def train_model(rr_data_list, ppg_data):
rr = flatten_rr_data(rr_data_list)
features = []
for i in range(len(ppg_data)):
fn1, fn2, fn3, fn4 = compute_features(ppg_data[i])
features.append([fn1, fn2, fn3, fn4])
X = np.array(features)
X_expanded = np.repeat(X, 481, axis=0)
rr_flattened = rr.flatten()
model = LinearRegression()
model.fit(X_expanded, rr_flattened)
return model, X_expanded, rr_flattened
def evaluate_model(model, X, rr_flattened):
predictions = model.predict(X)
predictions = np.maximum(predictions, 0)
error = np.mean(np.abs(predictions - rr_flattened))
print("error: ", error)
plt.plot(rr_flattened, label='Actual')
plt.plot(predictions, label='Predicted')
plt.legend()
plt.show()
def split_data(rr_data_list, ppg_data, split):
rr_train, rr_test, ppg_train, ppg_test = train_test_split(rr_data_list, ppg_data, test_size=split)
return rr_train, rr_test, ppg_train, ppg_test
def train_and_evaluate_for_group(patient_group, excluded_group,split):
# Load and preprocess data for the patient group
rr_data, ppg_data = load_and_preprocess_data(min(patient_group), max(patient_group), exclude_rows=excluded_group)
rr_train, rr_test, ppg_train, ppg_test = split_data(rr_data, ppg_data, split)
model, X_train, rr_train_flattened = train_model(rr_train, ppg_train)
print("Training evaluation for the patient group:")
evaluate_model(model, X_train, rr_train_flattened)
X_test = []
for ppg in ppg_test:
features = compute_features(ppg)
X_test.append(features)
X_test_expanded = np.repeat(X_test, 481, axis=0)
print("Test evaluation for the patient group:")
rr_test_flattened = flatten_rr_data(rr_test).flatten()
evaluate_model(model, X_test_expanded, rr_test_flattened)
return model
age_list = []
for i in range(1, 54):
filename = f'bidmc_csv/bidmc_{str(i).zfill(2)}_Fix.txt'
with open(filename, 'r') as file:
file_contents = file.read()
age_index = file_contents.find("Age:")
if age_index != -1:
age_string = file_contents[age_index + len("Age:"):].strip()
age_end_index = age_string.find("\n")
if age_end_index != -1:
age = age_string[:age_end_index].strip()
age_list.append(age)
ex = []
for i in range(len(age_list)):
if(age_list[i] == "90+"):
age_list[i] = '91'
if(age_list[i] == "NaN"):
age_list[i] = '0'
ex.append(i+1)
age_list[i] = int(age_list[i])
less_tahn_60 = []
for i in range(len(age_list)):
if(age_list[i] <= 60):
less_tahn_60.append(i+1)
greater_than_60 = []
for i in range(len(age_list)):
if(age_list[i] > 60):
greater_than_60.append(i+1)
print("less than 60: ", len(less_tahn_60) -2)
print("greater than 60: ", len(greater_than_60) - 2)
# print(less_tahn_60)
less_than_60_model = train_and_evaluate_for_group(less_tahn_60, ex + greater_than_60 +[15] , 0.1)
greater_than_60_model = train_and_evaluate_for_group(greater_than_60, ex + less_tahn_60 + [13] + [19], 0.2)