-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpull_api.py
More file actions
131 lines (103 loc) · 4.77 KB
/
pull_api.py
File metadata and controls
131 lines (103 loc) · 4.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import requests
import pandas as pd
import json
def fetch_data_from_postgrest_api(url, endpoint, timeout=30):
    """Fetch and decode the JSON body of a PostgREST-style API endpoint.

    Parameters
    ----------
    url : str
        Base URL of the API; ``endpoint`` is appended to it verbatim.
    endpoint : str
        Resource path to request.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.get``.
        Without it a stalled server would block the script indefinitely.

    Returns
    -------
    object or None
        The decoded JSON payload when the server answers with status 200.
        ``None`` when the request fails, another status code is returned,
        or the body is not valid JSON; the reason is printed to stdout.
    """
    try:
        response = requests.get(url + endpoint, timeout=timeout)
    except requests.RequestException as e:
        # Connection errors, timeouts, invalid URLs, ...
        print(f"An error occurred: {str(e)}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    try:
        return response.json()
    except ValueError as e:
        # The JSON decode errors raised by response.json() subclass ValueError.
        print(f"An error occurred: {str(e)}")
        return None
def mental_health_catalogue():
    """Pull study metadata from the mental health catalogue API into a CSV.

    Requests the ``studies`` endpoint, keeps a fixed set of columns under
    their output names, filters the rows to the study ids listed in
    ``llc_studies`` and writes the result to ``mh_catalogue.csv`` in the
    current working directory. Progress is reported on stdout.

    If the fetch fails, a failure message is printed and no file is written.
    """
    api_url = "https://www.cataloguementalhealth.ac.uk:443/testing/api/v2/"
    endpoint_to_access = "studies"

    # Source column -> output column, in the order they appear in the CSV.
    column_names = {
        "study_id": "MH_study_id",
        "title": "LPS name",
        "aims": "Aims",
        "website": "Website",
        "related_themes": "Themes",
        "sample_type": "Sample type",
        "geographic_coverage_nations": "Geographic coverage - Nations",
        "geographic_coverage_regions": "Geographic coverage - Regions",
        "start_date": "Start date",
        "sample_size_at_recruitment": "Sample size at recruitment",
        "age_at_recruitment": "Age at recruitment",
        "sex": "sex",
    }
    llc_studies = [
        "ALSPAC", "AHMS", "BCS", "BiB", "ELSA", "GSSFHS", "LSYPE",
        "MCS", "NCDS", "NICOLA", "SABRE", "TEDS", "TwinsUK", "UKHLS",
    ]

    records = fetch_data_from_postgrest_api(api_url, endpoint_to_access)

    # Test for failure BEFORE building the DataFrame: pd.DataFrame(None) is an
    # empty frame, never None, so checking afterwards can never detect it.
    if records is None:
        print("Failed to fetch mental health data")
        return

    print("Data fetched successfully:")
    studies = pd.DataFrame(data=records)

    # rename() returns a new frame, so no column labels are assigned on a
    # selection of another frame.
    data_out = studies[list(column_names)].rename(columns=column_names)
    data_out = data_out.loc[data_out["MH_study_id"].isin(llc_studies)]
    data_out.to_csv("mh_catalogue.csv")
    print("Successfully retrieved mental health data")
def load_pids(pid_loc='datasets_pids_lookup.json'):
    """Load the lookup of HDRUK-provided persistent ids (dataset level).

    Parameters
    ----------
    pid_loc : str, optional
        Path of the JSON lookup file. Defaults to
        ``datasets_pids_lookup.json`` in the current working directory.

    Returns
    -------
    pids : object
        The decoded JSON content of the file. ``gateway`` iterates it with
        ``.items()``, using each key as the persistent id in the request URL
        and each value as the dataset's short name.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(pid_loc, "r", encoding="utf-8") as f:
        return json.load(f)
# Dataset short names whose title is read from the 'metadataquality' section
# of the record instead of 'datautility'.
_TITLE_IN_METADATAQUALITY = frozenset(
    {"ECDS", "COVIDSGSS", "IELISA", "CHESS", "GDPPR"}
)

# Column order of gateway.csv.
_GATEWAY_COLUMNS = [
    "title",
    "abstract",
    "geo_coverage",
    "start_date",
    "age_band",
    "collection_situation",
    "purpose",
    "source",
    "pathway",
]


def _gateway_row(dataset, val):
    """Flatten one decoded Gateway dataset record into a single output row."""
    fields = dataset["data"]["datasetfields"]
    v2 = dataset["data"]["datasetv2"]

    if val in _TITLE_IN_METADATAQUALITY:
        title = fields["metadataquality"]["title"]
    else:
        title = fields["datautility"]["title"]

    row = {
        "title": title,
        "abstract": fields["abstract"],
        "geo_coverage": fields["geographicCoverage"][0],
        "start_date": fields["datasetStartDate"],
        "age_band": fields["ageBand"],
    }

    if val == "MHSDS":
        # The provenance origin is never read for MHSDS; these stay blank.
        row["collection_situation"] = ""
        row["purpose"] = ""
        row["source"] = ""
    else:
        origin = v2["provenance"]["origin"]
        if val == "HESAE":
            # Fixed label used for HESAE instead of the record's own value.
            row["collection_situation"] = "A&E"
        else:
            row["collection_situation"] = origin["collectionSituation"][0]
        row["purpose"] = origin["purpose"][0]
        row["source"] = origin["source"][0]

    row["pathway"] = v2["coverage"]["pathway"]
    return row


def gateway(timeout=30):
    """Pull metadata for every dataset in the pid lookup and write gateway.csv.

    For each persistent id returned by ``load_pids`` the dataset record is
    requested from the Health Data Gateway API, reduced to the columns in
    ``_GATEWAY_COLUMNS`` and collected; all rows are then written to
    ``gateway.csv`` in the current working directory, one row per dataset
    with a running row number as the index column.

    Parameters
    ----------
    timeout : float or tuple, optional
        Seconds to wait for each response, passed to ``requests.get``.

    Returns
    -------
    None
        The result is the CSV file. An empty lookup produces a CSV that
        contains only the header row.

    Raises
    ------
    requests.RequestException
        If a request fails, times out or returns an HTTP error status.
    KeyError
        If a record lacks one of the expected fields.
    """
    pids = load_pids()

    rows = []
    for key, val in pids.items():
        # The persistent id of the target dataset is the last URL segment.
        url = "https://api.www.healthdatagateway.org/api/v1/datasets/" + key
        response = requests.get(url, timeout=timeout)
        # Fail on HTTP errors here rather than with a KeyError further down.
        response.raise_for_status()
        rows.append(_gateway_row(response.json(), val))

    # Build the frame once: no per-iteration concat, and unique index values.
    data_out_df = pd.DataFrame(rows, columns=_GATEWAY_COLUMNS)
    data_out_df.to_csv("gateway.csv")
def main():
    """Run both metadata pulls in order: the gateway first, then the catalogue."""
    for pull in (gateway, mental_health_catalogue):
        pull()


# Only run the pulls when executed as a script, not when imported.
if __name__ == "__main__":
    main()