-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcloud_function.py
More file actions
56 lines (48 loc) · 1.94 KB
/
cloud_function.py
File metadata and controls
56 lines (48 loc) · 1.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Cloud Function example to process file upload events
import os
from googleapiclient.discovery import build
def process_file_upload(event, context):
    """Launch the Dataflow flex-template ETL job for a file uploaded to GCS.

    Triggered by a Cloud Storage object-finalize event. Only objects whose
    name starts with the "in/" prefix are processed; all others are ignored.

    Args:
        event (dict): GCS event payload; must contain 'name' (object path
            within the bucket) and 'bucket' (bucket id).
        context (google.cloud.functions.Context): Event metadata; its
            event_id is used to build a unique Dataflow job name.

    Raises:
        RuntimeError: If any of the DB_USER / DB_PASS / DB_HOST environment
            variables is missing.
    """
    file_name = event['name']
    # BUG FIX: the original used `"in/" in event['name']`, a substring test
    # that also matched paths like "login/x" or "backup/in/x". Only objects
    # actually inside the top-level "in/" folder should trigger the ETL.
    if not file_name.startswith("in/"):
        return

    db_user = os.getenv("DB_USER")
    db_pass = os.getenv("DB_PASS")
    db_host = os.getenv("DB_HOST")
    if not (db_user and db_pass and db_host):
        # Fail fast: the ETL job cannot reach the database without these.
        raise RuntimeError("Cannot execute function. No DB env vars found")

    bucket = event['bucket']
    # NOTE(review): the bucket id doubles as the GCP project id here; this
    # only works when the two are identical — confirm against deployment.
    project_id = bucket
    job_name = f"{bucket}-upload-etl-{context.event_id}"
    template_path = f"gs://{bucket}/dataflow_templates/cars-etl.json"
    input_file = f"gs://{bucket}/{file_name}"
    payload_params = {
        "input": input_file,
        "db_user": db_user,
        "db_pass": db_pass,
        "db_host": db_host,
        "gcp_project_id": project_id,
        "gcp_bucket_id": bucket,
    }
    environment_params = {"tempLocation": f"gs://{bucket}/tmp"}
    try:
        # cache_discovery=False avoids the oauth2client file-cache warning
        # in the serverless environment.
        service = build('dataflow', 'v1b3', cache_discovery=False)
        request = service.projects().locations().flexTemplates().launch(
            projectId=project_id,
            location="us-east1",
            body={
                "launchParameter": {
                    "jobName": job_name,
                    "parameters": payload_params,
                    "environment": environment_params,
                    "containerSpecGcsPath": template_path,
                }
            },
        )
        response = request.execute()
        print(response)
    except Exception as e:
        # NOTE(review): print-and-swallow keeps the invocation from being
        # marked failed/retried; the error is only visible in the logs.
        # Re-raise here if failed launches should trigger Cloud Functions
        # retry semantics.
        print(e)