Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 17 additions & 21 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,20 @@
# Setup
1. python -m venv venv
2. pip install -r src/requirments.txt  (note: the file in this repository is spelled `requirments.txt`)

# How to run
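- arg1: execution type
  - M: migrate
- arg2: data source file (XML)
- arg3: target database file (SQLite)
- arg4: output CSV path (one file per nationality is written as `<name>_<NATIONALITY>.csv`)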

# visualize
- visualize.ipynb

Example
python run.py M ../data-devclub-1.xml ../database/dev_mountain.db ../output/test.csv

# hackathon-season2
## Story
สายการบิน DevMountain ถูก Take Over โดยสายการบิน DevClub ซึ่งจะต้องมีการย้ายพนักงานจาก DevMountain มา DevClub ซึ่งส่งผลกระทบกับทางแผนกไอทีต้องทำการย้ายข้อมูลมาลงในฐานข้อมูลใหม่
Expand Down Expand Up @@ -64,24 +81,3 @@
- ส่งเร็ว
- Creative ตอนทำ Data visualization
- ผลลัพธ์จาก SQLite ต้องได้เป็น **JSON** format



### ไม่รู้วิธี Fork หรือ สร้าง PR สามารถฝึกได้จากที่นี่
https://github.com/firstcontributions/first-contributions

## How to submit
- Create a PR to hackathon season 2 repository, add your name or team information and your repo link


## Q&A คำถามที่พบบ่อย
- ดูได้จาก issues
https://github.com/devmountaintechfest/hackathon-season2/issues

## ทีม Dev mountain ที่ดูแลการแข่งครั้งนี้
สมาชิก
- [annibuliful](https://github.com/annibuliful)
- [lordbenz](https://github.com/lordbenz)
- [Issawat](https://github.com/Issawat)

Repo: [hackathon](https://github.com/devmountaintechfest/hackathon-season2)
13 changes: 13 additions & 0 deletions csv/devclub.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
EMP_ID,PASSPORT,FIRSTNAME,LASTNAME,GENDER,BIRTHDAY,NATIONALITY,HIRED,DEPT,POSITION,STATUS,REGION
2,CDC87ETW8EQ,Burton,Gallegos,0,22-09-1960,Germany,29-10-2021,Aircraft Maintenance,Pilot,1,APAC
3,JUI65YBK7AF,Jada,Bender,0,28-05-1963,Pakistan,11-02-2001,Pilot,Pilot,1,Canada
5,AZE20CSG4MU,Lillian,Reese,0,03-12-1982,Ukraine,19-05-2002,Flight Planning,Steward,1,Canada
23,NFH65BYM0VB,Armand,Horn,0,24-05-1987,Netherlands,19-06-2007,Aircraft Maintenance,Airhostess,1,Ocenia
29,CMK62UAD3VK,Rowan,Leonard,1,15-07-1974,Germany,27-03-2004,Aircraft Maintenance,Pilot,1,Ocenia
33,EWD45RJW5YK,Carter,Velasquez,0,23-11-1967,Indonesia,27-02-2005,Flight Planning,Pilot,1,APAC
34,BFS82MEY3CX,Selma,Bush,0,26-03-1972,Italy,10-10-2008,Flight Attendance,Airhostess,1,USA
50,MRC33GHJ2KW,Calvin,Roach,1,16-04-1999,Mexico,18-03-2011,Flight Attendance,Steward,1,Europe
66,WKV12UQC6QF,Zachery,Valentine,0,04-06-1971,Philippines,25-08-2011,Flight Attendance,Steward,1,Middle East
80,EUC74ENE9ZK,Ryan,Rush,0,13-06-1998,Italy,31-07-2019,Aircraft Maintenance,Pilot,1,Middle East
95,OUP31WOE2IE,Dara,Wilcox,1,29-06-1996,Singapore,18-05-2011,Flight Attendance,Airhostess,1,Canada
97,SUF73DKV4QE,Dante,Hart,0,21-12-1999,Peru,22-02-2016,Pilot,Pilot,1,Europe
Binary file added database/dev_mountain.db
Binary file not shown.
2 changes: 2 additions & 0 deletions output/test_Germany.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
2,CDC87ETW8EQ,Burton,Gallegos,0,22-09-1960,Germany,29-10-2021,Aircraft Maintenance,Pilot,1,APAC
29,CMK62UAD3VK,Rowan,Leonard,1,15-07-1974,Germany,27-03-2004,Aircraft Maintenance,Pilot,1,Ocenia
1 change: 1 addition & 0 deletions output/test_Indonesia.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
33,EWD45RJW5YK,Carter,Velasquez,0,23-11-1967,Indonesia,27-02-2005,Flight Planning,Pilot,1,APAC
2 changes: 2 additions & 0 deletions output/test_Italy.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
34,BFS82MEY3CX,Selma,Bush,0,26-03-1972,Italy,10-10-2008,Flight Attendance,Airhostess,1,USA
80,EUC74ENE9ZK,Ryan,Rush,0,13-06-1998,Italy,31-07-2019,Aircraft Maintenance,Pilot,1,Middle East
1 change: 1 addition & 0 deletions output/test_Mexico.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
50,MRC33GHJ2KW,Calvin,Roach,1,16-04-1999,Mexico,18-03-2011,Flight Attendance,Steward,1,Europe
1 change: 1 addition & 0 deletions output/test_Netherlands.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
23,NFH65BYM0VB,Armand,Horn,0,24-05-1987,Netherlands,19-06-2007,Aircraft Maintenance,Airhostess,1,Ocenia
1 change: 1 addition & 0 deletions output/test_Pakistan.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3,JUI65YBK7AF,Jada,Bender,0,28-05-1963,Pakistan,11-02-2001,Pilot,Pilot,1,Canada
1 change: 1 addition & 0 deletions output/test_Peru.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
97,SUF73DKV4QE,Dante,Hart,0,21-12-1999,Peru,22-02-2016,Pilot,Pilot,1,Europe
1 change: 1 addition & 0 deletions output/test_Philippines.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
66,WKV12UQC6QF,Zachery,Valentine,0,04-06-1971,Philippines,25-08-2011,Flight Attendance,Steward,1,Middle East
1 change: 1 addition & 0 deletions output/test_Singapore.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
95,OUP31WOE2IE,Dara,Wilcox,1,29-06-1996,Singapore,18-05-2011,Flight Attendance,Airhostess,1,Canada
1 change: 1 addition & 0 deletions output/test_Ukraine.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
5,AZE20CSG4MU,Lillian,Reese,0,03-12-1982,Ukraine,19-05-2002,Flight Planning,Steward,1,Canada
37 changes: 37 additions & 0 deletions src/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import json

class DevMountainData(object):
    """One employee record as read from the DevMountain data source.

    ``data`` is accessed positionally (indexes 0 through 11) and every item
    must expose a ``.text`` attribute. The employee id, gender and status
    fields are converted to ``int``; the rest are stored as given.
    """

    def __init__(self, data):
        def text_at(position):
            # Each field lives in the ``.text`` of the item at this position.
            return data[position].text

        self.emp_id = int(text_at(0))
        self.passport = text_at(1)
        self.firstname = text_at(2)
        self.lastname = text_at(3)
        self.gender = int(text_at(4))
        self.birthday = text_at(5)
        self.nationality = text_at(6)
        self.hired = text_at(7)
        self.dept = text_at(8)
        self.position = text_at(9)
        self.status = int(text_at(10))
        self.region = text_at(11)

class ClubData(object):
    """Employee record in the DevClub layout, copied from another record object."""

    # Attribute names in the order they are copied and exported.
    _FIELDS = (
        "emp_id",
        "passport",
        "firstname",
        "lastname",
        "gender",
        "birthday",
        "nationality",
        "hired",
        "dept",
        "position",
        "status",
        "region",
    )

    def __init__(self, data):
        """Copy every field in ``_FIELDS`` from ``data`` onto this instance."""
        for name in self._FIELDS:
            setattr(self, name, getattr(data, name))

    def toSet(self):
        """Return the field values as a tuple, in ``_FIELDS`` order."""
        return tuple(getattr(self, name) for name in self._FIELDS)

    def toJson(self):
        """Return the instance attributes serialized as a JSON object string."""
        return json.dumps(vars(self))
11 changes: 11 additions & 0 deletions src/requirments.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
antiorm==1.2.1
db==0.1.1
db-sqlite3==0.0.1
lxml==4.9.1
numpy==1.23.3
python-dateutil==2.8.2
pytz==2022.4
six==1.16.0
matplotlib==3.6.1
jupyterlab==3.4.8
notebook==6.4.12
145 changes: 145 additions & 0 deletions src/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
from lxml import etree as ET
from data import DevMountainData, ClubData
from util import DataUtility, FileUtility, usedtime
import time,sys,os,csv
import sys
from datetime import datetime, date
from dateutil import relativedelta
import matplotlib.pyplot as plt


@usedtime
class Executor(object):
    """Extract employee records from XML, load them into SQLite and report.

    Typical call order: ``setup`` -> ``extract`` -> ``load`` ->
    ``generateSummary`` -> ``visualize``.

    NOTE: ``@usedtime`` is applied to the class itself, so the module-level
    name ``Executor`` is bound to the timing wrapper rather than to the class.
    Calling ``Executor()`` still returns an instance (and prints the time
    spent constructing it).
    """

    # Class-level defaults. ``setup`` and ``extract`` rebind these on the
    # instance, so the mutable defaults below are never mutated in place.
    config = {}
    total = 0
    valid = 0
    invalid = 0
    results = []

    @usedtime
    def setup(self, config):
        """Store the run configuration.

        ``config`` is read with the keys ``datasource``, ``dbName`` and
        ``clubDataReport`` by the other methods.
        """
        self.config = config

    @usedtime
    def extract(self):
        """Parse the XML data source and collect the rows to migrate.

        Records are selected by XPath (STATUS is '1', POSITION is Steward,
        Pilot or Airhostess, and EMPID differs from PASSPORT) and then
        filtered by the date rule below. The selected rows are stored as
        tuples in ``self.results``; ``self.total``, ``self.valid`` and
        ``self.invalid`` are updated.
        """
        print("Extract..")
        # Midnight of the current day, computed once for the whole run.
        today = datetime.combine(date.today(), datetime.min.time())
        parser = ET.XMLParser(remove_comments=False)
        xml = ET.parse(self.config['datasource'], parser=parser)
        self.total = int(xml.xpath("count(/records/record)"))
        print("Raw Data Total:", self.total)
        datasource = xml.xpath(
            "/records/record[STATUS/text()='1' and (POSITION/text()='Steward' or POSITION/text()='Pilot' or "
            "POSITION/text()='Airhostess') and EMPID/text()!=PASSPORT/text()]")

        # Build a fresh list so repeated calls (or several instances) never
        # append to the shared class-level default.
        results = []
        for element in datasource:
            data = DevMountainData(element)
            born = datetime.strptime(data.birthday, "%d-%m-%Y")
            # NOTE(review): ``relativedelta(...).days`` is only the residual
            # day component of the difference (after whole years and months
            # are removed), not the total number of days, so this filter
            # depends on the day of the month the script runs. The original
            # behaviour is preserved here; confirm the intended rule (for
            # example a minimum number of years) before changing it.
            if relativedelta.relativedelta(today, born).days > 3:
                results.append(ClubData(data).toSet())

        self.results = results
        self.valid = len(results)
        self.invalid = self.total - self.valid
        print("Raw Data Valid:", self.valid)
        print("Raw Data InValid:", str(self.invalid))

    @usedtime
    def load(self):
        """Recreate the database table and views, then insert ``self.results``."""
        print("Load..")
        dataUtility = DataUtility(self.config['dbName'])
        dataUtility.dbSetup()
        dataUtility.view()
        dataUtility.save(self.results)

    @usedtime
    def generateSummary(self):
        """Write one CSV per nationality plus a combined ``../csv/devclub.csv``."""
        print("Generate Summary..")
        # Group rows by nationality (column 6) in a single pass; dicts keep
        # insertion order, so files are written in first-seen order.
        rows_by_nation = {}
        for row in self.results:
            rows_by_nation.setdefault(row[6], []).append(list(row))
        file_utility = FileUtility()
        for rows in rows_by_nation.values():
            file_utility.write(self.config['clubDataReport'], rows)

        os.makedirs("../csv", exist_ok=True)

        with open("../csv/devclub.csv", 'w', newline="") as f:
            write = csv.writer(f)
            write.writerow(["EMP_ID","PASSPORT","FIRSTNAME","LASTNAME","GENDER","BIRTHDAY","NATIONALITY","HIRED","DEPT","POSITION","STATUS","REGION"])
            write.writerows([list(dat) for dat in self.results])

    @usedtime
    def visualize(self):
        """Save a pie chart of the gender split to ``../visualize/gender.png``."""
        print("Generate visualize..")
        os.makedirs("../visualize", exist_ok=True)
        # Column 4 is the gender code; this chart labels 0 as male and 1 as
        # female, and ignores any other value.
        male = sum(1 for row in self.results if row[4] == 0)
        female = sum(1 for row in self.results if row[4] == 1)
        gender_data = [male, female]
        gender_data_label = ["Male", "Female"]
        print(gender_data)
        if not any(gender_data):
            # A pie chart cannot be drawn from all-zero counts.
            print("No gender data to visualize.")
            return
        # Use a dedicated figure and close it afterwards so repeated calls
        # do not draw on top of earlier plots or leak open figures.
        fig, ax = plt.subplots()
        try:
            ax.pie(gender_data, labels=gender_data_label)
            ax.legend()
            ax.set_title("summary gender")
            fig.savefig('../visualize/gender.png')
        finally:
            plt.close(fig)

def main():
    """Command-line entry point.

    Expected arguments: ``M <datasource> <dbName> <reportName>``.

    Returns:
        ``None`` when the run finishes or when only a hint is printed, or a
        message string when the arguments are incomplete or a ``ValueError``
        is raised during the run. The caller passes the result to
        ``sys.exit``, which prints a string and exits with a non-zero status.
    """
    try:
        print("#####start#####")
        print(f"{sys.argv}")
        if len(sys.argv) <= 1:
            print("Please enter Parameter!")
            return None

        exeType = sys.argv[1]
        print(f'{exeType}')
        if exeType != 'M':
            print("Please enter Execute Type (M) Parameter!")
            return None

        # 'M' needs three more arguments; report that instead of failing
        # with an IndexError traceback.
        if len(sys.argv) < 5:
            return "Usage: python run.py M <datasource.xml> <target.db> <report.csv>"

        config = {
            "datasource": sys.argv[2],
            "dbName": sys.argv[3],
            "clubDataReport": sys.argv[4]
        }
        startTime = time.perf_counter()
        exe = Executor()
        exe.setup(config)
        exe.extract()
        exe.load()
        exe.generateSummary()
        exe.visualize()
        endTime = time.perf_counter()
        totalTime = endTime - startTime
        print(f'##Total used time {totalTime:.4f} seconds##')
    except ValueError as ve:
        return str(ve)
    return None


if __name__ == "__main__":
    sys.exit(main())
89 changes: 89 additions & 0 deletions src/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import sqlite3
from functools import wraps
import time
import csv,json


def usedtime(func):
    """Decorator that prints how long each call to ``func`` took.

    The wrapped callable's return value is passed through unchanged. Nothing
    is printed when ``func`` raises.
    """

    def usedtimeWrapper(*args, **kwargs):
        began = time.perf_counter()
        outcome = func(*args, **kwargs)
        totalTime = time.perf_counter() - began
        print(f'{func.__name__} used time {totalTime:.4f} seconds')
        return outcome

    # Copy name, docstring and other metadata from ``func`` onto the wrapper.
    return wraps(func)(usedtimeWrapper)


class DataUtility(object):
    """Helper for the SQLite ``employee`` table and its views.

    Every method opens its own connection to ``DB_NAME`` and closes it again,
    including when a statement fails.
    """

    # (view name, extra column) pairs; each view also exposes the four
    # identifying columns EMP_ID, PASSPORT, FIRSTNAME and LASTNAME.
    _VIEWS = (
        ("employee_country_view", "REGION"),
        ("employee_department_view", "DEPT"),
        ("employee_nation_view", "NATIONALITY"),
    )

    def __init__(self, DB_NAME):
        self.dbName = DB_NAME

    def getConnection(self):
        """Return a new ``sqlite3`` connection to the configured database."""
        return sqlite3.connect(self.dbName)

    def dbSetup(self):
        """Drop and recreate the ``employee`` table (existing rows are lost)."""
        con = self.getConnection()
        try:
            cur = con.cursor()
            # IF EXISTS replaces the former bare ``except: pass``, which also
            # hid unrelated errors such as a locked or unreadable database.
            cur.execute("DROP TABLE IF EXISTS employee;")
            cur.execute(
                "CREATE TABLE employee(EMP_ID INTEGER,PASSPORT VARCHAR(100),FIRSTNAME VARCHAR(100),LASTNAME VARCHAR(100),"
                "GENDER INTEGER,BIRTHDAY VARCHAR(100),NATIONALITY VARCHAR(100),HIRED VARCHAR(100),DEPT VARCHAR(100),"
                "POSITION VARCHAR(100),STATUS INTEGER,REGION VARCHAR(100));")
            con.commit()
        finally:
            con.close()
        print("setup completed.")

    def view(self):
        """Drop and recreate the country, department and nation views."""
        con = self.getConnection()
        try:
            cur = con.cursor()
            for view_name, column in self._VIEWS:
                cur.execute(f"DROP VIEW IF EXISTS {view_name};")
                cur.execute(
                    f'CREATE VIEW {view_name} AS SELECT "EMP_ID","PASSPORT","FIRSTNAME","LASTNAME",'
                    f'"{column}" FROM employee;')
            con.commit()
        finally:
            con.close()
        print("setup completed.")

    def save(self, data):
        """Insert ``data`` (an iterable of 12-item rows) into ``employee``.

        The insert runs in one transaction: it is committed when every row
        succeeds and rolled back if any row fails.
        """
        print("Save..")
        con = self.getConnection()
        try:
            with con:  # commit on success, roll back on error
                con.executemany(
                    "INSERT INTO employee (EMP_ID,PASSPORT,FIRSTNAME,LASTNAME,GENDER,BIRTHDAY,NATIONALITY,HIRED,DEPT,"
                    "POSITION,STATUS,REGION) VALUES(?,?,?,?,?,?,?,?,?,?,?,?);",
                    data)
        finally:
            con.close()


class FileUtility(object):
    """Small helpers for writing CSV and JSON output files."""

    @staticmethod
    def _strip_suffix(fileName, suffix):
        """Return ``fileName`` without a trailing ``suffix``, if present."""
        # Only the end of the name is trimmed, so a directory part that
        # happens to contain the same text is left intact.
        if suffix and fileName.endswith(suffix):
            return fileName[:-len(suffix)]
        return fileName

    def write(self, fileName, data):
        """Write ``data`` to ``<fileName without .csv>_<nation>.csv``.

        ``data`` is a list of rows; the nation is read from column 6 of the
        first row. Nothing is written when ``data`` is empty.
        """
        print("Write..")
        if not data:
            # Without a first row there is no nation to name the file after.
            return
        nation = data[0][6]
        print(nation)
        target = "{}_{}.csv".format(self._strip_suffix(fileName, ".csv"), nation)
        with open(target, 'w', newline="") as f:
            csv.writer(f).writerows(data)

    def json(self, fileName, data):
        """Serialize ``data`` as JSON into ``<fileName without .json>.json``."""
        print("Write json..")
        target = "{}.json".format(self._strip_suffix(fileName, ".json"))
        with open(target, "w") as final:
            json.dump(data, final)
Loading