-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdeltasSQL.py
More file actions
122 lines (84 loc) · 2.97 KB
/
deltasSQL.py
File metadata and controls
122 lines (84 loc) · 2.97 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/python
import sys,gzip
tableName =sys.argv[1]
fileName =sys.argv[2]
headerFileName =sys.argv[3]
localStoreId={}
thisAction={}
localStoreVals={}
localStoreOldVals={}
writeEvery=5
rowHeader="\n "
#rowHeader=" "
dataFieldSeparator=','
headerSeparator=','
################################################################################
### READING PART ###############################################################
################################################################################
# Delta file example:
# -"123","bla bla bla OLD","22"
# +"123","bla bla bla NEW","22"
fileIn = gzip.open(fileName, 'r')
for thisLine in fileIn:
thisSign=thisLine[0]
# Get rid of leading [+-] sign
thisLine=thisLine[1:]
# Get rid of first two lines (+++ and ---, resulting from diff)
if thisLine[0] == '-' or thisLine[0]=='+':
continue
# Get rid of non-ID lines
if not thisLine[1].isdigit():
continue
thisLine=thisLine.rstrip('\n')
thisId=thisLine.split(dataFieldSeparator)[0]
# New Id, default action
if not localStoreId.has_key(thisId):
if thisSign == '-':
thisAction[thisId]='D'
if thisSign == '+':
thisAction[thisId]='I'
# If i already have this id, it is an updated
else:
thisAction[thisId]='U'
# If minus sign, i save old values
if thisSign == '-':
localStoreOldVals[thisId]=thisLine
localStoreId[thisId]=thisId
localStoreVals[thisId]=thisLine
fileIn.close()
# Header, write once execute everywhere :-D
headerFile = open(headerFileName, 'r')
headerString=headerFile.readline().rstrip('\n')
headerFile.close()
headerList=headerString.split(headerSeparator)
headerLen=len(headerList)
################################################################################
### WRITING PART ###############################################################
################################################################################
print "-- HEADER= "+headerString
print
processedRows=0
writeStr=""
for thisKey, thisValue in localStoreVals.items():
updStr=""
opType=""
if thisAction[thisKey] == 'D':
print "-- OLD VALUES= "+localStoreOldVals[thisKey]
print "DELETE FROM "+tableName+" WHERE ID="+thisKey+";"
if thisAction[thisKey] == 'I':
print "-- NEW VALUES= "+thisValue[1:-1]
print "INSERT INTO "+tableName+"\n ("+headerString+")\nVALUES\n ("+ thisValue +");"
# Now the funny part...
if thisAction[thisKey] == 'U':
oldValues=list(localStoreOldVals[thisKey].replace('"','').split(dataFieldSeparator))
newValues=list(thisValue.replace('"','').split(dataFieldSeparator))
print "-- OLD VALUES= "+localStoreOldVals[thisKey]
print "-- NEW VALUES= "+thisValue
print "UPDATE "+tableName+" SET"
updStr=""
for fieldIdx in range(0,len(headerList)):
if newValues[fieldIdx] != oldValues[fieldIdx]:
updStr= updStr+ " "+headerList[fieldIdx].lower()+" = \""+ newValues[fieldIdx] +"\","
print updStr[:-1]
print " WHERE ID="+thisKey+";"
print "\n\n"