|
@ -10,11 +10,13 @@ import utils.report_tool |
|
|
import basic.program |
|
|
import basic.program |
|
|
import basic.constants as B |
|
|
import basic.constants as B |
|
|
import utils.match_const as M |
|
|
import utils.match_const as M |
|
|
|
|
|
|
|
|
# ------------------------------------------------------------ |
|
|
# ------------------------------------------------------------ |
|
|
""" |
|
|
""" |
|
|
|
|
|
|
|
|
""" |
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Matching(): |
|
|
class Matching(): |
|
|
def __init__(self, comp): |
|
|
def __init__(self, comp): |
|
|
self.comp = comp |
|
|
self.comp = comp |
|
@ -64,14 +66,17 @@ class Matching(): |
|
|
|
|
|
|
|
|
def isHitA(self, key):
    """Tell whether `key` on side A is already linked.

    :param key: key to look up in `self.linksA`
    :return: True when `key` is present in `self.linksA` and its value
        differs from `B.SVAL_NULL`, otherwise False
    """
    return (key in self.linksA) and (self.linksA[key] != B.SVAL_NULL)
|
|
|
|
|
|
|
|
def isHitB(self, key):
    """Tell whether `key` on side B is already linked.

    :param key: key to look up in `self.linksB`
    :return: True when `key` is present in `self.linksB` and its value
        differs from `B.SVAL_NULL`, otherwise False
    """
    return (key in self.linksB) and (self.linksB[key] != B.SVAL_NULL)
|
|
|
|
|
|
|
|
def setHit(self, keyA, keyB):
    """Link `keyA` and `keyB` with each other in both directions.

    A key equal to `B.SVAL_NULL` is not used as an index, so only the
    opposite direction is recorded for it.

    :param keyA: key of the side-A element (or `B.SVAL_NULL`)
    :param keyB: key of the side-B element (or `B.SVAL_NULL`)
    :return: the string "OK" when the link was stored
    :raises Exception: when `keyA` or `keyB` is already linked
    """
    if self.isHitA(keyA) or self.isHitB(keyB):
        # never overwrite an existing link
        raise Exception("one of the links are set")
    if keyA != B.SVAL_NULL:
        self.linksA[keyA] = keyB
    if keyB != B.SVAL_NULL:
        self.linksB[keyB] = keyA
    return "OK"
|
|
|
|
|
|
|
|
def setNohit(self, similarity, keyA, keyB): |
|
|
def setNohit(self, similarity, keyA, keyB): |
|
|
""" The similarity must be unique. Only a mismatch will be set. """ |
|
|
""" The similarity must be unique. Only a mismatch will be set. """ |
|
|
if similarity in self.nomatch: |
|
|
if similarity in self.nomatch: |
|
@ -79,6 +84,7 @@ class Matching(): |
|
|
if (self.isHitA(keyA) or self.isHitB(keyB)): |
|
|
if (self.isHitA(keyA) or self.isHitB(keyB)): |
|
|
return |
|
|
return |
|
|
self.nomatch[similarity] = [keyA, keyB] |
|
|
self.nomatch[similarity] = [keyA, keyB] |
|
|
|
|
|
|
|
|
def getTableDdl(self, path): |
|
|
def getTableDdl(self, path): |
|
|
a = path.split(":") |
|
|
a = path.split(":") |
|
|
ddl = self.comp.conf[B.DATA_NODE_DDL] |
|
|
ddl = self.comp.conf[B.DATA_NODE_DDL] |
|
@ -87,6 +93,7 @@ class Matching(): |
|
|
if (x == B.DATA_NODE_DATA): break |
|
|
if (x == B.DATA_NODE_DATA): break |
|
|
if x in ddl: ddl = ddl[x] |
|
|
if x in ddl: ddl = ddl[x] |
|
|
return ddl |
|
|
return ddl |
|
|
|
|
|
|
|
|
def setDiffHeader(matching): |
|
|
def setDiffHeader(matching): |
|
|
job = basic.program.Job.getInstance() |
|
|
job = basic.program.Job.getInstance() |
|
|
verify = int(job.getDebugLevel("match_tool")) - 1 |
|
|
verify = int(job.getDebugLevel("match_tool")) - 1 |
|
@ -98,8 +105,10 @@ class Matching(): |
|
|
htmltxt += "</head>" |
|
|
htmltxt += "</head>" |
|
|
htmltxt += "<body>" |
|
|
htmltxt += "<body>" |
|
|
htmltxt += "<h1>" + M.MATCH[matching.matchtype]["title"] + "</h1>" |
|
|
htmltxt += "<h1>" + M.MATCH[matching.matchtype]["title"] + "</h1>" |
|
|
htmltxt += "<h4>"+M.MATCH[M.MATCH[matching.matchtype]["A"]]["long"]+": "+matching.matchfiles["A"]+"</h4>" |
|
|
htmltxt += "<h4>" + M.MATCH[M.MATCH[matching.matchtype]["A"]]["long"] + ": " + matching.matchfiles[ |
|
|
htmltxt += "<h4>"+M.MATCH[M.MATCH[matching.matchtype]["B"]]["long"]+": "+matching.matchfiles["B"]+"</h4><br>" |
|
|
"A"] + "</h4>" |
|
|
|
|
|
htmltxt += "<h4>" + M.MATCH[M.MATCH[matching.matchtype]["B"]]["long"] + ": " + matching.matchfiles[ |
|
|
|
|
|
"B"] + "</h4><br>" |
|
|
matching.htmltext = htmltxt |
|
|
matching.htmltext = htmltxt |
|
|
|
|
|
|
|
|
def setDiffFooter(self): |
|
|
def setDiffFooter(self): |
|
@ -110,6 +119,7 @@ class Matching(): |
|
|
htmltext += "</body></html>" |
|
|
htmltext += "</body></html>" |
|
|
self.htmltext = htmltext |
|
|
self.htmltext = htmltext |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def matchFiles(matching): |
|
|
def matchFiles(matching): |
|
|
""" |
|
|
""" |
|
|
|
|
|
|
|
@ -118,6 +128,7 @@ def matchFiles(matching): |
|
|
:return: |
|
|
:return: |
|
|
""" |
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def matchBestfit(matching, path): |
|
|
def matchBestfit(matching, path): |
|
|
""" |
|
|
""" |
|
|
in this strategy the difference-score of all elements of both sides will be calculated. |
|
|
in this strategy the difference-score of all elements of both sides will be calculated. |
|
@ -154,7 +165,7 @@ def matchBestfit(matching, path): |
|
|
if (matching.isHitB(composeKey("b", ib))): |
|
|
if (matching.isHitB(composeKey("b", ib))): |
|
|
ib += 1 |
|
|
ib += 1 |
|
|
continue |
|
|
continue |
|
|
similarity=getSimilarity(matching, path, rA, rB, ix) |
|
|
similarity = getSimilarity(matching, rA, rB, ix, M.MATCH[matching.matchtype]["simorder"]) |
|
|
if (similarity == "MATCH"): |
|
|
if (similarity == "MATCH"): |
|
|
matching.setHit(composeKey("a", ia), composeKey("b", ib)) |
|
|
matching.setHit(composeKey("a", ia), composeKey("b", ib)) |
|
|
continue |
|
|
continue |
|
@ -164,6 +175,7 @@ def matchBestfit(matching, path): |
|
|
ix += 1 |
|
|
ix += 1 |
|
|
ia += 1 |
|
|
ia += 1 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def matchRestfit(matching): |
|
|
def matchRestfit(matching): |
|
|
""" """ |
|
|
""" """ |
|
|
job = basic.program.Job.getInstance() |
|
|
job = basic.program.Job.getInstance() |
|
@ -182,52 +194,81 @@ def matchRestfit(matching): |
|
|
print("neues Matching " + pair[0] + " " + pair[1]) |
|
|
print("neues Matching " + pair[0] + " " + pair[1]) |
|
|
matching.setHit(pair[0], pair[1]) |
|
|
matching.setHit(pair[0], pair[1]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def composeKey(side, i):
    """Build the row key for a side and a zero-based row index.

    The key is the lower-cased `side` followed by the one-based index,
    left-padded with zeros to at least four digits, e.g. ("a", 0) -> "a0001".

    :param side: side marker string, e.g. "a" or "b"
    :param i: zero-based row index
    :return: the composed key string
    """
    return side.lower() + str(i + 1).zfill(4)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extractKeyI(key):
    """Return the zero-based row index encoded in a row key.

    The first character of `key` is skipped and the remainder is parsed as
    a one-based integer, e.g. "a0001" -> 0.

    :param key: key string such as "a0001"
    :return: zero-based index as int
    :raises ValueError: when the part after the first character is not an integer
    """
    return int(key[1:]) - 1
|
|
|
|
|
|
|
|
def setMatchkeys(matching, path):
    """Fill `matching.matchkeys` from the DDL node of the component configuration.

    Nothing happens when `matching.matchkeys` is already non-empty or when
    the configuration has no `B.DATA_NODE_DDL` entry.  Otherwise the DDL tree
    is descended along the ":"-separated parts of `path` (parts shorter than
    two characters are skipped, descent stops at `B.DATA_NODE_DATA`), and every
    field with a non-empty "key" attribute of the form "<ktyp>:<number>" is
    registered under the name "k" + ktyp + two-digit number.

    :param matching: matching object providing `matchkeys` and `comp.conf`
    :param path: ":"-separated path into the data tree
    :raises Exception: when a "key" attribute does not consist of exactly two
        ":"-separated parts or its second part is not numeric
    """
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 1
    job.debug(verify, "getSimilarity " + path)
    if len(matching.matchkeys) > 0:
        # keys were already collected earlier
        return
    if B.DATA_NODE_DDL in matching.comp.conf:
        job.debug(verify, "ddl " + path)
        a = path.split(":")
        ddl = matching.comp.conf[B.DATA_NODE_DDL]
        for x in a:
            if len(x) < 2:
                continue
            if x == B.DATA_NODE_DATA:
                break
            if x in ddl:
                ddl = ddl[x]
        job.debug(verify, "ddl " + json.dumps(ddl))
        keys = {}
        for f in ddl:
            job.debug(verify, "ddl " + f)
            if ("key" in ddl[f]) and (len(ddl[f]["key"]) > 0):
                b = ddl[f]["key"].split(":")
                if len(b) != 2:
                    raise Exception("falsch formatierter Schluessel " + ddl[f]["key"])
                if not b[1].isnumeric():
                    raise Exception("falsch formatierter Schluessel " + ddl[f]["key"])
                # the key name carries the key type so that getSimilarity can
                # tell the key kinds apart by substring test
                k = "k" + b[0] + "" + b[1].zfill(2)
                job.debug(verify, "ddl " + f)
                keys[k] = {"ktyp": b[0], "field": ddl[f]["feld"], "type": ddl[f]["type"], "rule": ddl[f]["acceptance"]}
        matching.matchkeys = keys
        job.debug(verify, "ddl " + json.dumps(keys))


def getSimilarity(matching, rA, rB, i, simorder=M.SIM_DEFAULT):
    """Calculate the similarity between the rows `rA` and `rB`.

    For every match key (in sorted key order) the single similarity returned
    by `getStringSimilarity` is appended to the technical or the business
    score string, depending on whether `M.SIM_TECHNICAL` or `M.SIM_BUSINESS`
    occurs in the key name; the best achievable score per key is "99".

    :param matching: matching object providing `matchkeys`
    :param rA: row of side A, indexable by field name
    :param rB: row of side B, indexable by field name
    :param i: running index, appended zero-padded to three digits
    :param simorder: its first character selects which score kind is decisive
        and comes first in the result
    :return: "MATCH" when both score strings are at their top value, or when
        the score kind selected by `simorder` is at its top value; otherwise
        "S" + both score strings (selected kind first) + zero-padded `i`
    """
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 1
    job.debug(verify, "getSimilarity ")
    mBsim = ""
    mTsim = ""
    topBsim = ""
    topTsim = ""
    for k in sorted(matching.matchkeys):
        field = matching.matchkeys[k]["field"]
        if M.SIM_TECHNICAL in k:
            mTsim += getStringSimilarity(str(rA[field]), str(rB[field]))
            topTsim += "99"
        if M.SIM_BUSINESS in k:
            # business keys feed the business score; they were previously added
            # to the technical score, which left mBsim/topBsim always empty
            mBsim += getStringSimilarity(str(rA[field]), str(rB[field]))
            topBsim += "99"

    if mBsim == topBsim and mTsim == topTsim:
        job.debug(verify, "Treffer ")
        return "MATCH"
    elif simorder[0:1] == M.SIM_TECHNICAL and mTsim == topTsim:
        job.debug(verify, "Treffer ")
        return "MATCH"
    elif simorder[0:1] == M.SIM_BUSINESS and mBsim == topBsim:
        job.debug(verify, "Treffer ")
        return "MATCH"
    elif simorder[0:1] == M.SIM_TECHNICAL:
        return "S" + mTsim + mBsim + str(i).zfill(3)
    else:  # if simorder[0:1] == M.SIM_BUSINESS:
        return "S" + mBsim + mTsim + str(i).zfill(3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def matchTree(matching): |
|
|
def matchTree(matching): |
|
|
""" |
|
|
""" |
|
@ -242,6 +283,7 @@ def matchTree(matching): |
|
|
job.debug(verify, "..>> ende matching " + matching.htmltext) |
|
|
job.debug(verify, "..>> ende matching " + matching.htmltext) |
|
|
return matching.htmltext |
|
|
return matching.htmltext |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def matchElement(matching, A, B, path): |
|
|
def matchElement(matching, A, B, path): |
|
|
""" travers through the datatree """ |
|
|
""" travers through the datatree """ |
|
|
job = basic.program.Job.getInstance() |
|
|
job = basic.program.Job.getInstance() |
|
@ -256,6 +298,7 @@ def matchElement(matching, A, B, path): |
|
|
else: |
|
|
else: |
|
|
return matching |
|
|
return matching |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def getStringSimilarity(strA, strB): |
|
|
def getStringSimilarity(strA, strB): |
|
|
job = basic.program.Job.getInstance() |
|
|
job = basic.program.Job.getInstance() |
|
|
verify = int(job.getDebugLevel("match_tool")) - 1 |
|
|
verify = int(job.getDebugLevel("match_tool")) - 1 |
|
@ -266,6 +309,7 @@ def getStringSimilarity(strA, strB): |
|
|
if (strA.strip().lower() == strB.strip().lower()): return "55" |
|
|
if (strA.strip().lower() == strB.strip().lower()): return "55" |
|
|
return "00" |
|
|
return "00" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def getEvaluation(matching, type, acceptance, sideA, sideB): |
|
|
def getEvaluation(matching, type, acceptance, sideA, sideB): |
|
|
job = basic.program.Job.getInstance() |
|
|
job = basic.program.Job.getInstance() |
|
|
verify = int(job.getDebugLevel("match_tool")) - 1 |
|
|
verify = int(job.getDebugLevel("match_tool")) - 1 |
|
@ -285,6 +329,7 @@ def getEvaluation(matching, type, acceptance, sideA, sideB): |
|
|
classB = "acceptB" |
|
|
classB = "acceptB" |
|
|
return [result, classA, classB] |
|
|
return [result, classA, classB] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def matchDict(matching, A, B, path): |
|
|
def matchDict(matching, A, B, path): |
|
|
""" travers through the datatree """ |
|
|
""" travers through the datatree """ |
|
|
job = basic.program.Job.getInstance() |
|
|
job = basic.program.Job.getInstance() |
|
@ -318,6 +363,7 @@ def matchDict(matching, A, B, path): |
|
|
job.debug(verify, "matchDict 420 ...<<---") |
|
|
job.debug(verify, "matchDict 420 ...<<---") |
|
|
return matching |
|
|
return matching |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def matchArray(matching, A, B, path): |
|
|
def matchArray(matching, A, B, path): |
|
|
""" matches the datarows of the datatree """ |
|
|
""" matches the datarows of the datatree """ |
|
|
job = basic.program.Job.getInstance() |
|
|
job = basic.program.Job.getInstance() |
|
@ -341,6 +387,7 @@ def matchArray(matching, A, B, path): |
|
|
htmltext += compareRows(matching, path) |
|
|
htmltext += compareRows(matching, path) |
|
|
matching.htmltext += htmltext |
|
|
matching.htmltext += htmltext |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def compareRows(matching, path): |
|
|
def compareRows(matching, path): |
|
|
""" traverse through matched rows """ |
|
|
""" traverse through matched rows """ |
|
|
job = basic.program.Job.getInstance() |
|
|
job = basic.program.Job.getInstance() |
|
@ -373,6 +420,7 @@ def compareRows(matching, path): |
|
|
matching.difftext += "</table>" |
|
|
matching.difftext += "</table>" |
|
|
return htmltext |
|
|
return htmltext |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def markRow(matching, header, row, side): |
|
|
def markRow(matching, header, row, side): |
|
|
job = basic.program.Job.getInstance() |
|
|
job = basic.program.Job.getInstance() |
|
|
verify = int(job.getDebugLevel("match_tool")) - 4 |
|
|
verify = int(job.getDebugLevel("match_tool")) - 4 |
|
@ -396,6 +444,7 @@ def markRow(matching, header, row, side): |
|
|
matching.difftext += text |
|
|
matching.difftext += text |
|
|
return text |
|
|
return text |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def compareRow(matching, header, rA, rB): |
|
|
def compareRow(matching, header, rA, rB): |
|
|
""" traverse through matched rows """ |
|
|
""" traverse through matched rows """ |
|
|
job = basic.program.Job.getInstance() |
|
|
job = basic.program.Job.getInstance() |
|
@ -452,7 +501,7 @@ def compareRow(matching, header, rA, rB): |
|
|
matching.difftext += text |
|
|
matching.difftext += text |
|
|
return text |
|
|
return text |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# -------------------------------------------------------------------------- |
|
|
# -------------------------------------------------------------------------- |
|
|
def matchLines(matching):
    """Placeholder: currently does nothing and returns None.

    :param matching: matching object (unused so far)
    """
    pass
|
|
|
|
|
|
|
|