Browse Source

similarity-order for business or technical match

master
Ulrich Carmesin 2 years ago
parent
commit
1bf757ced4
  1. 5
      test/test_compare.py
  2. 30
      utils/match_const.py
  3. 207
      utils/match_tool.py

5
test/test_compare.py

@ -111,9 +111,10 @@ class MyTestCase(unittest.TestCase):
def xtest_similarity(self):
    """Disabled check of the key setup followed by a single row comparison.

    The ``x`` prefix keeps unittest from collecting this method.
    First the match keys are derived for the table path, then the first
    pre-actual row is compared with the first post-actual row.
    """
    matching = self.getMatching()
    utils.match_tool.setMatchkeys(matching, ":database:scheme:table:_data")
    # NOTE(review): tdata is not defined in the visible part of this file;
    # presumably it is test data prepared elsewhere -- confirm.
    rowPre = tdata[M.MATCH_SIDE_PREACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0]
    rowPost = tdata[M.MATCH_SIDE_POSTACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0]
    utils.match_tool.getSimilarity(matching, rowPre, rowPost, 1)
def test_bestfit(self):
job = Job("unit")

30
utils/match_const.py

@ -2,6 +2,11 @@
"""
constants used for api-functions
"""
SIM_BUSINESS = "B"
SIM_TECHNICAL = "T"
SIM_DEFAULT = "BT"
MATCH_SIDE_PREEXPECT = "preexpect"
""" it implies the precondition of the expectation """
MATCH_DICT_PREEXPECT = {
@ -67,10 +72,7 @@ MATCH = {
MATCH_PRECOND: {
"A": MATCH_SIDE_PREEXPECT,
"B": MATCH_SIDE_PREACTUAL,
"shortA": "SV",
"shortB": "IV",
"longA": "Soll-Vorher",
"longB": "Ist-Vorher",
"simorder": SIM_BUSINESS + SIM_TECHNICAL,
"mode": "info",
"filename": "01_Vorbedingungen",
"title": "Pruefung Vorbedingung (Soll-Vorher - Ist-Vorher)"
@ -78,10 +80,7 @@ MATCH = {
MATCH_POSTCOND: {
"A": MATCH_SIDE_POSTEXPECT,
"B": MATCH_SIDE_POSTACTUAL,
"shortA": "SN",
"shortB": "IN",
"longA": "Soll-Nachher",
"longB": "Ist-Nachher",
"simorder": SIM_BUSINESS + SIM_TECHNICAL,
"mode": "hard",
"filename": "00_Fachabgleich",
"title": "Fachliche Auswertung (Soll-Nachher - Ist-Nachher)"
@ -89,10 +88,7 @@ MATCH = {
MATCH_SUCCESS: {
"A": MATCH_SIDE_PREACTUAL,
"B": MATCH_SIDE_POSTACTUAL,
"shortA": "IV",
"shortB": "IN",
"longA": "Ist-Vorher",
"longB": "Ist-Nachher",
"simorder": SIM_TECHNICAL + SIM_BUSINESS,
"mode": "action",
"filename": "04_Ablauf",
"title": "Ablauf-Differenz (Ist-Vorher - Ist-Nachher)"
@ -100,10 +96,7 @@ MATCH = {
MATCH_PRESTEP: {
"A": MATCH_SIDE_PRESTEP,
"B": MATCH_SIDE_POSTACTUAL,
"shortA": "VN",
"shortB": "IN",
"longA": "Vor-Schritt",
"longB": "Ist-Nachher",
"simorder": SIM_TECHNICAL + SIM_BUSINESS,
"mode": "action",
"filename": "02_Vorschritt",
"title": "Schritt-Differenz (Vorschritt-Nachher - Ist-Nachher)"
@ -111,10 +104,7 @@ MATCH = {
MATCH_TESTEXAMPLE: {
"A": MATCH_SIDE_TESTCASE,
"B": MATCH_SIDE_POSTACTUAL,
"shortA": "TN",
"shortB": "IN",
"longA": "Vergleich-Soll",
"longB": "Ist-Nachher",
"simorder": SIM_BUSINESS + SIM_TECHNICAL,
"mode": "action",
"filename": "03_Vergleichstestfall",
"title": "Vergleichstestfall (Vergleich-Soll - Ist-Nachher)"

207
utils/match_tool.py

@ -10,11 +10,13 @@ import utils.report_tool
import basic.program
import basic.constants as B
import utils.match_const as M
# ------------------------------------------------------------
"""
"""
class Matching():
def __init__(self, comp):
self.comp = comp
@ -64,21 +66,25 @@ class Matching():
def isHitA(self, key):
    """Return True when *key* is linked on side A to something other than B.SVAL_NULL."""
    if key not in self.linksA:
        return False
    return self.linksA[key] != B.SVAL_NULL
def isHitB(self, key):
    """Return True when *key* is linked on side B to something other than B.SVAL_NULL."""
    if key not in self.linksB:
        return False
    return self.linksB[key] != B.SVAL_NULL
def setHit(self, keyA, keyB):
    """Link keyA and keyB with each other.

    :param keyA: key on side A; nothing is stored for it when it equals B.SVAL_NULL
    :param keyB: key on side B; nothing is stored for it when it equals B.SVAL_NULL
    :return: "OK" after the links have been stored
    :raises Exception: if either key already has a link
    """
    if self.isHitA(keyA) or self.isHitB(keyB):
        raise Exception("one of the links are set")
    if keyA != B.SVAL_NULL:
        self.linksA[keyA] = keyB
    if keyB != B.SVAL_NULL:
        self.linksB[keyB] = keyA
    return "OK"
def setNohit(self, similarity, keyA, keyB):
    """Remember a non-matching pair under its similarity score.

    The similarity string is the dictionary key and therefore has to be
    unique. The pair is only stored when neither key is linked yet.

    :param similarity: similarity string used as key in self.nomatch
    :param keyA: key of the row on side A
    :param keyB: key of the row on side B
    :raises Exception: if the similarity is already registered
    """
    if similarity in self.nomatch:
        raise Exception("similarity " + similarity + " exists")
    # a pair with an already linked side is no candidate anymore
    if self.isHitA(keyA) or self.isHitB(keyB):
        return
    self.nomatch[similarity] = [keyA, keyB]
def getTableDdl(self, path):
a = path.split(":")
ddl = self.comp.conf[B.DATA_NODE_DDL]
@ -87,19 +93,22 @@ class Matching():
if (x == B.DATA_NODE_DATA): break
if x in ddl: ddl = ddl[x]
return ddl
def setDiffHeader(matching):
    """Build the opening part of the html diff report and store it in matching.htmltext.

    The header consists of the doctype, the head with title and internal
    style, and the body start with the title and one line per compared
    side (long side name and file name).

    :param matching: object providing matchtype and matchfiles; its htmltext is overwritten
    """
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 1
    job.debug(verify, "getDiffHeader ")
    matchDef = M.MATCH[matching.matchtype]
    title = matchDef["title"]
    htmltxt = "<!DOCTYPE html>"
    htmltxt += "<html><head>"
    htmltxt += "<title>" + title + "</title>"
    htmltxt += utils.css_tool.getInternalStyle("diffFiles")
    htmltxt += "</head>"
    htmltxt += "<body>"
    htmltxt += "<h1>" + title + "</h1>"
    # the side entries of M.MATCH carry the long display names
    htmltxt += "<h4>" + M.MATCH[matchDef["A"]]["long"] + ": " + matching.matchfiles["A"] + "</h4>"
    htmltxt += "<h4>" + M.MATCH[matchDef["B"]]["long"] + ": " + matching.matchfiles["B"] + "</h4><br>"
    matching.htmltext = htmltxt
def setDiffFooter(self):
@ -110,6 +119,7 @@ class Matching():
htmltext += "</body></html>"
self.htmltext = htmltext
def matchFiles(matching):
"""
@ -118,6 +128,7 @@ def matchFiles(matching):
:return:
"""
def matchBestfit(matching, path):
"""
in this strategy the difference-score of all elements of both sides will be calculated.
@ -128,7 +139,7 @@ def matchBestfit(matching, path):
:return:
"""
# initialize all potential links with null
i=0
i = 0
if (matching.sideA is not None):
for r in matching.sideA:
k = composeKey("a", i)
@ -154,7 +165,7 @@ def matchBestfit(matching, path):
if (matching.isHitB(composeKey("b", ib))):
ib += 1
continue
similarity=getSimilarity(matching, path, rA, rB, ix)
similarity = getSimilarity(matching, rA, rB, ix, M.MATCH[matching.matchtype]["simorder"])
if (similarity == "MATCH"):
matching.setHit(composeKey("a", ia), composeKey("b", ib))
continue
@ -164,70 +175,100 @@ def matchBestfit(matching, path):
ix += 1
ia += 1
def matchRestfit(matching):
    """Link the remaining non-matching pairs.

    The entries of matching.nomatch are visited in descending order of their
    similarity key. A pair is linked only if neither of its sides has a link
    yet; otherwise the existing link is printed and the pair is skipped.

    :param matching: object providing nomatch, linksA, linksB, isHitA, isHitB and setHit
    """
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 1
    job.debug(verify, "matchRestfit ")
    for x in sorted(matching.nomatch, reverse=True):
        job.debug(verify, "matchRestfit " + x)
        pair = matching.nomatch[x]
        hitA = matching.isHitA(pair[0])
        hitB = matching.isHitB(pair[1])
        if hitA:
            print("A " + pair[0] + " bereits zugeordnet mit " + matching.linksA[pair[0]])
        if hitB:
            print("B " + pair[1] + " bereits zugeordnet mit " + matching.linksB[pair[1]])
        # setHit raises when one side is already linked, so skip explicitly
        if hitA or hitB:
            continue
        print("neues Matching " + pair[0] + " " + pair[1])
        matching.setHit(pair[0], pair[1])
def composeKey(side, i):
    """Build the row key from the side letter and the zero-based row index.

    :param side: side letter, it is lower-cased
    :param i: zero-based index; it is stored one-based, zero-padded to four digits
    :return: e.g. "a0001" for ("a", 0)
    """
    return side.lower() + str(i + 1).zfill(4)
def extractKeyI(key):
    """Return the zero-based row index encoded in a row key.

    :param key: one side letter followed by the one-based number, e.g. "a0001"
    :return: the zero-based index, e.g. 0 for "a0001"
    :raises ValueError: if the part after the first character is not an integer
    """
    return int(key[1:]) - 1
def getSimilarity(matching, path, rA, rB, i):
""" it calculates the similarity between both rows by:
concat each criteria with single-similarity 00..99 and i with 999..000 """
def setMatchkeys(matching, path):
    """Derive the match keys of a table from the ddl and store them in matching.matchkeys.

    Nothing is done if match keys are already set or if the component
    configuration contains no ddl. Otherwise the ddl is walked down along
    the path and every field with a non-empty "key" attribute of the form
    "<ktyp>:<number>" yields an entry "k<ktyp><number two digits>".

    :param matching: object providing comp.conf and matchkeys
    :param path: colon separated path to the table data
    :raises Exception: if a key attribute is not of the form "<ktyp>:<number>"
    """
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 1
    job.debug(verify, "setMatchkeys " + path)
    if len(matching.matchkeys) > 0:
        return
    if B.DATA_NODE_DDL not in matching.comp.conf:
        return
    job.debug(verify, "ddl " + path)
    ddl = matching.comp.conf[B.DATA_NODE_DDL]
    for x in path.split(":"):
        if len(x) < 2:
            continue
        if x == B.DATA_NODE_DATA:
            break
        if x in ddl:
            ddl = ddl[x]
    job.debug(verify, "ddl " + json.dumps(ddl))
    keys = {}
    for f in ddl:
        job.debug(verify, "ddl " + f)
        spec = ddl[f]
        # entries which are no field definition (e.g. a list of field names) carry no key
        if not isinstance(spec, dict):
            continue
        if ("key" not in spec) or (len(spec["key"]) == 0):
            continue
        b = spec["key"].split(":")
        if len(b) != 2:
            raise Exception("falsch formatierter Schluessel " + spec["key"])
        if not b[1].isnumeric():
            raise Exception("falsch formatierter Schluessel " + spec["key"])
        # the key type is part of the name so that getSimilarity can tell the criteria apart
        k = "k" + b[0] + b[1].zfill(2)
        job.debug(verify, "ddl " + f)
        keys[k] = {"ktyp": b[0], "field": spec["feld"], "type": spec["type"], "rule": spec["acceptance"]}
    matching.matchkeys = keys
    job.debug(verify, "ddl " + json.dumps(keys))
def getSimilarity(matching, rA, rB, i, simorder=M.SIM_DEFAULT):
    """Calculate the similarity between both rows over the match keys.

    Each match key contributes a two-digit single similarity ("00".."99").
    Keys whose name contains M.SIM_TECHNICAL are collected as technical
    criteria, keys whose name contains M.SIM_BUSINESS as business criteria.
    The first character of simorder selects the leading criteria group.

    :param matching: object providing matchkeys
    :param rA: row of side A, accessed by field name
    :param rB: row of side B, accessed by field name
    :param i: counter which is appended with three digits to a mismatch score
    :param simorder: order of the criteria groups, default M.SIM_DEFAULT
    :return: "MATCH" if all criteria, or all criteria of the leading group,
        are identical; otherwise "S" + leading similarities + other
        similarities + counter
    """
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 1
    job.debug(verify, "getSimilarity ")
    businessSims = []
    technicalSims = []
    for k in sorted(matching.matchkeys):
        isTechnical = M.SIM_TECHNICAL in k
        isBusiness = M.SIM_BUSINESS in k
        if not (isTechnical or isBusiness):
            continue
        field = matching.matchkeys[k]["field"]
        sim = getStringSimilarity(str(rA[field]), str(rB[field]))
        if isTechnical:
            technicalSims.append(sim)
        if isBusiness:
            # business criteria have to be collected separately, otherwise a
            # business-first order would report every pair as match
            businessSims.append(sim)
    mBsim = "".join(businessSims)
    mTsim = "".join(technicalSims)
    businessHit = all(sim == "99" for sim in businessSims)
    technicalHit = all(sim == "99" for sim in technicalSims)
    leading = simorder[0:1]
    if businessHit and technicalHit:
        job.debug(verify, "Treffer ")
        return "MATCH"
    if leading == M.SIM_TECHNICAL and technicalHit:
        job.debug(verify, "Treffer ")
        return "MATCH"
    if leading == M.SIM_BUSINESS and businessHit:
        job.debug(verify, "Treffer ")
        return "MATCH"
    if leading == M.SIM_TECHNICAL:
        return "S" + mTsim + mBsim + str(i).zfill(3)
    # business order is the fallback for any other simorder
    return "S" + mBsim + mTsim + str(i).zfill(3)
def matchTree(matching):
"""
@ -235,18 +276,19 @@ def matchTree(matching):
:return:
"""
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool"))-4
job.debug(verify, "..>> start matching "+matching.mode)
verify = int(job.getDebugLevel("match_tool")) - 4
job.debug(verify, "..>> start matching " + matching.mode)
matchElement(matching, matching.sideA, matching.sideB, "")
matching.setDiffFooter()
job.debug(verify, "..>> ende matching "+matching.htmltext)
job.debug(verify, "..>> ende matching " + matching.htmltext)
return matching.htmltext
def matchElement(matching, A, B, path):
""" travers through the datatree """
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool"))-4
job.debug(verify, "matchElem "+path+" A "+str(type(A))+" B "+str(type(B)))
verify = int(job.getDebugLevel("match_tool")) - 4
job.debug(verify, "matchElem " + path + " A " + str(type(A)) + " B " + str(type(B)))
if ((A is not None) and (isinstance(A, list))) \
or ((B is not None) and (isinstance(B, list))):
return matchArray(matching, A, B, path)
@ -256,20 +298,22 @@ def matchElement(matching, A, B, path):
else:
return matching
def getStringSimilarity(strA, strB):
    """Rate the similarity of two strings as a two-digit string.

    :param strA: first string
    :param strB: second string
    :return: "99" identical, "77" identical after strip, "66" identical
        ignoring case, "55" identical after strip and ignoring case,
        otherwise "00"
    """
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 1
    job.debug(verify, "getStringSimilarity " + strA + " ?= " + strB)
    if strA == strB:
        return "99"
    if strA.strip() == strB.strip():
        return "77"
    if strA.lower() == strB.lower():
        return "66"
    if strA.strip().lower() == strB.strip().lower():
        return "55"
    return "00"
def getEvaluation(matching, type, acceptance, sideA, sideB):
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool"))-1
job.debug(verify, "getEvaluation "+str(sideA)+" ?= "+str(sideB))
verify = int(job.getDebugLevel("match_tool")) - 1
job.debug(verify, "getEvaluation " + str(sideA) + " ?= " + str(sideB))
match = getStringSimilarity(str(sideA), str(sideB))
classA = "novalue"
classB = "novalue"
@ -285,11 +329,12 @@ def getEvaluation(matching, type, acceptance, sideA, sideB):
classB = "acceptB"
return [result, classA, classB]
def matchDict(matching, A, B, path):
""" travers through the datatree """
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool"))-4
job.debug(verify, "matchDict "+path)
verify = int(job.getDebugLevel("match_tool")) - 4
job.debug(verify, "matchDict " + path)
if (A is not None):
for k in A:
job.debug(verify, "matchDict 400 " + k + ".")
@ -299,11 +344,11 @@ def matchDict(matching, A, B, path):
if (isinstance(A[k], dict)): A[k]["_match"] = "Y"
if (isinstance(B[k], dict)): B[k]["_match"] = "Y"
job.debug(verify, "matchDict 404 " + k + "." + path)
matchElement(matching, A[k], B[k], path+":"+k)
matchElement(matching, A[k], B[k], path + ":" + k)
else:
if (isinstance(A[k], dict)): A[k]["_match"] = "N"
job.debug(verify, "matchDict 408 " + path)
matchElement(matching, A[k], None, path+":"+k)
matchElement(matching, A[k], None, path + ":" + k)
if (B is not None):
for k in B:
job.debug(verify, "matchDict 412 " + k + ".")
@ -314,15 +359,16 @@ def matchDict(matching, A, B, path):
elif (A is None) or (k not in A):
if (A is not None) and (isinstance(A[k], dict)): B[k]["_match"] = "N"
job.debug(verify, "matchDict 418 " + k + "." + path)
matchElement(matching, None, B[k], path+":"+k)
matchElement(matching, None, B[k], path + ":" + k)
job.debug(verify, "matchDict 420 ...<<---")
return matching
def matchArray(matching, A, B, path):
""" matches the datarows of the datatree """
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool"))-4
job.debug(verify, "matchArray "+path+"\n.."+matching.htmltext)
verify = int(job.getDebugLevel("match_tool")) - 4
job.debug(verify, "matchArray " + path + "\n.." + matching.htmltext)
matching.sideA = A
matching.sideB = B
matchBestfit(matching, path)
@ -341,10 +387,11 @@ def matchArray(matching, A, B, path):
htmltext += compareRows(matching, path)
matching.htmltext += htmltext
def compareRows(matching, path):
""" traverse through matched rows """
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool"))-1
verify = int(job.getDebugLevel("match_tool")) - 1
ddl = matching.getTableDdl(path)
report = utils.report_tool.Report.getInstance()
table = ""
@ -353,8 +400,8 @@ def compareRows(matching, path):
htmltext = "<table><tr><th></th>"
for f in ddl[B.DATA_NODE_HEADER]:
job.debug(verify, "ddl " + f + " ")
header.append({ "field": f, "type": ddl[f]["type"], "acceptance": ddl[f]["acceptance"]})
htmltext += "<th>"+f+"</th>"
header.append({"field": f, "type": ddl[f]["type"], "acceptance": ddl[f]["acceptance"]})
htmltext += "<th>" + f + "</th>"
htmltext += "</tr>"
matching.difftext = htmltext
for k in sorted(matching.linksA):
@ -373,9 +420,10 @@ def compareRows(matching, path):
matching.difftext += "</table>"
return htmltext
def markRow(matching, header, row, side):
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool"))-4
verify = int(job.getDebugLevel("match_tool")) - 4
text = ""
cssClass = ""
for f in header:
@ -390,16 +438,17 @@ def markRow(matching, header, row, side):
res = getEvaluation(matching, f["type"], f["acceptance"], "", row[f["field"]])
val = str(row[f["field"]])
cssClass = res[2]
text += "<td "+utils.css_tool.getInlineStyle("diffFiles", cssClass)+">"+val+"</td>"
text = "<tr><td "+utils.css_tool.getInlineStyle("diffFiles", cssClass)+">" \
+ M.MATCH[M.MATCH[matching.matchtype][side]]["short"] + "</td>"+text+"</tr>"
text += "<td " + utils.css_tool.getInlineStyle("diffFiles", cssClass) + ">" + val + "</td>"
text = "<tr><td " + utils.css_tool.getInlineStyle("diffFiles", cssClass) + ">" \
+ M.MATCH[M.MATCH[matching.matchtype][side]]["short"] + "</td>" + text + "</tr>"
matching.difftext += text
return text
def compareRow(matching, header, rA, rB):
""" traverse through matched rows """
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool"))-4
verify = int(job.getDebugLevel("match_tool")) - 4
allident = True
textA = ""
textB = ""
@ -417,7 +466,7 @@ def compareRow(matching, header, rA, rB):
classB = res[2]
valA = str(rA[f["field"]])
valB = str(rB[f["field"]])
elif f["field"] in rA :
elif f["field"] in rA:
valA = str(rA[f["field"]])
match = "ddl"
classA = "acceptA"
@ -432,27 +481,27 @@ def compareRow(matching, header, rA, rB):
classA = "acceptA"
classB = "acceptB"
if (match == "MATCH"):
textA += "<td>"+valA+"</td>"
textB += "<td>"+valB+"</td>"
textA += "<td>" + valA + "</td>"
textB += "<td>" + valB + "</td>"
matching.setCssClass("result1")
elif (match == "hard"):
allident = False
textA += "<td "+utils.css_tool.getInlineStyle("diffFiles", classA)+">"+valA+"</td>"
textB += "<td "+utils.css_tool.getInlineStyle("diffFiles", classB)+">"+valB+"</td>"
textA += "<td " + utils.css_tool.getInlineStyle("diffFiles", classA) + ">" + valA + "</td>"
textB += "<td " + utils.css_tool.getInlineStyle("diffFiles", classB) + ">" + valB + "</td>"
matching.setCssClass("result3")
else:
allident = False
textA += "<td "+utils.css_tool.getInlineStyle("diffFiles", classA)+">"+valA+" ("+match+")</td>"
textB += "<td "+utils.css_tool.getInlineStyle("diffFiles", classB)+">"+valB+" ("+match+")</td>"
textA += "<td " + utils.css_tool.getInlineStyle("diffFiles", classA) + ">" + valA + " (" + match + ")</td>"
textB += "<td " + utils.css_tool.getInlineStyle("diffFiles", classB) + ">" + valB + " (" + match + ")</td>"
matching.setCssClass("result1")
if allident:
return "<tr><td/>"+textA+"</tr>"
text = "<tr><td>"+M.MATCH[M.MATCH[matching.matchtype]["A"]]["short"]+"</td>"+textA+"</tr>"
text += "<tr><td>"+M.MATCH[matching.matchtype]["shortB"]+"</td>"+textB+"</tr>"
return "<tr><td/>" + textA + "</tr>"
text = "<tr><td>" + M.MATCH[M.MATCH[matching.matchtype]["A"]]["short"] + "</td>" + textA + "</tr>"
text += "<tr><td>" + M.MATCH[matching.matchtype]["shortB"] + "</td>" + textB + "</tr>"
matching.difftext += text
return text
# --------------------------------------------------------------------------
def matchLines(matching):
pass

Loading…
Cancel
Save