From 1bf757ced4586ebdf131ed36540ef88958b6c651 Mon Sep 17 00:00:00 2001 From: Ulrich Carmesin Date: Thu, 21 Apr 2022 22:32:21 +0200 Subject: [PATCH] similitary-order for business or technical match --- test/test_compare.py | 5 +- utils/match_const.py | 30 ++---- utils/match_tool.py | 213 ++++++++++++++++++++++++++----------------- 3 files changed, 144 insertions(+), 104 deletions(-) diff --git a/test/test_compare.py b/test/test_compare.py index 4c77e6d..8db645e 100644 --- a/test/test_compare.py +++ b/test/test_compare.py @@ -111,9 +111,10 @@ class MyTestCase(unittest.TestCase): def xtest_similarity(self): matching = self.getMatching() - utils.match_tool.getSimilarity(matching, ":database:scheme:table:_data", + utils.match_tool.setMatchkeys(matching, ":database:scheme:table:_data") + utils.match_tool.getSimilarity(matching, tdata[M.MATCH_SIDE_PREACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0], - tdata[M.MATCH_SIDE_POSTACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0],1) + tdata[M.MATCH_SIDE_POSTACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0], 1) def test_bestfit(self): job = Job("unit") diff --git a/utils/match_const.py b/utils/match_const.py index 10e936a..67a55c4 100644 --- a/utils/match_const.py +++ b/utils/match_const.py @@ -2,6 +2,11 @@ """ constants for used for api-functions """ + +SIM_BUSINESS = "B" +SIM_TECHNICAL = "T" +SIM_DEFAULT = "BT" + MATCH_SIDE_PREEXPECT = "preexpect" """ it implies the precondition of the expectation """ MATCH_DICT_PREEXPECT = { @@ -67,10 +72,7 @@ MATCH = { MATCH_PRECOND: { "A": MATCH_SIDE_PREEXPECT, "B": MATCH_SIDE_PREACTUAL, - "shortA": "SV", - "shortB": "IV", - "longA": "Soll-Vorher", - "longB": "Ist-Vorher", + "simorder": SIM_BUSINESS + SIM_TECHNICAL, "mode": "info", "filename": "01_Vorbedingungen", "title": "Pruefung Vorbedingung (Soll-Vorher - Ist-Vorher)" @@ -78,10 +80,7 @@ MATCH = { MATCH_POSTCOND: { "A": MATCH_SIDE_POSTEXPECT, "B": MATCH_SIDE_POSTACTUAL, - "shortA": "SN", - "shortB": "IN", - "longA": "Soll-Nachher", - "longB": "Ist-Nachher", + "simorder": SIM_BUSINESS + SIM_TECHNICAL, "mode": "hard", "filename": "00_Fachabgleich", "title": "Fachliche Auswertung (Soll-Nachher - Ist-Nachher)" @@ -89,10 +88,7 @@ MATCH = { MATCH_SUCCESS: { "A": MATCH_SIDE_PREACTUAL, "B": MATCH_SIDE_POSTACTUAL, - "shortA": "IV", - "shortB": "IN", - "longA": "Ist-Vorher", - "longB": "Ist-Nachher", + "simorder": SIM_TECHNICAL + SIM_BUSINESS, "mode": "action", "filename": "04_Ablauf", "title": "Ablauf-Differenz (Ist-Vorher - Ist-Nachher)" @@ -100,10 +96,7 @@ MATCH = { MATCH_PRESTEP: { "A": MATCH_SIDE_PRESTEP, "B": MATCH_SIDE_POSTACTUAL, - "shortA": "VN", - "shortB": "IN", - "longA": "Vor-Schritt", - "longB": "Ist-Nachher", + "simorder": SIM_TECHNICAL + SIM_BUSINESS, "mode": "action", "filename": "02_Vorschritt", "title": "Schritt-Differenz (Vorschritt-Nachher - Ist-Nachher)" @@ -111,10 +104,7 @@ MATCH = { MATCH_TESTEXAMPLE: { "A": MATCH_SIDE_TESTCASE, "B": MATCH_SIDE_POSTACTUAL, - "shortA": "TN", - "shortB": "IN", - "longA": "Vergleich-Soll", - "longB": "Ist-Nachher", + "simorder": SIM_BUSINESS + SIM_TECHNICAL, "mode": "action", "filename": "03_Vergleichstestfall", "title": "Vergleichstestfall (Vergleich-Soll - Ist-Nachher)" diff --git a/utils/match_tool.py b/utils/match_tool.py index eabb96d..3cbfc36 100644 --- a/utils/match_tool.py +++ b/utils/match_tool.py @@ -10,11 +10,13 @@ import utils.report_tool import basic.program import basic.constants as B import utils.match_const as M + # ------------------------------------------------------------ """ """ + class Matching(): def __init__(self, comp): self.comp = comp @@ -64,21 +66,25 @@ class Matching(): def isHitA(self, key): return ((key in self.linksA) and (self.linksA[key] != B.SVAL_NULL)) + def isHitB(self, key): return ((key in self.linksB) and (self.linksB[key] != B.SVAL_NULL)) + def setHit(self, keyA, keyB): if (not self.isHitA(keyA)) and (not self.isHitB(keyB)): if (keyA != B.SVAL_NULL): self.linksA[keyA] = keyB if (keyB != B.SVAL_NULL): self.linksB[keyB] = keyA return "OK" raise Exception("one of the links are set") + def setNohit(self, similarity, keyA, keyB): """ The similarity must be unique. Only a mismatch will be set. """ if similarity in self.nomatch: - raise Exception("similarity "+similarity+" exists") + raise Exception("similarity " + similarity + " exists") if (self.isHitA(keyA) or self.isHitB(keyB)): return self.nomatch[similarity] = [keyA, keyB] + def getTableDdl(self, path): a = path.split(":") ddl = self.comp.conf[B.DATA_NODE_DDL] @@ -87,19 +93,22 @@ class Matching(): if (x == B.DATA_NODE_DATA): break if x in ddl: ddl = ddl[x] return ddl + def setDiffHeader(matching): job = basic.program.Job.getInstance() - verify = int(job.getDebugLevel("match_tool"))-1 + verify = int(job.getDebugLevel("match_tool")) - 1 job.debug(verify, "getDiffHeader ") htmltxt = "" htmltxt += "" - htmltxt += ""+M.MATCH[matching.matchtype]["title"]+"" + htmltxt += "" + M.MATCH[matching.matchtype]["title"] + "" htmltxt += utils.css_tool.getInternalStyle("diffFiles") htmltxt += "" htmltxt += "" - htmltxt += "

"+M.MATCH[matching.matchtype]["title"]+"

" - htmltxt += "

"+M.MATCH[M.MATCH[matching.matchtype]["A"]]["long"]+": "+matching.matchfiles["A"]+"

" - htmltxt += "

"+M.MATCH[M.MATCH[matching.matchtype]["B"]]["long"]+": "+matching.matchfiles["B"]+"


" + htmltxt += "

" + M.MATCH[matching.matchtype]["title"] + "

" + htmltxt += "

" + M.MATCH[M.MATCH[matching.matchtype]["A"]]["long"] + ": " + matching.matchfiles[ + "A"] + "

" + htmltxt += "

" + M.MATCH[M.MATCH[matching.matchtype]["B"]]["long"] + ": " + matching.matchfiles[ + "B"] + "


" matching.htmltext = htmltxt def setDiffFooter(self): @@ -110,6 +119,7 @@ class Matching(): htmltext += "" self.htmltext = htmltext + def matchFiles(matching): """ @@ -118,6 +128,7 @@ def matchFiles(matching): :return: """ + def matchBestfit(matching, path): """ in this strategy the difference-score of all elements of both sides will be calculated. @@ -128,7 +139,7 @@ def matchBestfit(matching, path): :return: """ # initialize all potential links with null - i=0 + i = 0 if (matching.sideA is not None): for r in matching.sideA: k = composeKey("a", i) @@ -154,7 +165,7 @@ def matchBestfit(matching, path): if (matching.isHitB(composeKey("b", ib))): ib += 1 continue - similarity=getSimilarity(matching, path, rA, rB, ix) + similarity = getSimilarity(matching, rA, rB, ix, M.MATCH[matching.matchtype]["simorder"]) if (similarity == "MATCH"): matching.setHit(composeKey("a", ia), composeKey("b", ib)) continue @@ -164,70 +175,100 @@ def matchBestfit(matching, path): ix += 1 ia += 1 + def matchRestfit(matching): """ """ job = basic.program.Job.getInstance() - verify = int(job.getDebugLevel("match_tool"))-1 + verify = int(job.getDebugLevel("match_tool")) - 1 job.debug(verify, "matchRestfit ") for x in sorted(matching.nomatch, reverse=True): - job.debug(verify, "matchRestfit " +x) + job.debug(verify, "matchRestfit " + x) pair = matching.nomatch[x] if (matching.isHitA(pair[0])): - print("A "+pair[0]+" bereits zugeordnet mit "+matching.linksA[pair[0]]) + print("A " + pair[0] + " bereits zugeordnet mit " + matching.linksA[pair[0]]) if (matching.isHitB(pair[1])): print("B " + pair[1] + " bereits zugeordnet mit " + matching.linksB[pair[1]]) continue if (matching.isHitA(pair[0])): continue - print("neues Matching "+pair[0]+" "+pair[1]) + print("neues Matching " + pair[0] + " " + pair[1]) matching.setHit(pair[0], pair[1]) + def composeKey(side, i): - return side.lower()+str(i+1).zfill(4) + return side.lower() + str(i + 1).zfill(4) + + def extractKeyI(key): - return int(key[1:])-1 + return int(key[1:]) - 1 -def getSimilarity(matching, path, rA, rB, i): - """ it calculates the similarity between both rows by: - concat each criteria with single-similarity 00..99 and i with 999..000 """ + +def setMatchkeys(matching, path): job = basic.program.Job.getInstance() - verify = int(job.getDebugLevel("match_tool"))-1 - job.debug(verify, "getSimilarity "+path+" "+str(i)) + verify = int(job.getDebugLevel("match_tool")) - 1 + job.debug(verify, "getSimilarity " + path) if len(matching.matchkeys) > 0: - keys = matching.matchkeys - elif (B.DATA_NODE_DDL in matching.comp.conf): - job.debug(verify, "ddl " + path + " " + str(i)) + return + if (B.DATA_NODE_DDL in matching.comp.conf): + job.debug(verify, "ddl " + path) a = path.split(":") ddl = matching.comp.conf[B.DATA_NODE_DDL] for x in a: - if (len(x) < 2): continue - if (x == B.DATA_NODE_DATA): break - if x in ddl: ddl = ddl[x] - job.debug(verify, "ddl " + json.dumps(ddl) + " " + str(i)) + if (len(x) < 2): + continue + if (x == B.DATA_NODE_DATA): + break + if x in ddl: + ddl = ddl[x] + job.debug(verify, "ddl " + json.dumps(ddl)) keys = {} for f in ddl: - job.debug(verify, "ddl " + f + " " + str(i)) - if ("key" in ddl[f]) and (len(ddl[f]["key"])>0): + job.debug(verify, "ddl " + f) + if ("key" in ddl[f]) and (len(ddl[f]["key"]) > 0): b = ddl[f]["key"].split(":") - if (len(b)!=2): raise Exception("falsch formatierter Schluessel "+ddl[f]["key"]) - if (not b[1].isnumeric()): raise Exception("falsch formatierter Schluessel "+ddl[f]["key"]) - k = "k"+b[1].zfill(2) - job.debug(verify, "ddl " + f + " " + str(i)) - keys[k]={ "ktyp": b[0], "field": ddl[f]["feld"], "type": ddl[f]["type"], "rule": ddl[f]["acceptance"]} - matching.matchkeys=keys - job.debug(verify, "ddl " + json.dumps(keys) + " " + str(i)) - msim="" - topsim="" + if (len(b) != 2): + raise Exception("falsch formatierter Schluessel " + ddl[f]["key"]) + if (not b[1].isnumeric()): + raise Exception("falsch formatierter Schluessel " + ddl[f]["key"]) + k = "k"+b[0]+""+b[1].zfill(2) + job.debug(verify, "ddl " + f) + keys[k] = {"ktyp": b[0], "field": ddl[f]["feld"], "type": ddl[f]["type"], "rule": ddl[f]["acceptance"]} + matching.matchkeys = keys + job.debug(verify, "ddl " + json.dumps(keys)) + + +def getSimilarity(matching, rA, rB, i, simorder=M.SIM_DEFAULT): + """ it calculates the similarity between both rows by: + concat each criteria with single-similarity 00..99 and i with 999..000 """ + job = basic.program.Job.getInstance() + verify = int(job.getDebugLevel("match_tool")) - 1 + job.debug(verify, "getSimilarity ") + mBsim = "" + mTsim = "" + topBsim = "" + topTsim = "" for k in sorted(matching.matchkeys): - msim += getStringSimilarity(str(rA[matching.matchkeys[k]["field"]]), str(rB[matching.matchkeys[k]["field"]])) - topsim += "99" - if msim == topsim: + if M.SIM_TECHNICAL in k: + mTsim += getStringSimilarity(str(rA[matching.matchkeys[k]["field"]]), str(rB[matching.matchkeys[k]["field"]])) + topTsim += "99" + if M.SIM_BUSINESS in k: + mTsim += getStringSimilarity(str(rA[matching.matchkeys[k]["field"]]), str(rB[matching.matchkeys[k]["field"]])) + topTsim += "99" + + if mBsim == topBsim and mTsim == topTsim: job.debug(verify, "Treffer ") return "MATCH" - else: - job.debug(verify, "nomatch S"+msim+str(i).zfill(3)) - return "S"+msim+str(i).zfill(3) - pass + elif simorder[0:1] == M.SIM_TECHNICAL and mTsim == topTsim: + job.debug(verify, "Treffer ") + return "MATCH" + elif simorder[0:1] == M.SIM_BUSINESS and mBsim == topBsim: + job.debug(verify, "Treffer ") + return "MATCH" + elif simorder[0:1] == M.SIM_TECHNICAL: + return "S"+mTsim+mBsim+str(i).zfill(3) + else: # if simorder[0:1] == M.SIM_BUSINESS: + return "S" + mBsim + mTsim + str(i).zfill(3) + def matchTree(matching): """ @@ -235,41 +276,44 @@ def matchTree(matching): :return: """ job = basic.program.Job.getInstance() - verify = int(job.getDebugLevel("match_tool"))-4 - job.debug(verify, "..>> start matching "+matching.mode) + verify = int(job.getDebugLevel("match_tool")) - 4 + job.debug(verify, "..>> start matching " + matching.mode) matchElement(matching, matching.sideA, matching.sideB, "") matching.setDiffFooter() - job.debug(verify, "..>> ende matching "+matching.htmltext) + job.debug(verify, "..>> ende matching " + matching.htmltext) return matching.htmltext + def matchElement(matching, A, B, path): """ travers through the datatree """ job = basic.program.Job.getInstance() - verify = int(job.getDebugLevel("match_tool"))-4 - job.debug(verify, "matchElem "+path+" A "+str(type(A))+" B "+str(type(B))) + verify = int(job.getDebugLevel("match_tool")) - 4 + job.debug(verify, "matchElem " + path + " A " + str(type(A)) + " B " + str(type(B))) if ((A is not None) and (isinstance(A, list))) \ - or ((B is not None) and (isinstance(B, list))): + or ((B is not None) and (isinstance(B, list))): return matchArray(matching, A, B, path) elif ((A is not None) and (isinstance(A, dict))) \ - or ((B is not None) and (isinstance(B, dict))): + or ((B is not None) and (isinstance(B, dict))): return matchDict(matching, A, B, path) else: return matching + def getStringSimilarity(strA, strB): job = basic.program.Job.getInstance() - verify = int(job.getDebugLevel("match_tool"))-1 - job.debug(verify, "getStringSimilarity "+strA+" ?= "+strB) + verify = int(job.getDebugLevel("match_tool")) - 1 + job.debug(verify, "getStringSimilarity " + strA + " ?= " + strB) if (strA == strB): return "99" if (strA.strip() == strB.strip()): return "77" if (strA.lower() == strB.lower()): return "66" if (strA.strip().lower() == strB.strip().lower()): return "55" return "00" + def getEvaluation(matching, type, acceptance, sideA, sideB): job = basic.program.Job.getInstance() - verify = int(job.getDebugLevel("match_tool"))-1 - job.debug(verify, "getEvaluation "+str(sideA)+" ?= "+str(sideB)) + verify = int(job.getDebugLevel("match_tool")) - 1 + job.debug(verify, "getEvaluation " + str(sideA) + " ?= " + str(sideB)) match = getStringSimilarity(str(sideA), str(sideB)) classA = "novalue" classB = "novalue" @@ -285,11 +329,12 @@ def getEvaluation(matching, type, acceptance, sideA, sideB): classB = "acceptB" return [result, classA, classB] + def matchDict(matching, A, B, path): """ travers through the datatree """ job = basic.program.Job.getInstance() - verify = int(job.getDebugLevel("match_tool"))-4 - job.debug(verify, "matchDict "+path) + verify = int(job.getDebugLevel("match_tool")) - 4 + job.debug(verify, "matchDict " + path) if (A is not None): for k in A: job.debug(verify, "matchDict 400 " + k + ".") @@ -299,11 +344,11 @@ def matchDict(matching, A, B, path): if (isinstance(A[k], dict)): A[k]["_match"] = "Y" if (isinstance(B[k], dict)): B[k]["_match"] = "Y" job.debug(verify, "matchDict 404 " + k + "." + path) - matchElement(matching, A[k], B[k], path+":"+k) + matchElement(matching, A[k], B[k], path + ":" + k) else: if (isinstance(A[k], dict)): A[k]["_match"] = "N" job.debug(verify, "matchDict 408 " + path) - matchElement(matching, A[k], None, path+":"+k) + matchElement(matching, A[k], None, path + ":" + k) if (B is not None): for k in B: job.debug(verify, "matchDict 412 " + k + ".") @@ -314,15 +359,16 @@ def matchDict(matching, A, B, path): elif (A is None) or (k not in A): if (A is not None) and (isinstance(A[k], dict)): B[k]["_match"] = "N" job.debug(verify, "matchDict 418 " + k + "." + path) - matchElement(matching, None, B[k], path+":"+k) + matchElement(matching, None, B[k], path + ":" + k) job.debug(verify, "matchDict 420 ...<<---") return matching + def matchArray(matching, A, B, path): """ matches the datarows of the datatree """ job = basic.program.Job.getInstance() - verify = int(job.getDebugLevel("match_tool"))-4 - job.debug(verify, "matchArray "+path+"\n.."+matching.htmltext) + verify = int(job.getDebugLevel("match_tool")) - 4 + job.debug(verify, "matchArray " + path + "\n.." + matching.htmltext) matching.sideA = A matching.sideB = B matchBestfit(matching, path) @@ -341,10 +387,11 @@ def matchArray(matching, A, B, path): htmltext += compareRows(matching, path) matching.htmltext += htmltext + def compareRows(matching, path): """ traverse through matched rows """ job = basic.program.Job.getInstance() - verify = int(job.getDebugLevel("match_tool"))-1 + verify = int(job.getDebugLevel("match_tool")) - 1 ddl = matching.getTableDdl(path) report = utils.report_tool.Report.getInstance() table = "" @@ -353,15 +400,15 @@ def compareRows(matching, path): htmltext = "" for f in ddl[B.DATA_NODE_HEADER]: job.debug(verify, "ddl " + f + " ") - header.append({ "field": f, "type": ddl[f]["type"], "acceptance": ddl[f]["acceptance"]}) - htmltext += "" + header.append({"field": f, "type": ddl[f]["type"], "acceptance": ddl[f]["acceptance"]}) + htmltext += "" htmltext += "" matching.difftext = htmltext for k in sorted(matching.linksA): print(k) if (matching.isHitA(k)): htmltext += compareRow(matching, header, matching.sideA[int(extractKeyI(k))], - matching.sideB[int(extractKeyI(matching.linksA[k]))]) + matching.sideB[int(extractKeyI(matching.linksA[k]))]) else: htmltext += markRow(matching, header, matching.sideA[int(extractKeyI(k))], "A") matching.setCssClass("result3") @@ -373,9 +420,10 @@ def compareRows(matching, path): matching.difftext += "
"+f+"" + f + "
" return htmltext + def markRow(matching, header, row, side): job = basic.program.Job.getInstance() - verify = int(job.getDebugLevel("match_tool"))-4 + verify = int(job.getDebugLevel("match_tool")) - 4 text = "" cssClass = "" for f in header: @@ -390,16 +438,17 @@ def markRow(matching, header, row, side): res = getEvaluation(matching, f["type"], f["acceptance"], "", row[f["field"]]) val = str(row[f["field"]]) cssClass = res[2] - text += ""+val+"" - text = "" \ - + M.MATCH[M.MATCH[matching.matchtype][side]]["short"] + ""+text+"" + text += "" + val + "" + text = "" \ + + M.MATCH[M.MATCH[matching.matchtype][side]]["short"] + "" + text + "" matching.difftext += text return text + def compareRow(matching, header, rA, rB): """ traverse through matched rows """ job = basic.program.Job.getInstance() - verify = int(job.getDebugLevel("match_tool"))-4 + verify = int(job.getDebugLevel("match_tool")) - 4 allident = True textA = "" textB = "" @@ -417,7 +466,7 @@ def compareRow(matching, header, rA, rB): classB = res[2] valA = str(rA[f["field"]]) valB = str(rB[f["field"]]) - elif f["field"] in rA : + elif f["field"] in rA: valA = str(rA[f["field"]]) match = "ddl" classA = "acceptA" @@ -432,27 +481,27 @@ def compareRow(matching, header, rA, rB): classA = "acceptA" classB = "acceptB" if (match == "MATCH"): - textA += ""+valA+"" - textB += ""+valB+"" + textA += "" + valA + "" + textB += "" + valB + "" matching.setCssClass("result1") elif (match == "hard"): allident = False - textA += ""+valA+"" - textB += ""+valB+"" + textA += "" + valA + "" + textB += "" + valB + "" matching.setCssClass("result3") else: allident = False - textA += ""+valA+" ("+match+")" - textB += ""+valB+" ("+match+")" + textA += "" + valA + " (" + match + ")" + textB += "" + valB + " (" + match + ")" matching.setCssClass("result1") if allident: - return ""+textA+"" - text = ""+M.MATCH[M.MATCH[matching.matchtype]["A"]]["short"]+""+textA+"" - text += ""+M.MATCH[matching.matchtype]["shortB"]+""+textB+"" + return "" + textA + "" + text = "" + M.MATCH[M.MATCH[matching.matchtype]["A"]]["short"] + "" + textA + "" + text += "" + M.MATCH[matching.matchtype]["shortB"] + "" + textB + "" matching.difftext += text return text + # -------------------------------------------------------------------------- def matchLines(matching): pass -