Browse Source

similarity-order for business or technical match

master
Ulrich Carmesin 2 years ago
parent
commit
1bf757ced4
  1. 3
      test/test_compare.py
  2. 30
      utils/match_const.py
  3. 109
      utils/match_tool.py

3
test/test_compare.py

@ -111,7 +111,8 @@ class MyTestCase(unittest.TestCase):
def xtest_similarity(self): def xtest_similarity(self):
matching = self.getMatching() matching = self.getMatching()
utils.match_tool.getSimilarity(matching, ":database:scheme:table:_data", utils.match_tool.setMatchkeys(matching, ":database:scheme:table:_data")
utils.match_tool.getSimilarity(matching,
tdata[M.MATCH_SIDE_PREACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0], tdata[M.MATCH_SIDE_PREACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0],
tdata[M.MATCH_SIDE_POSTACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0], 1) tdata[M.MATCH_SIDE_POSTACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0], 1)

30
utils/match_const.py

@ -2,6 +2,11 @@
""" """
constants for used for api-functions constants for used for api-functions
""" """
SIM_BUSINESS = "B"
SIM_TECHNICAL = "T"
SIM_DEFAULT = "BT"
MATCH_SIDE_PREEXPECT = "preexpect" MATCH_SIDE_PREEXPECT = "preexpect"
""" it implies the precondition of the expectation """ """ it implies the precondition of the expectation """
MATCH_DICT_PREEXPECT = { MATCH_DICT_PREEXPECT = {
@ -67,10 +72,7 @@ MATCH = {
MATCH_PRECOND: { MATCH_PRECOND: {
"A": MATCH_SIDE_PREEXPECT, "A": MATCH_SIDE_PREEXPECT,
"B": MATCH_SIDE_PREACTUAL, "B": MATCH_SIDE_PREACTUAL,
"shortA": "SV", "simorder": SIM_BUSINESS + SIM_TECHNICAL,
"shortB": "IV",
"longA": "Soll-Vorher",
"longB": "Ist-Vorher",
"mode": "info", "mode": "info",
"filename": "01_Vorbedingungen", "filename": "01_Vorbedingungen",
"title": "Pruefung Vorbedingung (Soll-Vorher - Ist-Vorher)" "title": "Pruefung Vorbedingung (Soll-Vorher - Ist-Vorher)"
@ -78,10 +80,7 @@ MATCH = {
MATCH_POSTCOND: { MATCH_POSTCOND: {
"A": MATCH_SIDE_POSTEXPECT, "A": MATCH_SIDE_POSTEXPECT,
"B": MATCH_SIDE_POSTACTUAL, "B": MATCH_SIDE_POSTACTUAL,
"shortA": "SN", "simorder": SIM_BUSINESS + SIM_TECHNICAL,
"shortB": "IN",
"longA": "Soll-Nachher",
"longB": "Ist-Nachher",
"mode": "hard", "mode": "hard",
"filename": "00_Fachabgleich", "filename": "00_Fachabgleich",
"title": "Fachliche Auswertung (Soll-Nachher - Ist-Nachher)" "title": "Fachliche Auswertung (Soll-Nachher - Ist-Nachher)"
@ -89,10 +88,7 @@ MATCH = {
MATCH_SUCCESS: { MATCH_SUCCESS: {
"A": MATCH_SIDE_PREACTUAL, "A": MATCH_SIDE_PREACTUAL,
"B": MATCH_SIDE_POSTACTUAL, "B": MATCH_SIDE_POSTACTUAL,
"shortA": "IV", "simorder": SIM_TECHNICAL + SIM_BUSINESS,
"shortB": "IN",
"longA": "Ist-Vorher",
"longB": "Ist-Nachher",
"mode": "action", "mode": "action",
"filename": "04_Ablauf", "filename": "04_Ablauf",
"title": "Ablauf-Differenz (Ist-Vorher - Ist-Nachher)" "title": "Ablauf-Differenz (Ist-Vorher - Ist-Nachher)"
@ -100,10 +96,7 @@ MATCH = {
MATCH_PRESTEP: { MATCH_PRESTEP: {
"A": MATCH_SIDE_PRESTEP, "A": MATCH_SIDE_PRESTEP,
"B": MATCH_SIDE_POSTACTUAL, "B": MATCH_SIDE_POSTACTUAL,
"shortA": "VN", "simorder": SIM_TECHNICAL + SIM_BUSINESS,
"shortB": "IN",
"longA": "Vor-Schritt",
"longB": "Ist-Nachher",
"mode": "action", "mode": "action",
"filename": "02_Vorschritt", "filename": "02_Vorschritt",
"title": "Schritt-Differenz (Vorschritt-Nachher - Ist-Nachher)" "title": "Schritt-Differenz (Vorschritt-Nachher - Ist-Nachher)"
@ -111,10 +104,7 @@ MATCH = {
MATCH_TESTEXAMPLE: { MATCH_TESTEXAMPLE: {
"A": MATCH_SIDE_TESTCASE, "A": MATCH_SIDE_TESTCASE,
"B": MATCH_SIDE_POSTACTUAL, "B": MATCH_SIDE_POSTACTUAL,
"shortA": "TN", "simorder": SIM_BUSINESS + SIM_TECHNICAL,
"shortB": "IN",
"longA": "Vergleich-Soll",
"longB": "Ist-Nachher",
"mode": "action", "mode": "action",
"filename": "03_Vergleichstestfall", "filename": "03_Vergleichstestfall",
"title": "Vergleichstestfall (Vergleich-Soll - Ist-Nachher)" "title": "Vergleichstestfall (Vergleich-Soll - Ist-Nachher)"

109
utils/match_tool.py

@ -10,11 +10,13 @@ import utils.report_tool
import basic.program import basic.program
import basic.constants as B import basic.constants as B
import utils.match_const as M import utils.match_const as M
# ------------------------------------------------------------ # ------------------------------------------------------------
""" """
""" """
class Matching(): class Matching():
def __init__(self, comp): def __init__(self, comp):
self.comp = comp self.comp = comp
@ -64,14 +66,17 @@ class Matching():
def isHitA(self, key): def isHitA(self, key):
return ((key in self.linksA) and (self.linksA[key] != B.SVAL_NULL)) return ((key in self.linksA) and (self.linksA[key] != B.SVAL_NULL))
def isHitB(self, key): def isHitB(self, key):
return ((key in self.linksB) and (self.linksB[key] != B.SVAL_NULL)) return ((key in self.linksB) and (self.linksB[key] != B.SVAL_NULL))
def setHit(self, keyA, keyB): def setHit(self, keyA, keyB):
if (not self.isHitA(keyA)) and (not self.isHitB(keyB)): if (not self.isHitA(keyA)) and (not self.isHitB(keyB)):
if (keyA != B.SVAL_NULL): self.linksA[keyA] = keyB if (keyA != B.SVAL_NULL): self.linksA[keyA] = keyB
if (keyB != B.SVAL_NULL): self.linksB[keyB] = keyA if (keyB != B.SVAL_NULL): self.linksB[keyB] = keyA
return "OK" return "OK"
raise Exception("one of the links are set") raise Exception("one of the links are set")
def setNohit(self, similarity, keyA, keyB): def setNohit(self, similarity, keyA, keyB):
""" The similarity must be unique. Only a mismatch will be set. """ """ The similarity must be unique. Only a mismatch will be set. """
if similarity in self.nomatch: if similarity in self.nomatch:
@ -79,6 +84,7 @@ class Matching():
if (self.isHitA(keyA) or self.isHitB(keyB)): if (self.isHitA(keyA) or self.isHitB(keyB)):
return return
self.nomatch[similarity] = [keyA, keyB] self.nomatch[similarity] = [keyA, keyB]
def getTableDdl(self, path): def getTableDdl(self, path):
a = path.split(":") a = path.split(":")
ddl = self.comp.conf[B.DATA_NODE_DDL] ddl = self.comp.conf[B.DATA_NODE_DDL]
@ -87,6 +93,7 @@ class Matching():
if (x == B.DATA_NODE_DATA): break if (x == B.DATA_NODE_DATA): break
if x in ddl: ddl = ddl[x] if x in ddl: ddl = ddl[x]
return ddl return ddl
def setDiffHeader(matching): def setDiffHeader(matching):
job = basic.program.Job.getInstance() job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool")) - 1 verify = int(job.getDebugLevel("match_tool")) - 1
@ -98,8 +105,10 @@ class Matching():
htmltxt += "</head>" htmltxt += "</head>"
htmltxt += "<body>" htmltxt += "<body>"
htmltxt += "<h1>" + M.MATCH[matching.matchtype]["title"] + "</h1>" htmltxt += "<h1>" + M.MATCH[matching.matchtype]["title"] + "</h1>"
htmltxt += "<h4>"+M.MATCH[M.MATCH[matching.matchtype]["A"]]["long"]+": "+matching.matchfiles["A"]+"</h4>" htmltxt += "<h4>" + M.MATCH[M.MATCH[matching.matchtype]["A"]]["long"] + ": " + matching.matchfiles[
htmltxt += "<h4>"+M.MATCH[M.MATCH[matching.matchtype]["B"]]["long"]+": "+matching.matchfiles["B"]+"</h4><br>" "A"] + "</h4>"
htmltxt += "<h4>" + M.MATCH[M.MATCH[matching.matchtype]["B"]]["long"] + ": " + matching.matchfiles[
"B"] + "</h4><br>"
matching.htmltext = htmltxt matching.htmltext = htmltxt
def setDiffFooter(self): def setDiffFooter(self):
@ -110,6 +119,7 @@ class Matching():
htmltext += "</body></html>" htmltext += "</body></html>"
self.htmltext = htmltext self.htmltext = htmltext
def matchFiles(matching): def matchFiles(matching):
""" """
@ -118,6 +128,7 @@ def matchFiles(matching):
:return: :return:
""" """
def matchBestfit(matching, path): def matchBestfit(matching, path):
""" """
in this strategy the difference-score of all elements of both sides will be calculated. in this strategy the difference-score of all elements of both sides will be calculated.
@ -154,7 +165,7 @@ def matchBestfit(matching, path):
if (matching.isHitB(composeKey("b", ib))): if (matching.isHitB(composeKey("b", ib))):
ib += 1 ib += 1
continue continue
similarity=getSimilarity(matching, path, rA, rB, ix) similarity = getSimilarity(matching, rA, rB, ix, M.MATCH[matching.matchtype]["simorder"])
if (similarity == "MATCH"): if (similarity == "MATCH"):
matching.setHit(composeKey("a", ia), composeKey("b", ib)) matching.setHit(composeKey("a", ia), composeKey("b", ib))
continue continue
@ -164,6 +175,7 @@ def matchBestfit(matching, path):
ix += 1 ix += 1
ia += 1 ia += 1
def matchRestfit(matching): def matchRestfit(matching):
""" """ """ """
job = basic.program.Job.getInstance() job = basic.program.Job.getInstance()
@ -182,52 +194,81 @@ def matchRestfit(matching):
print("neues Matching " + pair[0] + " " + pair[1]) print("neues Matching " + pair[0] + " " + pair[1])
matching.setHit(pair[0], pair[1]) matching.setHit(pair[0], pair[1])
def composeKey(side, i): def composeKey(side, i):
return side.lower() + str(i + 1).zfill(4) return side.lower() + str(i + 1).zfill(4)
def extractKeyI(key): def extractKeyI(key):
return int(key[1:]) - 1 return int(key[1:]) - 1
def getSimilarity(matching, path, rA, rB, i):
""" it calculates the similarity between both rows by: def setMatchkeys(matching, path):
concat each criteria with single-similarity 00..99 and i with 999..000 """
job = basic.program.Job.getInstance() job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool")) - 1 verify = int(job.getDebugLevel("match_tool")) - 1
job.debug(verify, "getSimilarity "+path+" "+str(i)) job.debug(verify, "getSimilarity " + path)
if len(matching.matchkeys) > 0: if len(matching.matchkeys) > 0:
keys = matching.matchkeys return
elif (B.DATA_NODE_DDL in matching.comp.conf): if (B.DATA_NODE_DDL in matching.comp.conf):
job.debug(verify, "ddl " + path + " " + str(i)) job.debug(verify, "ddl " + path)
a = path.split(":") a = path.split(":")
ddl = matching.comp.conf[B.DATA_NODE_DDL] ddl = matching.comp.conf[B.DATA_NODE_DDL]
for x in a: for x in a:
if (len(x) < 2): continue if (len(x) < 2):
if (x == B.DATA_NODE_DATA): break continue
if x in ddl: ddl = ddl[x] if (x == B.DATA_NODE_DATA):
job.debug(verify, "ddl " + json.dumps(ddl) + " " + str(i)) break
if x in ddl:
ddl = ddl[x]
job.debug(verify, "ddl " + json.dumps(ddl))
keys = {} keys = {}
for f in ddl: for f in ddl:
job.debug(verify, "ddl " + f + " " + str(i)) job.debug(verify, "ddl " + f)
if ("key" in ddl[f]) and (len(ddl[f]["key"]) > 0): if ("key" in ddl[f]) and (len(ddl[f]["key"]) > 0):
b = ddl[f]["key"].split(":") b = ddl[f]["key"].split(":")
if (len(b)!=2): raise Exception("falsch formatierter Schluessel "+ddl[f]["key"]) if (len(b) != 2):
if (not b[1].isnumeric()): raise Exception("falsch formatierter Schluessel "+ddl[f]["key"]) raise Exception("falsch formatierter Schluessel " + ddl[f]["key"])
k = "k"+b[1].zfill(2) if (not b[1].isnumeric()):
job.debug(verify, "ddl " + f + " " + str(i)) raise Exception("falsch formatierter Schluessel " + ddl[f]["key"])
k = "k"+b[0]+""+b[1].zfill(2)
job.debug(verify, "ddl " + f)
keys[k] = {"ktyp": b[0], "field": ddl[f]["feld"], "type": ddl[f]["type"], "rule": ddl[f]["acceptance"]} keys[k] = {"ktyp": b[0], "field": ddl[f]["feld"], "type": ddl[f]["type"], "rule": ddl[f]["acceptance"]}
matching.matchkeys = keys matching.matchkeys = keys
job.debug(verify, "ddl " + json.dumps(keys) + " " + str(i)) job.debug(verify, "ddl " + json.dumps(keys))
msim=""
topsim=""
def getSimilarity(matching, rA, rB, i, simorder=M.SIM_DEFAULT):
""" it calculates the similarity between both rows by:
concat each criteria with single-similarity 00..99 and i with 999..000 """
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool")) - 1
job.debug(verify, "getSimilarity ")
mBsim = ""
mTsim = ""
topBsim = ""
topTsim = ""
for k in sorted(matching.matchkeys): for k in sorted(matching.matchkeys):
msim += getStringSimilarity(str(rA[matching.matchkeys[k]["field"]]), str(rB[matching.matchkeys[k]["field"]])) if M.SIM_TECHNICAL in k:
topsim += "99" mTsim += getStringSimilarity(str(rA[matching.matchkeys[k]["field"]]), str(rB[matching.matchkeys[k]["field"]]))
if msim == topsim: topTsim += "99"
if M.SIM_BUSINESS in k:
mTsim += getStringSimilarity(str(rA[matching.matchkeys[k]["field"]]), str(rB[matching.matchkeys[k]["field"]]))
topTsim += "99"
if mBsim == topBsim and mTsim == topTsim:
job.debug(verify, "Treffer ") job.debug(verify, "Treffer ")
return "MATCH" return "MATCH"
else: elif simorder[0:1] == M.SIM_TECHNICAL and mTsim == topTsim:
job.debug(verify, "nomatch S"+msim+str(i).zfill(3)) job.debug(verify, "Treffer ")
return "S"+msim+str(i).zfill(3) return "MATCH"
pass elif simorder[0:1] == M.SIM_BUSINESS and mBsim == topBsim:
job.debug(verify, "Treffer ")
return "MATCH"
elif simorder[0:1] == M.SIM_TECHNICAL:
return "S"+mTsim+mBsim+str(i).zfill(3)
else: # if simorder[0:1] == M.SIM_BUSINESS:
return "S" + mBsim + mTsim + str(i).zfill(3)
def matchTree(matching): def matchTree(matching):
""" """
@ -242,6 +283,7 @@ def matchTree(matching):
job.debug(verify, "..>> ende matching " + matching.htmltext) job.debug(verify, "..>> ende matching " + matching.htmltext)
return matching.htmltext return matching.htmltext
def matchElement(matching, A, B, path): def matchElement(matching, A, B, path):
""" travers through the datatree """ """ travers through the datatree """
job = basic.program.Job.getInstance() job = basic.program.Job.getInstance()
@ -256,6 +298,7 @@ def matchElement(matching, A, B, path):
else: else:
return matching return matching
def getStringSimilarity(strA, strB): def getStringSimilarity(strA, strB):
job = basic.program.Job.getInstance() job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool")) - 1 verify = int(job.getDebugLevel("match_tool")) - 1
@ -266,6 +309,7 @@ def getStringSimilarity(strA, strB):
if (strA.strip().lower() == strB.strip().lower()): return "55" if (strA.strip().lower() == strB.strip().lower()): return "55"
return "00" return "00"
def getEvaluation(matching, type, acceptance, sideA, sideB): def getEvaluation(matching, type, acceptance, sideA, sideB):
job = basic.program.Job.getInstance() job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool")) - 1 verify = int(job.getDebugLevel("match_tool")) - 1
@ -285,6 +329,7 @@ def getEvaluation(matching, type, acceptance, sideA, sideB):
classB = "acceptB" classB = "acceptB"
return [result, classA, classB] return [result, classA, classB]
def matchDict(matching, A, B, path): def matchDict(matching, A, B, path):
""" travers through the datatree """ """ travers through the datatree """
job = basic.program.Job.getInstance() job = basic.program.Job.getInstance()
@ -318,6 +363,7 @@ def matchDict(matching, A, B, path):
job.debug(verify, "matchDict 420 ...<<---") job.debug(verify, "matchDict 420 ...<<---")
return matching return matching
def matchArray(matching, A, B, path): def matchArray(matching, A, B, path):
""" matches the datarows of the datatree """ """ matches the datarows of the datatree """
job = basic.program.Job.getInstance() job = basic.program.Job.getInstance()
@ -341,6 +387,7 @@ def matchArray(matching, A, B, path):
htmltext += compareRows(matching, path) htmltext += compareRows(matching, path)
matching.htmltext += htmltext matching.htmltext += htmltext
def compareRows(matching, path): def compareRows(matching, path):
""" traverse through matched rows """ """ traverse through matched rows """
job = basic.program.Job.getInstance() job = basic.program.Job.getInstance()
@ -373,6 +420,7 @@ def compareRows(matching, path):
matching.difftext += "</table>" matching.difftext += "</table>"
return htmltext return htmltext
def markRow(matching, header, row, side): def markRow(matching, header, row, side):
job = basic.program.Job.getInstance() job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool")) - 4 verify = int(job.getDebugLevel("match_tool")) - 4
@ -396,6 +444,7 @@ def markRow(matching, header, row, side):
matching.difftext += text matching.difftext += text
return text return text
def compareRow(matching, header, rA, rB): def compareRow(matching, header, rA, rB):
""" traverse through matched rows """ """ traverse through matched rows """
job = basic.program.Job.getInstance() job = basic.program.Job.getInstance()
@ -452,7 +501,7 @@ def compareRow(matching, header, rA, rB):
matching.difftext += text matching.difftext += text
return text return text
# -------------------------------------------------------------------------- # --------------------------------------------------------------------------
def matchLines(matching): def matchLines(matching):
pass pass

Loading…
Cancel
Save