Browse Source

similarity-order for business or technical match

master
Ulrich Carmesin 3 years ago
parent
commit
1bf757ced4
  1. 3
      test/test_compare.py
  2. 30
      utils/match_const.py
  3. 109
      utils/match_tool.py

3
test/test_compare.py

@ -111,7 +111,8 @@ class MyTestCase(unittest.TestCase):
def xtest_similarity(self):
matching = self.getMatching()
utils.match_tool.getSimilarity(matching, ":database:scheme:table:_data",
utils.match_tool.setMatchkeys(matching, ":database:scheme:table:_data")
utils.match_tool.getSimilarity(matching,
tdata[M.MATCH_SIDE_PREACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0],
tdata[M.MATCH_SIDE_POSTACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0], 1)

30
utils/match_const.py

@ -2,6 +2,11 @@
"""
constants used for api-functions
"""
# Tokens naming the two similarity criteria. They are concatenated to build
# the "simorder" value of each MATCH entry, e.g. SIM_BUSINESS + SIM_TECHNICAL.
SIM_BUSINESS = "B"
SIM_TECHNICAL = "T"
# Similarity order used when a caller does not pass one (business token first).
SIM_DEFAULT = "BT"
MATCH_SIDE_PREEXPECT = "preexpect"
""" it implies the precondition of the expectation """
MATCH_DICT_PREEXPECT = {
@ -67,10 +72,7 @@ MATCH = {
MATCH_PRECOND: {
"A": MATCH_SIDE_PREEXPECT,
"B": MATCH_SIDE_PREACTUAL,
"shortA": "SV",
"shortB": "IV",
"longA": "Soll-Vorher",
"longB": "Ist-Vorher",
"simorder": SIM_BUSINESS + SIM_TECHNICAL,
"mode": "info",
"filename": "01_Vorbedingungen",
"title": "Pruefung Vorbedingung (Soll-Vorher - Ist-Vorher)"
@ -78,10 +80,7 @@ MATCH = {
MATCH_POSTCOND: {
"A": MATCH_SIDE_POSTEXPECT,
"B": MATCH_SIDE_POSTACTUAL,
"shortA": "SN",
"shortB": "IN",
"longA": "Soll-Nachher",
"longB": "Ist-Nachher",
"simorder": SIM_BUSINESS + SIM_TECHNICAL,
"mode": "hard",
"filename": "00_Fachabgleich",
"title": "Fachliche Auswertung (Soll-Nachher - Ist-Nachher)"
@ -89,10 +88,7 @@ MATCH = {
MATCH_SUCCESS: {
"A": MATCH_SIDE_PREACTUAL,
"B": MATCH_SIDE_POSTACTUAL,
"shortA": "IV",
"shortB": "IN",
"longA": "Ist-Vorher",
"longB": "Ist-Nachher",
"simorder": SIM_TECHNICAL + SIM_BUSINESS,
"mode": "action",
"filename": "04_Ablauf",
"title": "Ablauf-Differenz (Ist-Vorher - Ist-Nachher)"
@ -100,10 +96,7 @@ MATCH = {
MATCH_PRESTEP: {
"A": MATCH_SIDE_PRESTEP,
"B": MATCH_SIDE_POSTACTUAL,
"shortA": "VN",
"shortB": "IN",
"longA": "Vor-Schritt",
"longB": "Ist-Nachher",
"simorder": SIM_TECHNICAL + SIM_BUSINESS,
"mode": "action",
"filename": "02_Vorschritt",
"title": "Schritt-Differenz (Vorschritt-Nachher - Ist-Nachher)"
@ -111,10 +104,7 @@ MATCH = {
MATCH_TESTEXAMPLE: {
"A": MATCH_SIDE_TESTCASE,
"B": MATCH_SIDE_POSTACTUAL,
"shortA": "TN",
"shortB": "IN",
"longA": "Vergleich-Soll",
"longB": "Ist-Nachher",
"simorder": SIM_BUSINESS + SIM_TECHNICAL,
"mode": "action",
"filename": "03_Vergleichstestfall",
"title": "Vergleichstestfall (Vergleich-Soll - Ist-Nachher)"

109
utils/match_tool.py

@ -10,11 +10,13 @@ import utils.report_tool
import basic.program
import basic.constants as B
import utils.match_const as M
# ------------------------------------------------------------
"""
"""
class Matching():
def __init__(self, comp):
self.comp = comp
@ -64,14 +66,17 @@ class Matching():
def isHitA(self, key):
    """Tell whether `key` of side A is already linked to a partner.

    A key counts as hit when it is present in `self.linksA` and its stored
    value differs from `B.SVAL_NULL`.
    """
    if key not in self.linksA:
        return False
    return self.linksA[key] != B.SVAL_NULL
def isHitB(self, key):
    """Tell whether `key` of side B is already linked to a partner.

    A key counts as hit when it is present in `self.linksB` and its stored
    value differs from `B.SVAL_NULL`.
    """
    if key not in self.linksB:
        return False
    return self.linksB[key] != B.SVAL_NULL
def setHit(self, keyA, keyB):
    """Link `keyA` and `keyB` to each other as a matched pair.

    The link is stored in both directions (`linksA[keyA] = keyB` and
    `linksB[keyB] = keyA`); a side whose key equals `B.SVAL_NULL` is not
    stored.

    :return: "OK" once the links are recorded
    :raises Exception: if either key is already linked
    """
    # Refuse to overwrite an existing link on either side.
    if self.isHitA(keyA) or self.isHitB(keyB):
        raise Exception("one of the links are set")
    if keyA != B.SVAL_NULL:
        self.linksA[keyA] = keyB
    if keyB != B.SVAL_NULL:
        self.linksB[keyB] = keyA
    return "OK"
def setNohit(self, similarity, keyA, keyB):
""" The similarity must be unique. Only a mismatch will be set. """
if similarity in self.nomatch:
@ -79,6 +84,7 @@ class Matching():
if (self.isHitA(keyA) or self.isHitB(keyB)):
return
self.nomatch[similarity] = [keyA, keyB]
def getTableDdl(self, path):
a = path.split(":")
ddl = self.comp.conf[B.DATA_NODE_DDL]
@ -87,6 +93,7 @@ class Matching():
if (x == B.DATA_NODE_DATA): break
if x in ddl: ddl = ddl[x]
return ddl
def setDiffHeader(matching):
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool")) - 1
@ -98,8 +105,10 @@ class Matching():
htmltxt += "</head>"
htmltxt += "<body>"
htmltxt += "<h1>" + M.MATCH[matching.matchtype]["title"] + "</h1>"
htmltxt += "<h4>"+M.MATCH[M.MATCH[matching.matchtype]["A"]]["long"]+": "+matching.matchfiles["A"]+"</h4>"
htmltxt += "<h4>"+M.MATCH[M.MATCH[matching.matchtype]["B"]]["long"]+": "+matching.matchfiles["B"]+"</h4><br>"
htmltxt += "<h4>" + M.MATCH[M.MATCH[matching.matchtype]["A"]]["long"] + ": " + matching.matchfiles[
"A"] + "</h4>"
htmltxt += "<h4>" + M.MATCH[M.MATCH[matching.matchtype]["B"]]["long"] + ": " + matching.matchfiles[
"B"] + "</h4><br>"
matching.htmltext = htmltxt
def setDiffFooter(self):
@ -110,6 +119,7 @@ class Matching():
htmltext += "</body></html>"
self.htmltext = htmltext
def matchFiles(matching):
"""
@ -118,6 +128,7 @@ def matchFiles(matching):
:return:
"""
def matchBestfit(matching, path):
"""
in this strategy the difference-score of all elements of both sides will be calculated.
@ -154,7 +165,7 @@ def matchBestfit(matching, path):
if (matching.isHitB(composeKey("b", ib))):
ib += 1
continue
similarity=getSimilarity(matching, path, rA, rB, ix)
similarity = getSimilarity(matching, rA, rB, ix, M.MATCH[matching.matchtype]["simorder"])
if (similarity == "MATCH"):
matching.setHit(composeKey("a", ia), composeKey("b", ib))
continue
@ -164,6 +175,7 @@ def matchBestfit(matching, path):
ix += 1
ia += 1
def matchRestfit(matching):
""" """
job = basic.program.Job.getInstance()
@ -182,52 +194,81 @@ def matchRestfit(matching):
print("neues Matching " + pair[0] + " " + pair[1])
matching.setHit(pair[0], pair[1])
def composeKey(side, i):
    """Build the row key for the zero-based index `i` on the given side.

    The key is the lower-cased side followed by the one-based index,
    zero-padded to four characters, e.g. ("A", 0) -> "a0001".
    """
    prefix = side.lower()
    ordinal = str(i + 1)
    return prefix + ordinal.zfill(4)
def extractKeyI(key):
    """Return the zero-based row index encoded in a row key.

    The first character of `key` is dropped and the remainder is parsed as
    a one-based integer, e.g. "a0001" -> 0.
    """
    digits = key[1:]
    return int(digits) - 1
def getSimilarity(matching, path, rA, rB, i):
""" it calculates the similarity between both rows by:
concat each criteria with single-similarity 00..99 and i with 999..000 """
def setMatchkeys(matching, path):
    """Derive the match keys for the table addressed by `path` from the DDL.

    Nothing is done when `matching.matchkeys` is already filled or when the
    component configuration has no DDL node. Otherwise the DDL is descended
    along the ":"-separated segments of `path` (segments shorter than two
    characters are skipped, the data node ends the descent). Every field
    with a non-empty "key" attribute of the form "<type>:<number>" becomes
    an entry of `matching.matchkeys`, named "k" + type + two-digit number.

    :param matching: matching object whose `matchkeys` attribute is filled
    :param path: ":"-separated path into the data tree
    :raises Exception: if a key attribute is not of the form "<type>:<number>"
    """
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 1
    # Message text is kept exactly as emitted by the existing code.
    job.debug(verify, "getSimilarity " + path)
    if len(matching.matchkeys) > 0:
        return
    if B.DATA_NODE_DDL not in matching.comp.conf:
        return
    job.debug(verify, "ddl " + path)
    segments = path.split(":")
    ddl = matching.comp.conf[B.DATA_NODE_DDL]
    for segment in segments:
        if len(segment) < 2:
            continue
        if segment == B.DATA_NODE_DATA:
            break
        if segment in ddl:
            ddl = ddl[segment]
    job.debug(verify, "ddl " + json.dumps(ddl))
    keys = {}
    for f in ddl:
        job.debug(verify, "ddl " + f)
        if "key" not in ddl[f] or len(ddl[f]["key"]) == 0:
            continue
        parts = ddl[f]["key"].split(":")
        if len(parts) != 2:
            raise Exception("falsch formatierter Schluessel " + ddl[f]["key"])
        if not parts[1].isnumeric():
            raise Exception("falsch formatierter Schluessel " + ddl[f]["key"])
        name = "k" + parts[0] + "" + parts[1].zfill(2)
        job.debug(verify, "ddl " + f)
        keys[name] = {
            "ktyp": parts[0],
            "field": ddl[f]["feld"],
            "type": ddl[f]["type"],
            "rule": ddl[f]["acceptance"],
        }
    matching.matchkeys = keys
    job.debug(verify, "ddl " + json.dumps(keys))
def getSimilarity(matching, rA, rB, i, simorder=M.SIM_DEFAULT):
    """Rate the similarity of two rows over the configured match keys.

    For every entry of `matching.matchkeys` (in sorted key order) the field
    values of both rows are compared with `getStringSimilarity`. The
    two-character result is appended to the technical score if the key name
    contains `M.SIM_TECHNICAL`, and to the business score if it contains
    `M.SIM_BUSINESS`. Keys containing neither token are ignored.

    :param matching: matching object providing `matchkeys`
    :param rA: row of side A, indexable by field name
    :param rB: row of side B, indexable by field name
    :param i: running number appended (zero-padded to three digits) to a
        non-matching score
    :param simorder: order of the criteria; its first character selects the
        leading criterion
    :return: "MATCH" if both scores are at their top value or the leading
        criterion alone is at its top value; otherwise "S" followed by the
        leading score, the other score and the padded `i`
    """
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 1
    job.debug(verify, "getSimilarity ")
    mBsim = ""
    mTsim = ""
    topBsim = ""
    topTsim = ""
    for k in sorted(matching.matchkeys):
        technical = M.SIM_TECHNICAL in k
        business = M.SIM_BUSINESS in k
        if not (technical or business):
            continue
        field = matching.matchkeys[k]["field"]
        if technical:
            mTsim += getStringSimilarity(str(rA[field]), str(rB[field]))
            topTsim += "99"
        if business:
            # Business keys must feed the business score; adding them to the
            # technical score left the business score empty and therefore
            # always equal to its top value.
            mBsim += getStringSimilarity(str(rA[field]), str(rB[field]))
            topBsim += "99"
    leading = simorder[0:1]
    if mBsim == topBsim and mTsim == topTsim:
        job.debug(verify, "Treffer ")
        return "MATCH"
    if leading == M.SIM_TECHNICAL and mTsim == topTsim:
        job.debug(verify, "Treffer ")
        return "MATCH"
    if leading == M.SIM_BUSINESS and mBsim == topBsim:
        job.debug(verify, "Treffer ")
        return "MATCH"
    if leading == M.SIM_TECHNICAL:
        return "S" + mTsim + mBsim + str(i).zfill(3)
    # Any other order is treated as business first.
    return "S" + mBsim + mTsim + str(i).zfill(3)
def matchTree(matching):
"""
@ -242,6 +283,7 @@ def matchTree(matching):
job.debug(verify, "..>> ende matching " + matching.htmltext)
return matching.htmltext
def matchElement(matching, A, B, path):
""" travers through the datatree """
job = basic.program.Job.getInstance()
@ -256,6 +298,7 @@ def matchElement(matching, A, B, path):
else:
return matching
def getStringSimilarity(strA, strB):
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool")) - 1
@ -266,6 +309,7 @@ def getStringSimilarity(strA, strB):
if (strA.strip().lower() == strB.strip().lower()): return "55"
return "00"
def getEvaluation(matching, type, acceptance, sideA, sideB):
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool")) - 1
@ -285,6 +329,7 @@ def getEvaluation(matching, type, acceptance, sideA, sideB):
classB = "acceptB"
return [result, classA, classB]
def matchDict(matching, A, B, path):
""" travers through the datatree """
job = basic.program.Job.getInstance()
@ -318,6 +363,7 @@ def matchDict(matching, A, B, path):
job.debug(verify, "matchDict 420 ...<<---")
return matching
def matchArray(matching, A, B, path):
""" matches the datarows of the datatree """
job = basic.program.Job.getInstance()
@ -341,6 +387,7 @@ def matchArray(matching, A, B, path):
htmltext += compareRows(matching, path)
matching.htmltext += htmltext
def compareRows(matching, path):
""" traverse through matched rows """
job = basic.program.Job.getInstance()
@ -373,6 +420,7 @@ def compareRows(matching, path):
matching.difftext += "</table>"
return htmltext
def markRow(matching, header, row, side):
job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("match_tool")) - 4
@ -396,6 +444,7 @@ def markRow(matching, header, row, side):
matching.difftext += text
return text
def compareRow(matching, header, rA, rB):
""" traverse through matched rows """
job = basic.program.Job.getInstance()
@ -452,7 +501,7 @@ def compareRow(matching, header, rA, rB):
matching.difftext += text
return text
# --------------------------------------------------------------------------
def matchLines(matching):
    """Placeholder for line matching: performs no work and returns None."""
    return None

Loading…
Cancel
Save