similarity-order for business or technical match

4 years ago · 1bf757ced4
3 changed files with 144 additions and 104 deletions
--- a/test/test_compare.py
+++ b/test/test_compare.py
@ -111,7 +111,8 @@ class MyTestCase(unittest.TestCase):

    def xtest_similarity(self):
        matching = self.getMatching()
-        utils.match_tool.getSimilarity(matching, ":database:scheme:table:_data",
+        utils.match_tool.setMatchkeys(matching, ":database:scheme:table:_data")
+        utils.match_tool.getSimilarity(matching,
            tdata[M.MATCH_SIDE_PREACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0],
            tdata[M.MATCH_SIDE_POSTACTUAL]["data"]["database"]["scheme"]["table"]["_data"][0], 1)

--- a/utils/match_const.py
+++ b/utils/match_const.py
@ -2,6 +2,11 @@
 """
 constants for used for api-functions
 """
+
+SIM_BUSINESS = "B"
+SIM_TECHNICAL = "T"
+SIM_DEFAULT = "BT"
+
 MATCH_SIDE_PREEXPECT = "preexpect"
 """ it implies the precondition of the expectation """
 MATCH_DICT_PREEXPECT = {
@ -67,10 +72,7 @@ MATCH = {
    MATCH_PRECOND: {
        "A": MATCH_SIDE_PREEXPECT,
        "B": MATCH_SIDE_PREACTUAL,
-        "shortA": "SV",
-        "shortB": "IV",
-        "longA": "Soll-Vorher",
-        "longB": "Ist-Vorher",
+        "simorder": SIM_BUSINESS + SIM_TECHNICAL,
        "mode": "info",
        "filename": "01_Vorbedingungen",
        "title": "Pruefung Vorbedingung (Soll-Vorher - Ist-Vorher)"
@ -78,10 +80,7 @@ MATCH = {
    MATCH_POSTCOND: {
        "A": MATCH_SIDE_POSTEXPECT,
        "B": MATCH_SIDE_POSTACTUAL,
-        "shortA": "SN",
-        "shortB": "IN",
-        "longA": "Soll-Nachher",
-        "longB": "Ist-Nachher",
+        "simorder": SIM_BUSINESS + SIM_TECHNICAL,
        "mode": "hard",
        "filename": "00_Fachabgleich",
        "title": "Fachliche Auswertung (Soll-Nachher - Ist-Nachher)"
@ -89,10 +88,7 @@ MATCH = {
    MATCH_SUCCESS: {
        "A": MATCH_SIDE_PREACTUAL,
        "B": MATCH_SIDE_POSTACTUAL,
-        "shortA": "IV",
-        "shortB": "IN",
-        "longA": "Ist-Vorher",
-        "longB": "Ist-Nachher",
+        "simorder": SIM_TECHNICAL + SIM_BUSINESS,
        "mode": "action",
        "filename": "04_Ablauf",
        "title": "Ablauf-Differenz (Ist-Vorher - Ist-Nachher)"
@ -100,10 +96,7 @@ MATCH = {
    MATCH_PRESTEP: {
        "A": MATCH_SIDE_PRESTEP,
        "B": MATCH_SIDE_POSTACTUAL,
-        "shortA": "VN",
-        "shortB": "IN",
-        "longA": "Vor-Schritt",
-        "longB": "Ist-Nachher",
+        "simorder": SIM_TECHNICAL + SIM_BUSINESS,
        "mode": "action",
        "filename": "02_Vorschritt",
        "title": "Schritt-Differenz (Vorschritt-Nachher - Ist-Nachher)"
@ -111,10 +104,7 @@ MATCH = {
    MATCH_TESTEXAMPLE: {
        "A": MATCH_SIDE_TESTCASE,
        "B": MATCH_SIDE_POSTACTUAL,
-        "shortA": "TN",
-        "shortB": "IN",
-        "longA": "Vergleich-Soll",
-        "longB": "Ist-Nachher",
+        "simorder": SIM_BUSINESS + SIM_TECHNICAL,
        "mode": "action",
        "filename": "03_Vergleichstestfall",
        "title": "Vergleichstestfall (Vergleich-Soll - Ist-Nachher)"
--- a/utils/match_tool.py
+++ b/utils/match_tool.py
@ -10,11 +10,13 @@ import utils.report_tool
 import basic.program
 import basic.constants as B
 import utils.match_const as M
+
 # ------------------------------------------------------------
 """

 """

+
 class Matching():
    def __init__(self, comp):
        self.comp = comp
@ -64,14 +66,17 @@ class Matching():

    def isHitA(self, key):
        return ((key in self.linksA) and (self.linksA[key] != B.SVAL_NULL))
+
    def isHitB(self, key):
        return ((key in self.linksB) and (self.linksB[key] != B.SVAL_NULL))
+
    def setHit(self, keyA, keyB):
        if (not self.isHitA(keyA)) and (not self.isHitB(keyB)):
            if (keyA != B.SVAL_NULL): self.linksA[keyA] = keyB
            if (keyB != B.SVAL_NULL): self.linksB[keyB] = keyA
            return "OK"
        raise Exception("one of the links are set")
+
    def setNohit(self, similarity, keyA, keyB):
        """ The similarity must be unique. Only a mismatch will be set. """
        if similarity in self.nomatch:
@ -79,6 +84,7 @@ class Matching():
        if (self.isHitA(keyA) or self.isHitB(keyB)):
            return
        self.nomatch[similarity] = [keyA, keyB]
+
    def getTableDdl(self, path):
        a = path.split(":")
        ddl = self.comp.conf[B.DATA_NODE_DDL]
@ -87,6 +93,7 @@ class Matching():
            if (x == B.DATA_NODE_DATA): break
            if x in ddl: ddl = ddl[x]
        return ddl
+
    def setDiffHeader(matching):
        job = basic.program.Job.getInstance()
        verify = int(job.getDebugLevel("match_tool")) - 1
@ -98,8 +105,10 @@ class Matching():
        htmltxt += "</head>"
        htmltxt += "<body>"
        htmltxt += "<h1>" + M.MATCH[matching.matchtype]["title"] + "</h1>"
-        htmltxt += "<h4>"+M.MATCH[M.MATCH[matching.matchtype]["A"]]["long"]+": "+matching.matchfiles["A"]+"</h4>"
-        htmltxt += "<h4>"+M.MATCH[M.MATCH[matching.matchtype]["B"]]["long"]+": "+matching.matchfiles["B"]+"</h4><br>"
+        htmltxt += "<h4>" + M.MATCH[M.MATCH[matching.matchtype]["A"]]["long"] + ": " + matching.matchfiles[
+            "A"] + "</h4>"
+        htmltxt += "<h4>" + M.MATCH[M.MATCH[matching.matchtype]["B"]]["long"] + ": " + matching.matchfiles[
+            "B"] + "</h4><br>"
        matching.htmltext = htmltxt

    def setDiffFooter(self):
@ -110,6 +119,7 @@ class Matching():
        htmltext += "</body></html>"
        self.htmltext = htmltext

+
 def matchFiles(matching):
    """

@ -118,6 +128,7 @@ def matchFiles(matching):
    :return:
    """

+
 def matchBestfit(matching, path):
    """
    in this strategy the difference-score of all elements of both sides will be calculated.
@ -154,7 +165,7 @@ def matchBestfit(matching, path):
            if (matching.isHitB(composeKey("b", ib))):
                ib += 1
                continue
-            similarity=getSimilarity(matching, path, rA, rB, ix)
+            similarity = getSimilarity(matching, rA, rB, ix, M.MATCH[matching.matchtype]["simorder"])
            if (similarity == "MATCH"):
                matching.setHit(composeKey("a", ia), composeKey("b", ib))
                continue
@ -164,6 +175,7 @@ def matchBestfit(matching, path):
            ix += 1
        ia += 1

+
 def matchRestfit(matching):
    """ """
    job = basic.program.Job.getInstance()
@ -182,52 +194,81 @@ def matchRestfit(matching):
        print("neues Matching " + pair[0] + " " + pair[1])
        matching.setHit(pair[0], pair[1])

+
 def composeKey(side, i):
    return side.lower() + str(i + 1).zfill(4)
+
+
 def extractKeyI(key):
    return int(key[1:]) - 1

-def getSimilarity(matching, path, rA, rB, i):
-    """ it calculates the similarity between both rows by:
-    concat each criteria with single-similarity 00..99 and i with 999..000 """
+
+def setMatchkeys(matching, path):
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 1
-    job.debug(verify, "getSimilarity "+path+" "+str(i))
+    job.debug(verify, "getSimilarity " + path)
    if len(matching.matchkeys) > 0:
-        keys = matching.matchkeys
-    elif (B.DATA_NODE_DDL in matching.comp.conf):
-        job.debug(verify, "ddl " + path + " " + str(i))
+        return
+    if (B.DATA_NODE_DDL in matching.comp.conf):
+        job.debug(verify, "ddl " + path)
        a = path.split(":")
        ddl = matching.comp.conf[B.DATA_NODE_DDL]
        for x in a:
-            if (len(x) < 2): continue
-            if (x == B.DATA_NODE_DATA): break
-            if x in ddl: ddl = ddl[x]
-        job.debug(verify, "ddl " + json.dumps(ddl) + " " + str(i))
+            if (len(x) < 2):
+                continue
+            if (x == B.DATA_NODE_DATA):
+                break
+            if x in ddl:
+                ddl = ddl[x]
+        job.debug(verify, "ddl " + json.dumps(ddl))
        keys = {}
        for f in ddl:
-            job.debug(verify, "ddl " + f + " " + str(i))
+            job.debug(verify, "ddl " + f)
            if ("key" in ddl[f]) and (len(ddl[f]["key"]) > 0):
                b = ddl[f]["key"].split(":")
-                if (len(b)!=2): raise Exception("falsch formatierter Schluessel "+ddl[f]["key"])
-                if (not b[1].isnumeric()): raise Exception("falsch formatierter Schluessel "+ddl[f]["key"])
-                k = "k"+b[1].zfill(2)
-                job.debug(verify, "ddl " + f + " " + str(i))
+                if (len(b) != 2):
+                    raise Exception("falsch formatierter Schluessel " + ddl[f]["key"])
+                if (not b[1].isnumeric()):
+                    raise Exception("falsch formatierter Schluessel " + ddl[f]["key"])
+                k = "k"+b[0]+""+b[1].zfill(2)
+                job.debug(verify, "ddl " + f)
                keys[k] = {"ktyp": b[0], "field": ddl[f]["feld"], "type": ddl[f]["type"], "rule": ddl[f]["acceptance"]}
        matching.matchkeys = keys
-        job.debug(verify, "ddl " + json.dumps(keys) + " " + str(i))
-    msim=""
-    topsim=""
+        job.debug(verify, "ddl " + json.dumps(keys))
+
+
+def getSimilarity(matching, rA, rB, i, simorder=M.SIM_DEFAULT):
+    """ Return "MATCH" if all key fields - or those of the leading group in simorder - of rA and rB agree,
+    otherwise "S" + the per-key similarity codes (leading group of simorder first) + i zero-padded to 3 digits. """
+    job = basic.program.Job.getInstance()
+    verify = int(job.getDebugLevel("match_tool")) - 1
+    job.debug(verify, "getSimilarity ")
+    mBsim = ""      # concatenated similarity codes of the business keys
+    mTsim = ""      # concatenated similarity codes of the technical keys
+    topBsim = ""    # best possible business score ("99" per business key)
+    topTsim = ""    # best possible technical score ("99" per technical key)
    for k in sorted(matching.matchkeys):
-        msim += getStringSimilarity(str(rA[matching.matchkeys[k]["field"]]), str(rB[matching.matchkeys[k]["field"]]))
-        topsim += "99"
-    if msim == topsim:
+        if M.SIM_TECHNICAL in k:
+            mTsim += getStringSimilarity(str(rA[matching.matchkeys[k]["field"]]), str(rB[matching.matchkeys[k]["field"]]))
+            topTsim += "99"
+        if M.SIM_BUSINESS in k:  # business keys are scored separately from the technical ones
+            mBsim += getStringSimilarity(str(rA[matching.matchkeys[k]["field"]]), str(rB[matching.matchkeys[k]["field"]]))
+            topBsim += "99"
+
+    if mBsim == topBsim and mTsim == topTsim:
        job.debug(verify, "Treffer ")
        return "MATCH"
-    else:
-        job.debug(verify, "nomatch S"+msim+str(i).zfill(3))
-        return "S"+msim+str(i).zfill(3)
-    pass
+    elif simorder[0:1] == M.SIM_TECHNICAL and mTsim == topTsim:
+        job.debug(verify, "Treffer ")
+        return "MATCH"
+    elif simorder[0:1] == M.SIM_BUSINESS and mBsim == topBsim:
+        job.debug(verify, "Treffer ")
+        return "MATCH"
+    elif simorder[0:1] == M.SIM_TECHNICAL:
+        return "S"+mTsim+mBsim+str(i).zfill(3)
+    else:            # if simorder[0:1] == M.SIM_BUSINESS:
+        return "S" + mBsim + mTsim + str(i).zfill(3)
+

 def matchTree(matching):
    """
@ -242,6 +283,7 @@ def matchTree(matching):
    job.debug(verify, "..>> ende matching " + matching.htmltext)
    return matching.htmltext

+
 def matchElement(matching, A, B, path):
    """ travers through the datatree """
    job = basic.program.Job.getInstance()
@ -256,6 +298,7 @@ def matchElement(matching, A, B, path):
    else:
        return matching

+
 def getStringSimilarity(strA, strB):
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 1
@ -266,6 +309,7 @@ def getStringSimilarity(strA, strB):
    if (strA.strip().lower() == strB.strip().lower()): return "55"
    return "00"

+
 def getEvaluation(matching, type, acceptance, sideA, sideB):
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 1
@ -285,6 +329,7 @@ def getEvaluation(matching, type, acceptance, sideA, sideB):
        classB = "acceptB"
    return [result, classA, classB]

+
 def matchDict(matching, A, B, path):
    """ travers through the datatree """
    job = basic.program.Job.getInstance()
@ -318,6 +363,7 @@ def matchDict(matching, A, B, path):
    job.debug(verify, "matchDict 420 ...<<---")
    return matching

+
 def matchArray(matching, A, B, path):
    """ matches the datarows of the datatree """
    job = basic.program.Job.getInstance()
@ -341,6 +387,7 @@ def matchArray(matching, A, B, path):
    htmltext += compareRows(matching, path)
    matching.htmltext += htmltext

+
 def compareRows(matching, path):
    """ traverse through matched rows """
    job = basic.program.Job.getInstance()
@ -373,6 +420,7 @@ def compareRows(matching, path):
    matching.difftext += "</table>"
    return htmltext

+
 def markRow(matching, header, row, side):
    job = basic.program.Job.getInstance()
    verify = int(job.getDebugLevel("match_tool")) - 4
@ -396,6 +444,7 @@ def markRow(matching, header, row, side):
    matching.difftext += text
    return text

+
 def compareRow(matching, header, rA, rB):
    """ traverse through matched rows """
    job = basic.program.Job.getInstance()
@ -452,7 +501,7 @@ def compareRow(matching, header, rA, rB):
    matching.difftext += text
    return text

+
 # --------------------------------------------------------------------------
 def matchLines(matching):
    pass
-