Ulrich Carmesin
3 years ago
17 changed files with 379 additions and 163 deletions
@ -0,0 +1,151 @@ |
|||
import random |
|||
#from flask import |
|||
# |
|||
class Koeln:
    """Phonetic encoder and generator of similar-sounding spellings.

    Words are translated into a string of digit codes using the ``KOELN``
    table (the table looks like a simplified variant of the "Koelner
    Phonetik" -- NOTE(review): confirm against the intended specification).
    The reverse direction picks random graphemes for each code in order to
    produce spellings that encode to a similar value.
    """

    # Config: phonetic code -> comma-separated graphemes that map to it.
    KOELN = {
        "00": "AI,AY,AU,EI,EY,EU,OI,Ä,Ö,Ü",  # vowels
        "40": "CA,CO,CU",
        "80": "CE,CI,CY",
        "88": "SCH,ZC,DC,DS,DZ,TC,TS,TZ",
        "48": "X",
        "0": "AA,A,EE,IE,E,I,J,OU,OO,O,U,Y",  # vowels
        "1": "B,P",  # labials
        "2": "D,T",
        "3": "PH,F,V,W",  # labials
        "4": "G,K,Q",
        "5": "L",
        "6": "M,N",
        "7": "R",
        "8": "S,Z",
    }

    # Class-level default so instances created without __init__ still trace.
    verbose = True

    def __init__(self, verbose=True):
        """Create an encoder.

        Args:
            verbose: when true (the default) the methods print their
                progress/result traces to stdout.
        """
        self.init = "ja"
        self.verbose = verbose

    def _trace(self, message):
        """Print *message* if tracing is enabled."""
        if self.verbose:
            print(message)

    def getPhonWertMitVokal(self, wort):
        """Return the code string of *wort*, vowel codes ("0") included.

        The word is upper-cased and consumed from the left, one grapheme at
        a time; characters without a table entry are skipped.
        """
        out = ""
        txt = wort.upper()
        while txt:
            self._trace("while " + txt)
            code, grapheme = self.getFirstPhonem(txt)
            if code == "99":
                # Unknown character: drop it without emitting a code.
                txt = txt[1:]
            else:
                out += code
                txt = txt[len(grapheme):]
            self._trace("while - " + txt + " " + out)
        self._trace("-------- RETURN - " + txt + " " + out)
        return out

    def getPhonWert(self, wort):
        """Return the code string of *wort* without vowels and repeats.

        Runs of identical codes are collapsed to a single digit first, and
        only then are the vowel codes ("0") removed, so equal consonants
        separated by a vowel both survive.
        """
        codes = self.getPhonWertMitVokal(wort)
        collapsed = []
        for digit in codes:
            # Collapse a run of any length, not just pairs.
            if not collapsed or collapsed[-1] != digit:
                collapsed.append(digit)
        out = "".join(collapsed).replace("0", "")
        self._trace("-------- RETURN - " + wort + " " + out)
        return out

    def getAehnlicherPhon(self, wort):
        """Return *wort* followed by randomly generated similar spellings.

        The variants come from ``getAehnlicherPhonem`` applied to the code
        string of *wort*; they are upper-case, while *wort* keeps its
        original spelling. Duplicates are omitted.
        NOTE(review): the original body was an unfinished draft that could
        not run; this return shape is an assumption -- confirm with callers.
        """
        out = [wort]
        code = self.getPhonWertMitVokal(wort)
        for variant in self.getAehnlicherPhonem(code):
            if variant not in out:
                out.append(variant)
        return out

    def _graphemes_for(self, token):
        """Return the candidate graphemes for one code token."""
        if token in self.KOELN:
            return self.KOELN[token].split(",")
        # Doubled code without its own table entry ("11" .. "77").
        # Assumption: it stands for a doubled consonant letter, so double
        # each single-letter grapheme of the base code -- TODO confirm.
        singles = self.KOELN[token[0]].split(",")
        return [g + g for g in singles if len(g) == 1]

    def getAehnlicherPhonem(self, txt):
        """Generate random spellings for the code string *txt*.

        The string is split from the left into code tokens (two-digit table
        codes first, then doubled consonants, then single digits). For each
        token up to ``len(options) // 3 + 1`` distinct graphemes are drawn at
        random and appended to every spelling built so far.

        Args:
            txt: string of digits as produced by ``getPhonWertMitVokal``.

        Returns:
            List of generated spellings (``[""]`` for an empty *txt*).

        Raises:
            ValueError: if *txt* contains a character that is not a code.
        """
        phonems = {
            "umlaut": ["00", "40", "80", "88"],
            "doppelk": ["11", "22", "33", "44", "55", "66", "77"],
            "literal": ["0", "1", "2", "3", "4", "5", "6", "7", "8"],
        }
        # Flattened in priority order: longer tokens are tried first.
        tokens = [k for group in phonems.values() for k in group]

        gen = [""]
        while txt:
            token = next((k for k in tokens if txt.startswith(k)), None)
            if token is None:
                # Without this guard the loop could never terminate.
                raise ValueError("invalid phonetic code: " + repr(txt))
            self._trace("+++ Umlaute +++" + txt + "--" + token)

            options = self._graphemes_for(token)
            chosen = []
            for _ in range(len(options) // 3 + 1):
                candidate = random.choice(options)
                if candidate not in chosen:
                    chosen.append(candidate)

            gen = [prefix + c for c in chosen for prefix in gen]
            self._trace(gen)
            txt = txt[len(token):]

        self._trace(gen)
        return gen

    def getFirstPhonem(self, txt):
        """Return ``(code, grapheme)`` for the grapheme at the start of *txt*.

        The longest matching grapheme wins, so "PH" is recognised before
        "P" and "SCH" before "S". ``("99", "H")`` is returned when nothing
        matches (including for an empty string).
        """
        best = ("99", "H")
        best_len = 0
        for code, graphemes in self.KOELN.items():
            for grapheme in graphemes.split(","):
                if len(grapheme) > best_len and txt.startswith(grapheme):
                    best = (code, grapheme)
                    best_len = len(grapheme)
        return best
|||
|
|||
# Funktionen |
|||
def generiereAehnlicheWerte(feld, basis):
    """Return a similar-value key for *basis*, depending on the field.

    Args:
        feld: field name; only "FAMNAM" is handled.
        basis: base value (string) to derive the similar value from.

    Returns:
        The result of ``koeln.getPhonWert(basis)`` for "FAMNAM" (``koeln``
        is the module-level ``Koeln`` instance), otherwise ``None``.
    """
    print ("generiereAehnlicheWerte("+feld+", "+basis)
    # Decide on the parameter, not on a loop variable of the calling script.
    if feld == "FAMNAM":
        return koeln.getPhonWert(basis)
    return None
|||
# Parameters
koeln = Koeln()

job = {
    "option": {
        "regression": "g",  # cross | orthogonal
        "clusteranz": 2,
    },
}

content = {
    "Tabelle": {
        "Person": {
            "FAMNAM": {
                "option": "aehnlich",
                "gruppe": 1,
                "basis": ["Müller", "Schmidt"],
            },
            "GESCHLECHT": {
                "option": "diff",
                "gruppe": 1,
            },
            "VORNAM": {
                "option": "beliebig",
            },
            "GEBDAT": {
                "option": "identisch",
                "basis": ["19920123", "20210908"],
            },
        },
    },
}

gruppen = []

# For every cluster index, walk all person fields and derive a similar
# value for each field configured as "aehnlich" that has base values.
for i in range(job["option"]["clusteranz"]):
    print("index " + str(i))
    for k in content["Tabelle"]["Person"]:
        print("key " + k)
        if content["Tabelle"]["Person"][k]["option"] != "aehnlich":
            continue
        if content["Tabelle"]["Person"][k]["basis"]:
            werte = generiereAehnlicheWerte(
                k, content["Tabelle"]["Person"][k]["basis"][i]
            )

# Demo calls of the grapheme generator on a few code strings.
koeln.getAehnlicherPhonem("00")
koeln.getAehnlicherPhonem("3")
koeln.getAehnlicherPhonem("8")
koeln.getAehnlicherPhonem("886022")
Loading…
Reference in new issue