Ulrich Carmesin
3 years ago
17 changed files with 379 additions and 163 deletions
@ -0,0 +1,151 @@ |
|||||
|
import random |
||||
|
#from flask import |
||||
|
# |
||||
|
class Koeln():
    """Phonetic encoder driven by the ``KOELN`` letter-group table.

    Every key of ``KOELN`` is a digit code; every value is a comma-separated
    list of upper-case letter sequences that are encoded with that code.
    NOTE(review): the naming suggests Koelner Phonetik, but the table below
    is this file's own variant - confirm before relying on compatibility
    with other implementations.
    """

    # Configuration: digit code -> comma-separated letter sequences.
    KOELN = {
        "00" : "AI,AY,AU,EI,EY,EU,OI,Ä,Ö,Ü", # vowels
        "40" : "CA,CO,CU",
        "80" : "CE,CI,CY",
        "88" : "SCH,ZC,DC,DS,DZ,TC,TS,TZ",
        "48" : "X",
        "0" : "AA,A,EE,IE,E,I,J,OU,OO,O,U,Y", # vowels
        "1" : "B,P", # labials
        "2" : "D,T",
        "3" : "PH,F,V,W", # labials
        "4" : "G,K,Q",
        "5" : "L",
        "6" : "M,N",
        "7" : "R",
        "8" : "S,Z"
    }

    def __init__(self):
        self.init = "ja"

    def getPhonWertMitVokal(self, wort):
        """Return the digit code of *wort* with the vowel codes kept.

        The word is upper-cased and consumed from the left, one letter
        sequence at a time (see ``getFirstPhonem``). Characters that match
        no table entry are skipped without contributing to the code.
        Progress is traced on stdout.
        """
        out = ""
        txt = wort.upper()
        while txt:
            print("while "+txt)
            code, pattern = self.getFirstPhonem(txt)
            if code == "99":
                # No table entry starts here: drop a single character.
                txt = txt[1:]
            else:
                out = out + code
                txt = txt[len(pattern):]
            print("while - " + txt + " "+out)
        print("-------- RETURN - " + txt + " " + out)
        return out

    def getPhonWert(self, wort):
        """Return the reduced digit code of *wort*.

        Starting from ``getPhonWertMitVokal(wort)``, every run of identical
        adjacent non-zero digits is collapsed to a single digit and all
        ``0`` digits are removed. Runs are detected before the zeros are
        dropped, so ``"101"`` becomes ``"11"``. The result is also printed.
        """
        digits = []
        previous = ""
        for digit in self.getPhonWertMitVokal(wort):
            # Comparing with the previous *raw* digit collapses runs of any
            # length, not just pairs.
            if digit != "0" and digit != previous:
                digits.append(digit)
            previous = digit
        out = "".join(digits)
        print("-------- RETURN - " + wort + " " + out)
        return out

    def getAehnlicherPhon(self, wort):
        """Return random spellings that encode like *wort*.

        Encodes *wort* with ``getPhonWertMitVokal`` and passes the code to
        ``getAehnlicherPhonem``, whose result list is returned.
        """
        return self.getAehnlicherPhonem(self.getPhonWertMitVokal(wort))

    def getAehnlicherPhonem(self, txt):
        """Expand the digit code *txt* into random candidate spellings.

        The code is consumed from the left. At every position the two-digit
        codes "00", "40", "80", "88" are tried before the single digits
        "0".."8". For the matched code, ``len(choices) // 3 + 1`` random
        draws are made from its letter sequences; each distinct draw is
        appended to every spelling built so far. Other doubled digits such
        as "22" are therefore read as two single codes, and a character that
        is not a known code is skipped so the loop always terminates.

        Intermediate and final lists are printed; the final list is also
        returned. An empty *txt* yields ``[""]``.
        """
        codes = ["00", "40", "80", "88",
                 "0", "1", "2", "3", "4", "5", "6", "7", "8"]
        gen = [""]
        while txt:
            for k in codes:
                if txt.startswith(k):
                    break
            else:
                # Unknown symbol: drop it instead of spinning forever.
                txt = txt[1:]
                continue
            print("+++ Umlaute +++" + txt + "--" + k)
            choices = self.KOELN[k].split(",")
            chc = []
            for _ in range(len(choices) // 3 + 1):
                c = random.choice(choices)
                if c not in chc:
                    chc.append(c)
            # Same ordering as nested appends: all prefixes for the first
            # draw, then all prefixes for the second draw, and so on.
            neu = [y + c for c in chc for y in gen]
            gen = neu
            print(neu)
            txt = txt[len(k):]
        print(gen)
        return gen

    def getFirstPhonem(self, txt):
        """Return ``(code, pattern)`` for the table entry that starts *txt*.

        The longest matching letter sequence wins, so "PH" (code 3) is
        preferred over "P" (code 1); among equally long matches the first
        one in table order is kept. If nothing matches, ``("99", "H")`` is
        returned, which callers treat as "skip one character".
        """
        best = ("99", "H")
        best_len = 0
        for k, patterns in self.KOELN.items():
            for x in patterns.split(","):
                if len(x) > best_len and txt.startswith(x):
                    best = (k, x)
                    best_len = len(x)
        return best
||||
|
|
||||
|
# Funktionen |
||||
|
def generiereAehnlicheWerte(feld, basis):
    """Derive a value for field *feld* from the base value *basis*.

    For the field "FAMNAM" the code computed by the module-level ``koeln``
    encoder (``koeln.getPhonWert(basis)``) is returned; for every other
    field the result is ``None``. The call is traced on stdout.

    The decision is based on the *feld* argument itself rather than on a
    global loop variable, so the function also works when called outside
    the driver loop.
    NOTE(review): only the phonetic code is produced here; whether further
    "similar" values should be generated from it is not visible - confirm.
    """
    print ("generiereAehnlicheWerte("+feld+", "+basis)
    if feld == "FAMNAM":
        return koeln.getPhonWert(basis)
    return None
||||
|
# Parameters
koeln = Koeln()

# Job options controlling the generation run.
job = {
    "option": {
        "regression": "g",  # kreuz | orthogonal
        "clusteranz": 2,  #
    },
}

# Per-field generation rules for the table "Person".
content = {
    "Tabelle": {
        "Person": {
            "FAMNAM": {
                "option": "aehnlich",
                "gruppe": 1,
                "basis": ["Müller", "Schmidt"],
            },
            "GESCHLECHT": {
                "option": "diff",
                "gruppe": 1,
            },
            "VORNAM": {
                "option": "beliebig",
            },
            "GEBDAT": {
                "option": "identisch",
                "basis": ["19920123", "20210908"],
            },
        },
    },
}

gruppen = []
# One pass per cluster; fields marked "aehnlich" with a non-empty base list
# get a value derived from the base entry at the cluster index.
for i in range(job["option"]["clusteranz"]):
    print(f"index {i}")
    for k in content["Tabelle"]["Person"]:
        print(f"key {k}")
        if (content["Tabelle"]["Person"][k]["option"] == "aehnlich"
                and content["Tabelle"]["Person"][k]["basis"]):
            werte = generiereAehnlicheWerte(
                k, content["Tabelle"]["Person"][k]["basis"][i])

# Sample expansions of digit codes.
koeln.getAehnlicherPhonem("00")
koeln.getAehnlicherPhonem("3")
koeln.getAehnlicherPhonem("8")
koeln.getAehnlicherPhonem("886022")
Loading…
Reference in new issue