Ulrich
2 years ago
14 changed files with 344 additions and 389 deletions
@@ -0,0 +1,281 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------------------------------------
# Author : Ulrich Carmesin
# Source : gitea.ucarmesin.de
# ---------------------------------------------------------------------------------------------------------
# managing the components
# -----------------------------------------------------------------------------
"""
A component has to be created in relation to the application (basis.yml).
Each component is usually created once, but not always:

* the same instance of a component is used in different contexts
* there may exist several instances
* there may be alternatives of an instance

Each kind of instance has its own component class, and for each use an object should be created.
Each created component object is documented in the parameter file.
"""
import tools.config_tool
import tools.conn_tool
import basic.program
import basic.message
import basic.component
import importlib
import copy
import basic.constants as B
import tools.data_const as D

comps = {}
PARAM_NOSUBNODE = [B.SUBJECT_ARTS, "components", "instance"]
DEFAULT_INST_CNT = 1
DEFAULT_INST_SGL = "y"

def getInstanceAttributes(conf):
    """
    the attributes for instantiating the component are taken from the configuration or from defaults
    :param conf:
    :return: a complete set of these attributes
    """
    out = {
        B.ATTR_INST_CNT: DEFAULT_INST_CNT,
        B.ATTR_INST_SGL: DEFAULT_INST_SGL
    }
    if B.SUBJECT_INST in conf:
        for attr in [B.ATTR_INST_CNT, B.ATTR_INST_SGL]:
            if attr in conf[B.SUBJECT_INST]:
                out[attr] = conf[B.SUBJECT_INST][attr]
    return out
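
# Illustrative sketch, not part of the original module: assuming the constants
# resolve to the keys "instance", "count" and "single", a configuration like
#   conf = {"instance": {"count": 2}}
# yields
#   getInstanceAttributes(conf) -> {"count": 2, "single": "y"}
# i.e. attributes missing in conf fall back to DEFAULT_INST_CNT / DEFAULT_INST_SGL.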


def getComponents(job, mainfct):
    #job = basic.program.Job.getInstance()
    verify = -2 + job.getDebugLevel("job_tool")
    job.debug(verify, "getComponents " + mainfct)
    out = []
    for c in comps:
        job.debug(verify, "getComponents " + c + ": " + str(comps[c].conf))
        print("getComponents " + c + ": " + str(comps[c].conf))
        if mainfct in comps[c].conf["function"]:
            out.append(c)
    return out


class ComponentManager:
    __instance = None
    __instances = {}
    """
    initializes the manager with all necessary components
    """

    def __init__(self, job, option=""):
        #job = basic.program.Job.getInstance()
        job.m.logDebug("application components -- " + str(type(job.par)))
        self.components = {}
        self.comps = {}
        self.job = job
        ComponentManager.__instances[job.jobid] = self
        ComponentManager.__instance = self
        print("init ComponentHandling " + str(self))

    def initComponents(self):
        # sets the components the first time;
        # afterwards the components are restored from the parameter file
        job = self.job # basic.program.Job.getInstance()
        anw = job.par.application
        job.m.logDebug("application components -- " + str(type(job.par)))
        if not job.conf[B.SUBJECT_APPS].get(anw):
            job.m.setFatal("application " + job.par.application + " is not configured")
            return
        for k in job.conf[B.SUBJECT_APPS].get(anw):
            if k == B.ATTR_APPS_PROJECT:
                continue
            job.m.logDebug("application components -- " + k + ":")
            print("application components -- " + k + ":")
            self.createComponent(k, 0, "")

    def getComponent(self, compobjname):
        job = self.job #basic.program.Job.getInstance()
        verify = -2 + job.getDebugLevel("job_tool")
        job.debug(verify, "getComponent " + compobjname)
        if compobjname in self.comps:
            return self.comps[compobjname]
        return None

    def getComponents(self, mainfct):
        job = self.job #basic.program.Job.getInstance()
        verify = -2 + job.getDebugLevel("job_tool")
        job.debug(verify, "getComponents " + mainfct)
        out = []
        for c in self.comps:
            job.debug(verify, "getComponents " + c + ": " + str(self.comps[c].conf))
            #print("getComponents " + c + ": " + str(self.comps[c].conf))
            if mainfct in self.comps[c].conf["function"]:
                out.append(c)
        return out

    @staticmethod
    def getInstance(job, init="N"):
        if job.jobid in ComponentManager.__instances:
            return ComponentManager.__instances[job.jobid]
        else:
            return ComponentManager(job)

    def createComponent(self, componentName, nr, suffix):
        """
        in order to create a component the following has to be loaded:
        * knowledge of the application - which components should be created
        * technical knowledge of the instantiated component, especially the connection, user, password
        * business knowledge of the component, especially of its interfaces resp. artifacts
        :param componentName: name of the component
        :param nr: number of the instance if the component is multiple
        :param suffix: suffix for the specific context of the component
        :return:
        """
        job = self.job #basic.program.Job.getInstance()
        verify = job.getDebugLevel("job_tool")
        componentName = componentName.lower()
        job.debug(verify, "createComponent " + componentName)
        confs = tools.config_tool.getConfig(job, "comp", componentName)
        conns = tools.conn_tool.getConnections(job, componentName)
        instAttr = getInstanceAttributes(confs)
        job.debug(verify, "createComponent -91- " + componentName + " : " + str(confs))
        if nr > 0 and int(instAttr[B.ATTR_INST_CNT]) > 1:
            job.m.setError("multiple calls for multiple callers are not implemented")
        if nr > 0 and len(conns) == 0:
            job.m.setError("only one call is configured although multiple calls are requested")
        #print(confs)
        parContent = job.loadParameter()
        if len(conns) == 1:
            c = self.createInstance(componentName, parContent, confs, conns, 0)
            #print("createComponent 3 a " + componentName)
            self.createSubComponents(c, nr, suffix)
        else:
            i = 1
            #print("createComponent 3 b " + componentName)
            for cn in conns:
                c = self.createInstance(componentName, parContent, confs, conns, i)
                self.createSubComponents(c, i, suffix)
                i = i + 1
        #print("createComponent 9 " + componentName)
        #print(self.comps)
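
    # Hedged usage sketch (hypothetical component name, not from the source):
    #   mgr = ComponentManager.getInstance(job)
    #   mgr.createComponent("testdb", 0, "")
    # loads the comp- and conn-configuration for "testdb" and registers one
    # instance per configured connection in self.comps (see createInstance below).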

    def createInstance(self, compName, parContent, confs, conns, nr):
        """
        instantiates a component
        :param compName: name without suffix or number
        :param parContent: content of the parameter file which is dumped from a pre-step
        :param confs: configuration of the component
        :param conns: connection attributes for the specific environment
        :param nr: number if the component is multiple
        :return: instance of the component with all necessary attributes
        """
        job = self.job #basic.program.Job.getInstance()
        cmodul = importlib.import_module(getComponentPath(compName))
        class_ = getattr(cmodul, getComponentClass(compName))
        c = class_()
        if nr > 0:
            name = compName + "_0" + str(nr)
            i = nr - 1
        else:
            name = compName
            i = 0
        c.name = name
        c.classname = compName

        c.m = basic.message.Message(job, basic.message.LIMIT_DEBUG, job.start, c.name)
        c.conf = tools.config_tool.mergeConn(c.m, confs["conf"], conns[i])
        c.conf[B.SUBJECT_CONN] = conns[i]
        c.init(job)
        if parContent is not None:
            print("createComponent 5 a " + compName + " : " + str(parContent))
            if B.SUBJECT_COMPS in parContent and compName in parContent[B.SUBJECT_COMPS]:
                for k in parContent[B.SUBJECT_COMPS][compName].keys():
                    c.conf[k] = parContent[B.SUBJECT_COMPS][compName][k]
        if B.SUBJECT_ARTS in c.conf and B.TOPIC_NODE_DB in c.conf[B.SUBJECT_ARTS]:
            if not B.DATA_NODE_DDL in c.conf:
                c.conf[B.DATA_NODE_DDL] = {}
            for table in c.conf[B.SUBJECT_ARTS][B.TOPIC_NODE_DB]:
                if table in B.LIST_DB_ATTR:
                    continue
                conf = tools.config_tool.getConfig(job, D.DDL_FILENAME, compName, table)
                if B.DATA_NODE_TABLES in conf and table in conf[B.DATA_NODE_TABLES]:
                    c.conf[B.DATA_NODE_DDL][table] = conf[B.DATA_NODE_TABLES][table]
                elif table in conf:
                    c.conf[B.DATA_NODE_DDL][table] = conf[table]
                else:
                    c.conf[B.DATA_NODE_DDL][table] = conf
        comps[name] = c
        self.comps[name] = c
        return c
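
    # Naming sketch derived from the code above: for a multiple component the
    # instances are numbered with a "_0<nr>" suffix and use the nr-th connection:
    #   createInstance("testdb", None, confs, conns, 2)
    # creates an object named "testdb_02" configured with conns[1], while nr = 0
    # creates the single instance "testdb" with conns[0].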

    def createSubComponents(self, comp, nr, suffix):
        job = self.job #basic.program.Job.getInstance()
        verify = -2 + job.getDebugLevel("job_tool")
        job.debug(verify, "createSubComponents " + str(comp.conf[B.ATTR_INST_SUBCOMP]))
        for c in comp.conf[B.ATTR_INST_SUBCOMP].keys():
            if c == "none":
                continue
            self.createComponent(c, nr, suffix)

    def getComponentDict(self):
        job = self.job #basic.program.Job.getInstance()
        verify = -2 + job.getDebugLevel("job_tool")
        job.debug(verify, "getComponentDict")
        out = {}
        for c in self.comps:
            out[self.comps[c].name] = {}
            for k in self.comps[c].conf.keys():
                if isParameterSubnode(k): # B.SUBJECT_ARTS in k or "components" in k or "instance" in k
                    out[self.comps[c].name][k] = copy.deepcopy(self.comps[c].conf[k])
        return out


def getComponentFolder(comp):
    return comp.lower()


def getComponentModul(comp):
    return comp[0:1].upper() + comp[1:].lower()


def getComponentClass(comp):
    return comp[0:1].upper() + comp[1:].lower()


def getComponentPath(comp):
    return "components." + getComponentFolder(comp) + "." + getComponentModul(comp)
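
# Example derived from the three helpers above: for comp = "testDb"
#   getComponentFolder("testDb") -> "testdb"
#   getComponentModul("testDb")  -> "Testdb"
#   getComponentPath("testDb")   -> "components.testdb.Testdb"
# i.e. the module components/testdb/Testdb.py is expected to define the class Testdb.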


def getComponentDict(job = None):
    #job = basic.program.Job.getInstance()
    #verify = -2 + job.getDebugLevel("job_tool")
    #job.debug(verify, "getComponents ")
    out = {}
    for c in comps:
        out[comps[c].name] = {}
        for k in comps[c].conf.keys():
            if isParameterSubnode(k): # B.SUBJECT_ARTS in k or "components" in k or "instance" in k
                out[comps[c].name][k] = copy.deepcopy(comps[c].conf[k])
    return out


def isParameterSubnode(key):
    for k in PARAM_NOSUBNODE:
        if key in k:
            return False
    return True


def getPlainCompname(name):
    if "_0" in name:
        return name[0:-3]
    return name
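
# Example derived from the function above: getPlainCompname("testdb_01") -> "testdb",
# i.e. the numbering suffix introduced in createInstance is stripped again.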
@@ -1,100 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------------------------------------
# Author : Ulrich Carmesin
# Source : gitea.ucarmesin.de
# ---------------------------------------------------------------------------------------------------------
"""
This class is a technical implementation of a Hive connection with Spark - typically used in a
machine-learning environment, for example in Hadoop.
"""
import json

import basic.program
import utils.config_tool
import utils.db_abstract
import pyspark.sql
import basic.constants as B

class DbFcts(utils.db_abstract.DbFcts):
    """
    This interface defines each necessary connection to any kind of database.
    The specific technique for connecting to the concrete DBMS has to be implemented in the specific tool.
    """
    def __init__(self):
        pass


    def selectRows(self, table, job):
        """ method to select rows from a database,
        statement written in sql """
        tdata = {}
        dry = 0
        # note: the real read is only executed when dry == 1; by default the statement is just printed
        # attr = self.getDbAttributes(table)
        verify = -1 + job.getDebugLevel("db_tool")
        pattern = "s3a://{hostname}/data/{tenant}/mt/sandboxes/{job.par.usecase}/{job.par.workspace}/{outfile}/VR_+reg+/"
        files = self.comp.composeFileClauses(job, pattern)
        data = []
        for k in files.keys():
            sql = files[k]
            if dry == 1:
                spark = self.getConnector()
                df = spark.read.parquet(sql)
                dfj = df.toJSON()
                for r in dfj.collect():
                    data.append(json.loads(r))
            else:
                print("select " + sql)
                #self.comp.m.logInfo(cmd)
        #tdata[B.DATA_NODE_HEADER] = self.comp.conf[B.DATA_NODE_DDL][table][B.DATA_NODE_HEADER]
        #tdata[B.DATA_NODE_DATA] = data
        return tdata

    def deleteRows(self, table, job):
        """ method to delete rows from a database,
        statement written in sql """
        dry = 0
        verify = -1 + job.getDebugLevel("db_tool")
        cmd = "DELETE FROM " + table
        print("deleteRows " + cmd)
        sqls = self.comp.composeSqlClauses(job, cmd)
        print("deleteRows " + cmd)
        print(sqls)
        for k in sqls.keys():
            sql = sqls[k]
            if dry == 1:
                #spark = self.getConnector()
                #df = spark.sql(cmd)
                pass
            else:
                print("delete " + sql)
                #self.comp.m.logInfo(cmd)

    def insertRows(self, table, rows, job):
        """ method to insert rows into a database;
        the rows will be interpreted by the ddl of the component
        """
        job = self.job # basic.program.Job.getInstance()
        verify = -1 + job.getDebugLevel("db_tool")
        spark = self.getConnector()
        df = spark.createDataFrame(rows)
        # note: the DataFrame is only created here - writing it to the target
        # table is not implemented yet
        self.comp.m.logInfo("cmd")

    def getConnector(self):
        """ add-on method to get the connector;
        this method should only be called by the class itself """
        job = self.job # basic.program.Job.getInstance()
        attr = self.getDbAttributes(B.SVAL_NULL)
        spark = None
        if B.ATTR_DB_CONN_JAR in attr:
            spark = pyspark.sql.SparkSession\
                .builder\
                .appName("datest")\
                .config("sparkjar", "")\
                .getOrCreate()
        return spark
@@ -1,106 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------------------------------------
# Author : Ulrich Carmesin
# Source : gitea.ucarmesin.de
# ---------------------------------------------------------------------------------------------------------
"""
This class is a technical implementation of a Hive connection with Spark - typically used in a
machine-learning environment, for example in Hadoop.
"""
import json
import os

import basic.program
import utils.config_tool
import utils.db_abstract
import pyspark.sql
import basic.constants as B

class DbFcts(utils.db_abstract.DbFcts):
    """
    This interface defines each necessary connection to any kind of database.
    The specific technique for connecting to the concrete DBMS has to be implemented in the specific tool.
    """
    def __init__(self):
        pass


    def selectRows(self, table, job):
        """ method to select rows from a database,
        statement written in sql """
        tdata = {}
        dry = 0
        # note: the real read is only executed when dry == 1; by default the statement is just printed
        verify = -1 + job.getDebugLevel("db_tool")
        attr = self.getDbAttributes(B.SVAL_NULL)
        cmd = "SELECT " + ",".join(self.comp.conf[B.DATA_NODE_DDL][table][B.DATA_NODE_HEADER])
        cmd += " FROM " + table
        sqls = self.comp.composeSqlClauses(job, cmd)
        data = []
        for k in sqls.keys():
            sql = sqls[k]
            if dry == 1:
                try:
                    spark = self.getConnector()
                    df = spark.sql(sql)
                    dfj = df.toJSON()
                    for r in dfj.collect():
                        data.append(json.loads(r))
                except Exception:
                    self.comp.m.setError("table couldn't be read: " + table)
            else:
                print("select " + sql)
                self.comp.m.logInfo(sql)
        tdata[B.DATA_NODE_HEADER] = self.comp.conf[B.DATA_NODE_DDL][table][B.DATA_NODE_HEADER]
        tdata[B.DATA_NODE_DATA] = data
        return tdata

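    # Hedged sketch (assumption: composeSqlClauses returns a dict that maps a
    # clause key to a complete sql statement), e.g. for table "customer":
    #   sqls = {"customer": "SELECT id,name FROM customer WHERE ..."}
    # each statement is run via spark.sql(...) and collected as JSON rows.
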
    def deleteRows(self, table, job):
        """ method to delete rows from a database,
        statement written in sql """
        dry = 0
        verify = -1 + job.getDebugLevel("db_tool")
        cmd = "DELETE FROM " + table
        print("deleteRows " + cmd)
        sqls = self.comp.composeSqlClauses(job, cmd)
        print("deleteRows " + cmd)
        print(sqls)
        for k in sqls.keys():
            sql = sqls[k]
            if dry == 1:
                #spark = self.getConnector()
                #df = spark.sql(cmd)
                pass
            else:
                print("delete " + sql)
                #self.comp.m.logInfo(cmd)

    def insertRows(self, table, rows, job):
        """ method to insert rows into a database;
        the rows will be interpreted by the ddl of the component
        """
        job = self.job # basic.program.Job.getInstance()
        verify = -1 + job.getDebugLevel("db_tool")
        spark = self.getConnector()
        df = spark.createDataFrame(rows)
        # note: the DataFrame is only created here - writing it to the target
        # table is not implemented yet
        self.comp.m.logInfo("cmd")

    def getConnector(self):
        """ add-on method to get the connector;
        this method should only be called by the class itself """
        job = self.job # basic.program.Job.getInstance()
        attr = self.getDbAttributes(B.SVAL_NULL)
        spark = None
        if B.ATTR_DB_CONN_JAR in attr:
            connectorJar = os.environ.get(attr[B.ATTR_DB_CONN_JAR])
            spark = pyspark.sql.SparkSession\
                .builder\
                .appName("datest")\
                .config("spark.jars", f"{connectorJar}")\
                .getOrCreate()
        return spark
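
# Hedged configuration sketch (attribute value and path are assumptions, not from
# the source): the conn attribute holds the *name* of an environment variable,
# e.g. attr[B.ATTR_DB_CONN_JAR] = "SPARK_CONNECTOR_JAR" together with
#   export SPARK_CONNECTOR_JAR=/opt/jars/hive-connector.jar
# so that os.environ.get(...) resolves the jar path passed to "spark.jars".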
@@ -1,73 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------------------------------------
# Author : Ulrich Carmesin
# Source : gitea.ucarmesin.de
# ---------------------------------------------------------------------------------------------------------
import basic.program
import utils.config_tool
import utils.db_abstract
import pyspark.sql
import basic.constants as B

class DbFcts(utils.db_abstract.DbFcts):
    """
    This interface defines each necessary connection to any kind of database.
    The specific technique for connecting to the concrete DBMS has to be implemented in the specific tool.
    """
    def __init__(self):
        pass

    def getDbAttributes(self):
        # stub: no database attributes are resolved in this implementation
        out = {}
        return out

    def selectRows(self, table, job):
        """ method to select rows from a database,
        statement written in sql """
        tdata = {}
        verify = -1 + job.getDebugLevel("db_tool")
        cmd = "SELECT " + ",".join(self.comp.conf[B.DATA_NODE_DDL][table][B.DATA_NODE_HEADER])
        cmd += " FROM " + table + self.getWhere() + self.getOrder()
        spark = self.getConnector()
        df = spark.sql(cmd)
        data = []
        for r in df.collect():
            data.append(r)
        tdata[B.DATA_NODE_HEADER] = self.comp.conf[B.DATA_NODE_DDL][table][B.DATA_NODE_HEADER]
        tdata[B.DATA_NODE_DATA] = data
        return tdata

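    # Illustrative sketch (assumed ddl shape, not from the source): with
    #   self.comp.conf[B.DATA_NODE_DDL]["customer"][B.DATA_NODE_HEADER] = ["id", "name"]
    # the composed statement is
    #   SELECT id,name FROM customer<where><order>
    # where getWhere()/getOrder() contribute the optional clauses.
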
    def deleteRows(self, table):
        """ method to delete rows from a database,
        statement written in sql """
        job = self.job # basic.program.Job.getInstance()
        verify = -1 + job.getDebugLevel("db_tool")
        cmd = "DELETE FROM " + table + ";"
        # the statement is only logged, not executed
        self.comp.m.logInfo(cmd)

    def insertRows(self, table, rows):
        """ method to insert rows into a database;
        the rows will be interpreted by the ddl of the component
        """
        job = self.job # basic.program.Job.getInstance()
        verify = -1 + job.getDebugLevel("db_tool")
        spark = self.getConnector()
        df = spark.createDataFrame(rows)
        # note: the DataFrame is only created here - writing it to the target
        # table is not implemented yet
        self.comp.m.logInfo("cmd")

    def getConnector(self):
        """ add-on method to get the connector;
        this method should only be called by the class itself """
        job = self.job # basic.program.Job.getInstance()
        spark = pyspark.sql.SparkSession\
            .builder\
            .appName("datest")\
            .getOrCreate()
        return spark