
example job unit-tester

refactor
Ulrich 2 years ago
parent
commit
3f260bf6e5
  1. 281  basic/componentHandling.py
  2. 83  basic/program.py
  3. 6  test/test_18i18n.py
  4. 6  test/test_90testserver.py
  5. 3  tools/config_tool.py
  6. 14  tools/conn_tool.py
  7. 6  tools/file_tool.py
  8. 11  tools/filecsv_fcts.py
  9. 5  tools/path_tool.py
  10. 8  tools/value_tool.py
  11. 31  unit_tester.py
  12. 100  utils/dbsfile_tool.py
  13. 106  utils/dbshive_tool.py
  14. 73  utils/dbspark_tool.py

281
basic/componentHandling.py

@@ -0,0 +1,281 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------------------------------------
# Author : Ulrich Carmesin
# Source : gitea.ucarmesin.de
# ---------------------------------------------------------------------------------------------------------
# managing the components
# -----------------------------------------------------------------------------
"""
Components have to be created in relation to the application (basis.yml).
Each component is usually created once, but not always:
* the same instance of a component is used in different contexts
* there may exist several instances
* there may be alternatives of an instance
Each kind of instance has its component class, and for each use an object should be created.
Each created component object is documented in the parameter file.
"""
import tools.config_tool
import tools.conn_tool
import basic.program
import basic.message
import basic.component
import importlib
import copy
import basic.constants as B
import tools.data_const as D
comps = {}
PARAM_NOSUBNODE = [B.SUBJECT_ARTS, "components", "instance"]
DEFAULT_INST_CNT = 1
DEFAULT_INST_SGL = "y"
def getInstanceAttributes(conf):
"""
the attributes for instantiating the component are taken from the configuration or from defaults
:param conf:
:return: a complete set of these attributes
"""
out = {
B.ATTR_INST_CNT: DEFAULT_INST_CNT,
B.ATTR_INST_SGL: DEFAULT_INST_SGL
}
if B.SUBJECT_INST in conf:
for attr in [B.ATTR_INST_CNT, B.ATTR_INST_SGL]:
if attr in conf[B.SUBJECT_INST]:
out[attr] = conf[B.SUBJECT_INST][attr]
return out
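# A hedged sketch of the merge above, assuming B.SUBJECT_INST resolves to
# "instance" and the two attribute constants to "count" and "single" (the
# real values live in basic.constants):
#   getInstanceAttributes({"instance": {"count": 2}})  -> {"count": 2, "single": "y"}
#   getInstanceAttributes({})                          -> {"count": 1, "single": "y"}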
def getComponents(job, mainfct):
#job = basic.program.Job.getInstance()
verify = -2 + job.getDebugLevel("job_tool")
job.debug(verify, "getComponents " + mainfct)
out = []
for c in comps:
job.debug(verify, "getComponents " + c + ": " + str(comps[c].conf))
print("getComponents " + c + ": " + str(comps[c].conf))
if mainfct in comps[c].conf["function"]:
out.append(c)
return out
class ComponentManager:
__instance = None
__instances = {}
"""
initializes the Manager with all necessary components
"""
def __init__(self, job, option=""):
#job = basic.program.Job.getInstance()
job.m.logDebug("applicationscomponente -- " + str(type(job.par)))
self.components = {}
self.comps = {}
self.job = job
ComponentManager.__instances[job.jobid] = self
ComponentManager.__instance = self
print ("init ComponentHandling "+str(self))
def initComponents(self):
# sets components the first time
# afterwards set components from parameter-file
job = self.job # basic.program.Job.getInstance()
anw = job.par.application
job.m.logDebug("applicationscomponente -- " + str(type(job.par)))
if not job.conf[B.SUBJECT_APPS].get(anw):
job.m.setFatal("application " + job.par.application + " is not configured")
return
for k in job.conf[B.SUBJECT_APPS].get(anw):
if k == B.ATTR_APPS_PROJECT:
continue
job.m.logDebug("applicationscomponente -- " + k + ":")
print("applicationscomponente -- " + k + ":")
self.createComponent(k, 0, "")
def getComponent(self, compobjname):
job = self.job #basic.program.Job.getInstance()
verify = -2 + job.getDebugLevel("job_tool")
job.debug(verify, "getComponents " + compobjname)
if compobjname in self.comps:
return self.comps[compobjname]
return None
def getComponents(self, mainfct):
job = self.job #basic.program.Job.getInstance()
verify = -2 + job.getDebugLevel("job_tool")
job.debug(verify, "getComponents " + mainfct)
out = []
for c in self.comps:
job.debug(verify, "getComponents " + c + ": " + str(self.comps[c].conf))
#print("getComponents " + c + ": " + str(self.comps[c].conf))
if mainfct in self.comps[c].conf["function"]:
out.append(c)
return out
@staticmethod
def getInstance(job, init="N"):
if (job.jobid in ComponentManager.__instances):
return ComponentManager.__instances[job.jobid]
else:
return ComponentManager(job)
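# Usage sketch: getInstance keeps one manager per job id and creates it lazily
# ("job" stands for any started job object):
#   m1 = ComponentManager.getInstance(job)
#   m2 = ComponentManager.getInstance(job)
#   assert m1 is m2   # same jobid -> same manager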
def createComponent(self, componentName, nr, suffix):
"""
in order to create a component, the following has to be loaded:
* knowledge of the application - which components should be created
* technical knowledge of the instantiated component, especially the connection, user and password
* business knowledge of the component, especially its interfaces resp. artifacts
:param componentName: Name of the component
:param nr: for numbered instance if component is multiple
:param suffix: suffix for specific context of the component
:return:
"""
job = self.job #basic.program.Job.getInstance()
verify = job.getDebugLevel("job_tool")
componentName = componentName.lower()
job.debug(verify, "createComponent " + componentName)
confs = tools.config_tool.getConfig(job, "comp", componentName)
conns = tools.conn_tool.getConnections(job, componentName)
instAttr = getInstanceAttributes(confs)
job.debug(verify, "createComponent -91- " + componentName + " : " + str(confs))
if nr > 0 and int(instAttr[B.ATTR_INST_CNT]) > 1:
job.m.setError("for multiple callers are multiple calls not implemented ")
if nr > 0 and len(conns) == 0:
job.m.setError("for multiple calls has only one call configured")
#print(confs)
parContent = job.loadParameter()
if len(conns) == 1:
c = self.createInstance(componentName, parContent, confs, conns, 0)
#print("createComponent 3 a " + componentName)
self.createSubComponents(c, nr, suffix)
else:
i = 1
#print("createComponent 3 b " + componentName)
for cn in conns:
c = self.createInstance(componentName, parContent, confs, conns, i)
self.createSubComponents(c, i, suffix)
i = i + 1
#print("createComponent 9 " + componentName)
#print(self.comps)
def createInstance(self, compName, parContent, confs, conns, nr):
"""
instantiate a component
:param compName: name without suffix or number
:param parContent: content of the parameter-file which is dumped from a pre-step
:param confs: configuration of the component
:param conns: connection-attributes for the specific environment
:param nr: number if component is multiple
:return: instance of the component with all necessary attributes
"""
job = self.job #basic.program.Job.getInstance()
cmodul = importlib.import_module(getComponentPath(compName))
class_ = getattr(cmodul, getComponentClass(compName))
c = class_()
if nr > 0:
name = compName + "_0" + str(nr)
i = nr - 1
else:
name = compName
i = 0
c.name = name
c.classname = compName
c.m = basic.message.Message(job, basic.message.LIMIT_DEBUG, job.start, c.name)
c.conf = tools.config_tool.mergeConn(c.m, confs["conf"], conns[i])
c.conf[B.SUBJECT_CONN] = conns[i]
c.init(job)
if parContent is not None:
print("createComponent 5 a " + compName + " : " + str(parContent))
if B.SUBJECT_COMPS in parContent and compName in parContent[B.SUBJECT_COMPS]:
for k in parContent[B.SUBJECT_COMPS][compName].keys():
c.conf[k] = parContent[B.SUBJECT_COMPS][compName][k]
if B.SUBJECT_ARTS in c.conf and B.TOPIC_NODE_DB in c.conf[B.SUBJECT_ARTS]:
if not B.DATA_NODE_DDL in c.conf:
c.conf[B.DATA_NODE_DDL] = {}
for table in c.conf[B.SUBJECT_ARTS][B.TOPIC_NODE_DB]:
if table in B.LIST_DB_ATTR:
continue
conf = tools.config_tool.getConfig(job, D.DDL_FILENAME, compName, table)
if B.DATA_NODE_TABLES in conf and table in conf[B.DATA_NODE_TABLES]:
c.conf[B.DATA_NODE_DDL][table] = conf[B.DATA_NODE_TABLES][table]
elif table in conf:
c.conf[B.DATA_NODE_DDL][table] = conf[table]
else:
c.conf[B.DATA_NODE_DDL][table] = conf
comps[name] = c
self.comps[name] = c
return c
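# The dynamic load in createInstance follows the standard importlib pattern;
# a minimal stand-alone sketch (module and class names are illustrative,
# assuming the layout components/<folder>/<Modul>.py built by getComponentPath):
#   import importlib
#   cmodul = importlib.import_module("components.testrest.Testrest")
#   class_ = getattr(cmodul, "Testrest")
#   c = class_()   # fresh component object, afterwards filled via c.conf and c.init(job)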
def createSubComponents(self, comp, nr, suffix):
job = self.job #basic.program.Job.getInstance()
verify = -2 + job.getDebugLevel("job_tool")
job.debug(verify, "getComponents " + str(comp.conf[B.ATTR_INST_SUBCOMP]))
for c in comp.conf[B.ATTR_INST_SUBCOMP].keys():
if c == "none":
continue
self.createComponent(c, nr, suffix)
def getComponentDict(self):
job = self.job #basic.program.Job.getInstance()
verify = -2 + job.getDebugLevel("job_tool")
job.debug(verify, "getComponents ")
out = {}
for c in self.comps:
out[self.comps[c].name] = {}
for k in self.comps[c].conf.keys():
if isParameterSubnode(k): # B.SUBJECT_ARTS in k or "components" in k or "instance" in k:
out[self.comps[c].name][k] = copy.deepcopy(self.comps[c].conf[k])
return out
def getComponentFolder(comp):
return comp.lower()
def getComponentModul(comp):
return comp[0:1].upper() + comp[1:].lower()
def getComponentClass(comp):
return comp[0:1].upper() + comp[1:].lower()
def getComponentPath(comp):
return "components." + getComponentFolder(comp) + "." + getComponentModul(comp)
def getComponentDict(job = None):
#job = basic.program.Job.getInstance()
#verify = -2 + job.getDebugLevel("job_tool")
#job.debug(verify, "getComponents ")
out = {}
for c in comps:
out[comps[c].name] = {}
for k in comps[c].conf.keys():
if isParameterSubnode(k): # B.SUBJECT_ARTS in k or "components" in k or "instance" in k:
out[comps[c].name][k] = copy.deepcopy(comps[c].conf[k])
return out
def isParameterSubnode(key):
for k in PARAM_NOSUBNODE:
if key in k:
return False
return True
def getPlainCompname(name):
if "_0" in name:
return name[0:-3]
return name
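A minimal usage sketch for the new module, assuming a started job whose application configures a hypothetical component "testdb" with two instances (numbered instances get the "_0<n>" suffix built in createInstance):

    import basic.componentHandling
    cm = basic.componentHandling.ComponentManager.getInstance(job)
    cm.initComponents()                  # one object per configured component
    comp = cm.getComponent("testdb_01")  # numbered instance of "testdb"
    print(basic.componentHandling.getPlainCompname("testdb_01"))  # -> "testdb"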

83
basic/program.py

@@ -40,75 +40,8 @@ CTLG_BASEDIR = "basedir"
CTLG_LOGPATH = "logpath"
CTLG_LOGLEVEL = "loglevel"
def setGlobal():
pass
EXCP_CANT_POP = "cant pop this job from the instances"
DEFAULT_ARCHIV_DIR = T.DATA_PATH + "/lauf"
DEFAULT_GRAN = "ws"
DEFAULT_PRG = "webflask"
DEFAULT_APP = "WEBFLASK"
DEFAULT_ENV = "Workspace"
DEFAULT_MODE = "test"
DEFAULT_TIME = "2022-08-29_17-29-59"
def createJob(pprg="", pgran="", papp="", penv="", ptstamp="", pmode=""):
"""
this creates a Job object with the main arguments ## same as in testtools?
:param pprg: program-name
:param pgran: tc|ts|
:param papp: application
:param penv: environment
:param ptstamp: timestamp - part of specific testfolder
:param pmode: if it is a productive or development execution
:return:
"""
if len(pprg) < 1:
prgname = DEFAULT_PRG
else:
prgname = pprg
if len(pgran) < 1:
gran = DEFAULT_GRAN
else:
gran = pgran
if len(papp) < 1:
app = DEFAULT_APP
else:
app = papp
if len(penv) < 1:
env = DEFAULT_ENV
else:
env = penv
if len(ptstamp) < 1:
tstamp = DEFAULT_TIME
else:
tstamp = ptstamp
if len(pmode) < 1:
mode = DEFAULT_MODE
else:
mode = pmode
if gran == "tc":
path = DEFAULT_ARCHIV_DIR + "/TC0001/" + tstamp
elif gran == "ts":
path = DEFAULT_ARCHIV_DIR + "/testlauf/TST001_" + tstamp
else:
path = T.DATA_PATH + "/workspace/"
job = basic.program.Job(prgname)
args = {"application": app, "environment": env, "modus": mode, gran + "time": tstamp,
gran + "dir": path,
"step": 1}
job.par.setParameterArgs(job, args)
return job
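# Usage sketch (all arguments may be omitted and fall back to the DEFAULT_* values):
#   job = createJob()                                # webflask workspace job
#   job = createJob(pprg="unit_tester", pgran="tc",
#                   ptstamp="2022-08-29_17-29-59")   # testcase folder under .../TC0001/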
class SimpleJob:
"""
@@ -275,9 +208,12 @@ class Job:
logTime = self.start.strftime("%Y%m%d_%H%M%S")
self.m = basic.message.Message(self, basic.message.LIMIT_DEBUG, logTime, None)
tools.job_tool.startJobProcesses(self)
self.par.setParameterLoaded(self)
self.m.logInfo("# # # Start Job " + self.start.strftime("%d.%m.%Y %H:%M:%S") + " # # # ")
self.m.debug(basic.message.LIMIT_INFO, "# # # Start Job " + self.start.strftime("%d.%m.%Y %H:%M:%S") + " # # # ")
if self.programDef[CTLG_PARSOURCE] != "":
self.par.setParameterLoaded(self)
header1 = "# # # Start Job " + tools.date_tool.formatParsedDate(str(self.start), tools.date_tool.F_DE_TSTAMP) + " # # # "
self.m.logInfo(header1)
self.m.debug(basic.message.LIMIT_INFO, header1)
print(header1)
self.par.checkParameter(self)
self.m.logInfo(self.par.parstring)
@@ -285,7 +221,8 @@
def stopJob(self, reboot=0):
tools.job_tool.stopJobProcesses(self)
self.ende = datetime.now()
self.dumpParameter()
if self.programDef[CTLG_PARTARGET] != "":
self.dumpParameter()
footer1 = "# # " + self.m.topmessage + " # # # "
footer2 = "# # # Stop Job " + tools.date_tool.formatParsedDate(str(self.start), tools.date_tool.F_DE_TSTAMP)
footer2 += " # " + tools.date_tool.formatParsedDate(str(self.ende), tools.date_tool.F_DE_TSTAMP) + " # # # "
@@ -463,8 +400,8 @@ class Parameter:
def checkParameter(self, job):
print (f"Parameter initialisiert {self.program}")
pardef = self.programDef[CTLG_PARDEF]
for p in pardef.split(","):
pardef = job.programDef[CTLG_PARDEF]
for p in pardef["par"]:
if len(p) > 1 and not hasattr(self, p):
job.m.setFatal("Parameter " + p + " is not set!")
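With this change the catalog's parameter definition is no longer a comma-separated string but a structured node; a sketch of both shapes (the names inside "par" are illustrative):

    # before: "application,environment,modus" -> pardef.split(",")
    # after:
    pardef = {"par": {"application": "WEBFLASK", "environment": "Workspace"}}
    for p in pardef["par"]:
        print(p)   # checkParameter verifies hasattr(self, p) for each name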

6
test/test_08i18n.py → test/test_18i18n.py

@@ -51,7 +51,7 @@ class MyTestCase(unittest.TestCase):
if actfunction not in TEST_FUNCTIONS:
return
job = test.testtools.getJob()
job.conf.confs["language"] = "de"
job.conf["language"] = "de"
i18n = utils.i18n_tool.I18n.getInstance(job)
res = i18n.getText(f"{EXP_KEY_MISSING=}", job)
if verbose: print("RESULT "+res)
@@ -65,7 +65,7 @@ class MyTestCase(unittest.TestCase):
if verbose: print("RESULT "+res)
self.assertEqual(res, "project")
cnttest += 1
job.conf.confs["language"] = "fr"
job.conf["language"] = "fr"
i18n = utils.i18n_tool.I18n.getInstance(job)
self.assertRaises(Exception, i18n.getText, (f"{EXP_KEY_MISSING=}", job))
cnttest += 1
@@ -79,7 +79,7 @@ class MyTestCase(unittest.TestCase):
if actfunction not in TEST_FUNCTIONS:
return
job = test.testtools.getJob()
job.conf.confs["language"] = "de"
job.conf["language"] = "de"
i18n = utils.i18n_tool.I18n.getInstance(job)
# i18n.getText("EXP_KEY_MISSING", EXP_KEY_MISSING, job)
res = i18n.getAliasList(f"{EXP_KEY_MISSING=}", job)

6
test/test_10testserver.py → test/test_90testserver.py

@@ -33,7 +33,7 @@ class MyTestCase(unittest.TestCase):
testserver = basic.Testserver.Testserver(job)
self.assertIsNotNone(testserver)
cnttest += 1
if B.TOPIC_NODE_DB in job.conf.confs:
if B.TOPIC_NODE_DB in job.conf:
self.assertIn(B.TOPIC_NODE_DB, testserver.conf[B.SUBJECT_CONN])
self.assertIn(B.ATTR_DB_DATABASE, testserver.conf[B.SUBJECT_CONN][B.TOPIC_NODE_DB])
self.assertIn(B.DATA_NODE_DDL, testserver.conf)
@@ -48,8 +48,8 @@ class MyTestCase(unittest.TestCase):
return
job = test.testtools.getJob()
testserver = basic.Testserver.Testserver(job)
if B.TOPIC_NODE_DB in job.conf.confs:
dbi = basic.toolHandling.getDbTool(job, self, job.conf.confs[B.TOPIC_NODE_DB][B.ATTR_TYPE])
if B.TOPIC_NODE_DB in job.conf:
dbi = basic.toolHandling.getDbTool(job, self, job.conf[B.TOPIC_NODE_DB][B.ATTR_TYPE])
else:
return "No DB in job-config"
sql = testserver.getDBSchema(job, dbi, "application")

3
tools/config_tool.py

@@ -309,6 +309,7 @@ def mergeConn(msg, conf, conn):
:param conn:
:return:
"""
if B.SUBJECT_INST not in conf:
conf[B.SUBJECT_INST] = {}
for a in conn[B.SUBJECT_INST]:
@@ -324,7 +325,6 @@ def mergeConn(msg, conf, conn):
list = B.LIST_API_ATTR
if topic == B.TOPIC_NODE_FILE:
list = B.LIST_FILE_ATTR
print(" --- merge-conn " + topic + " " + str(list))
for a in conf[B.SUBJECT_ARTS][topic]:
if topic not in conn:
continue
@@ -333,7 +333,6 @@ def mergeConn(msg, conf, conn):
conf[B.SUBJECT_ARTS][topic][a] = conn[topic][a]
else:
for b in conf[B.SUBJECT_ARTS][topic][a]:
print(" --- merge-conn b " + topic + " " + a+" "+b)
if b not in list:
msg.logError("not-topic-attribute in topic-connection: "+topic+", "+b)
continue
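The added guard seeds an empty instance node before the connection attributes are merged; a reduced sketch, writing "instance" in place of B.SUBJECT_INST:

    conf = {}                            # component config without an instance node
    conn = {"instance": {"count": 2}}    # connection config from the environment
    if "instance" not in conf:           # the new guard avoids a KeyError below
        conf["instance"] = {}
    for a in conn["instance"]:
        conf["instance"][a] = conn["instance"][a]
    print(conf)                          # {'instance': {'count': 2}}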

14
tools/conn_tool.py

@@ -49,9 +49,9 @@ def getConnections(job, comp):
conns = []
# if a datest database exists, read the connections
conndb = {}
if job.conf.confs.get("db"):
# select
pass
#if job.conf["db"]:
# # select
# pass
conn = tools.config_tool.getConfig(job, "tool", B.SUBJECT_CONN)
if not comp in conn[B.SUBJECT_ENV]:
@@ -64,7 +64,7 @@ def getConnections(job, comp):
for a in conn[B.SUBJECT_ENV][comp]:
if "inst" in a and a != B.SUBJECT_INST:
continue
attr[a] = conn["env"][comp][a]
attr[a] = conn[B.SUBJECT_ENV][comp][a]
#if ("types" in conn["env"][comp]):
# xtypes = conn["env"][comp]["types"]
for i in range(conn[B.SUBJECT_ENV][comp][B.SUBJECT_INST][B.ATTR_INST_CNT]):
@@ -73,9 +73,9 @@ def getConnections(job, comp):
#if (xtypes is not None):
# conn["env"][comp][instnr]["types"] = xtypes
for a in attr:
if a in conn["env"][comp][instnr]:
if a in conn[B.SUBJECT_ENV][comp][instnr]:
continue # don't overwrite an instance-specific value
conn["env"][comp][instnr][a] = attr[a]
conns.append(conn["env"][comp][instnr])
conn[B.SUBJECT_ENV][comp][instnr][a] = attr[a]
conns.append(conn[B.SUBJECT_ENV][comp][instnr])
return conns
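The loop above copies component-wide attributes into every numbered instance but never overwrites an instance-specific value; a reduced sketch of that inheritance (the env layout is simplified, all names are illustrative):

    comp_env = {
        "user": "tester",                     # component-wide attribute
        "instance": {"count": 2},
        1: {"host": "db1"},                   # inherits "user"
        2: {"host": "db2", "user": "other"},  # keeps its own "user"
    }
    attr = {k: v for k, v in comp_env.items() if isinstance(k, str) and k != "instance"}
    conns = []
    for i in range(comp_env["instance"]["count"]):
        inst = comp_env[i + 1]
        for a in attr:
            inst.setdefault(a, attr[a])       # don't overwrite instance-specific values
        conns.append(inst)
    print([c["user"] for c in conns])         # ['tester', 'other']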

6
tools/file_tool.py

@@ -22,8 +22,6 @@ import tools.date_tool
def getDump(obj):
result = ""
print(str(type(obj)))
result = vars(obj)
return str(result)
@@ -141,10 +139,8 @@ def mkPaths(job, path, msg):
def getFileEncoding(msg, job, path):
print("--- getFileEncoding " + path)
encodings = ['utf-8', 'iso-8859-1'] # add more
for e in encodings:
print(e)
try:
fh = codecs.open(path, 'r', encoding=e)
fh.readlines()
@@ -160,9 +156,7 @@ def getFileEncoding(msg, job, path):
def detectFileEncode(job, path, msg): # return ""
# job = basic.program.Job.getInstance()
verify = int(job.getDebugLevel("file_tool"))
print(path)
cntIso = 0
cntUtf = 0
j = 0

11
tools/filecsv_fcts.py

@@ -4,6 +4,7 @@
# Author : Ulrich Carmesin
# Source : gitea.ucarmesin.de
# ---------------------------------------------------------------------------------------------------------
import json
import re
import basic.program
import tools.file_abstract
@@ -135,6 +136,14 @@ def splitFields(line, delimiter, job):
break
if re.match(r"^\"(.*)\"$", fields[i]):
fields[i] = fields[i][1:-1]
if fields[i].find("{\"") == 0:
if fields[i].find("{\"\"") == 0:
fields[i] = fields[i].replace("\"\"", "\"")
try:
val = json.loads(fields[i])
fields[i] = val
except Exception as e:
pass
out.append(fields[i])
return out
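The new branch lets a CSV cell carry embedded JSON: doubled quotes (the usual CSV escaping) are collapsed, then the cell is parsed, silently keeping the raw string on failure. A minimal sketch:

    import json
    field = '{""state"": ""ok"", ""count"": 2}'  # JSON as it appears inside a csv cell
    if field.find('{"') == 0:
        if field.find('{""') == 0:
            field = field.replace('""', '"')     # undo the csv quote doubling
        try:
            field = json.loads(field)            # -> {'state': 'ok', 'count': 2}
        except Exception:
            pass                                 # keep the raw string if not valid JSON
    print(field)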
@@ -225,7 +234,7 @@ def setTableData(tableDict, fields, ttype, job):
fields = [tableDict[D.DATA_ATTR_ALIAS]] + fields
i = 1
for f in tableDict[B.DATA_NODE_HEADER]:
row[f] = fields[i].strip()
row[f] = fields[i]
i += 1
if ttype == D.CSV_SPECTYPE_DATA:
if B.ATTR_DATA_COMP in tableDict:

5
tools/path_tool.py

@@ -65,10 +65,7 @@ def compose_path(job, pathname, comp):
"""
verify = job.getDebugLevel(TOOL_NAME)
job.debug(verify, "composePath " + pathname)
if "{" in pathname:
return tools.value_tool.compose_pattern(job, pathname, comp)
else:
job.debug(verify, "in Pattern nicht vorhanden: " + pathname)
return tools.value_tool.compose_pattern(job, pathname, comp)
def rejoinPath(a, b="", c="", d="", e="", f=""):

8
tools/value_tool.py

@@ -67,7 +67,11 @@ def get_key_value(job, key, comp=None):
if not hasattr(job, a[1]):
job.m.setError("key, job has not attribute "+a[1])
return ""
return getattr(job, a[1])
val = getattr(job, a[1])
if a[1] == "start":
val = tools.date_tool.formatParsedDate(str(val), tools.date_tool.F_LOG)
return val
elif DOM_COMP in key:
if comp is None:
raise Exception(P.EXP_COMP_MISSING.format(key))
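The added branch returns the job's start timestamp formatted rather than raw; a stand-in sketch (the strftime pattern stands in for tools.date_tool.F_LOG, whose exact value is not shown here):

    from datetime import datetime
    start = datetime(2022, 8, 29, 17, 29, 59)
    print(str(start))                           # 2022-08-29 17:29:59  (raw attribute)
    print(start.strftime("%Y-%m-%d_%H-%M-%S"))  # 2022-08-29_17-29-59  (formatted)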
@@ -102,7 +106,7 @@ def get_key_value(job, key, comp=None):
def compose_pattern(job, pattern, comp):
if "{" in pattern and "}" in pattern:
return compose_string(job, pattern,comp)
return compose_string(job, pattern, comp)
vc = ValueConf.getInstance(job)
if pattern in vc.pattern:
return compose_string(job, "{" + pattern + "}", comp)

31
unit_run.py → unit_tester.py

@@ -24,6 +24,9 @@ import os, glob
import io
import sys
import test.constants as T
import basic.program
PROGRAM_NAME = "unit_tester"
VERIFY = False
@@ -31,15 +34,15 @@ class MyTestLoader(unittest.TestLoader):
def start_testcases(self):
pass
def create_test_suite():
def create_test_suite(job):
suite = unittest.TestSuite()
for testdir in [T.PROG_PATH, T.COMP_PATH]:
print("testdir "+testdir)
testFileStrings = sorted(glob.glob(os.path.join(testdir, "test", 'test_*.py')))
print("testdir "+str(testFileStrings))
for t in testFileStrings:
print("t "+t)
if not t[len(testdir)+11:len(testdir)+13].isnumeric():
testnum = t[len(testdir)+11:len(testdir)+13]
if not testnum.isnumeric():
continue
if testnum not in ["01", "03", "08", "10", "11"]:
continue
if testdir == T.COMP_PATH:
v = "components.test." + t[len(testdir) + 6:-3]
@@ -53,9 +56,19 @@ def create_test_suite():
suite.addTest(class_(f))
return suite
if __name__ == '__main__':
print ("start")
loader = unittest.TestLoader()
suite = create_test_suite()
def startPyJob(job):
suite = create_test_suite(job)
runner = unittest.TextTestRunner(sys.stdout, True, 3, True, True)
runner.run(suite)
if __name__ == '__main__':
job = basic.program.Job(PROGRAM_NAME)
# TODO: move to job_tool
args = {}
if isinstance(job.programDef[basic.program.CTLG_PARDEF], dict) and "par" in job.programDef[basic.program.CTLG_PARDEF]:
for p in job.programDef[basic.program.CTLG_PARDEF]["par"]:
args[p] = job.programDef[basic.program.CTLG_PARDEF]["par"][p]
job.setParameter(args)
job.startJob()
startPyJob(job)
job.stopJob(0)
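The rewritten filter keys on the two-digit number directly after the test_ prefix, and only whitelisted numbers join the suite; a sketch of the slicing (the path is illustrative):

    import os
    testdir = "/prj"
    t = os.path.join(testdir, "test", "test_08i18n.py")
    testnum = t[len(testdir) + 11:len(testdir) + 13]  # skips "/test/test_", takes 2 digits
    print(testnum in ["01", "03", "08", "10", "11"])  # True -> module joins the suite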

100
utils/dbsfile_tool.py

@@ -1,100 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------------------------------------
# Author : Ulrich Carmesin
# Source : gitea.ucarmesin.de
# ---------------------------------------------------------------------------------------------------------
"""
This class is a technical implementation of a Hive connection with Spark - typically used in a
machine-learning environment, for example in Hadoop.
"""
import json
import basic.program
import utils.config_tool
import utils.db_abstract
import pyspark
import basic.constants as B
class DbFcts(utils.db_abstract.DbFcts):
"""
This interface defines each necessary connection to any kind of database.
The specific technique for connecting to the concrete DBMS has to be implemented in the specific tool.
"""
def __init__(self):
pass
def selectRows(self, table, job):
""" method to select rows from a database
statement written in sql """
tdata = {}
dry = 0
# attr = self.getDbAttributes(table)
verify = -1+job.getDebugLevel("db_tool")
pattern = "s3a://{hostname}/data/{tenant}/mt/sandboxes/{job.par.usecae}/{job.par.workspace}/{outfile}/VR_+reg+/"
files = self.comp.composeFileClauses(job, pattern)
data = []
for k in files.keys():
sql = files[k]
if dry == 1:
spark = self.getConnector()
df = spark.read.parquet(sql)
dfj = df.toJSON()
for r in dfj.collect():
data.append(json.loads(r))
else:
print("select "+sql)
#self.comp.m.logInfo(cmd)
#tdata[B.DATA_NODE_HEADER] = self.comp.conf[B.DATA_NODE_DDL][table][B.DATA_NODE_HEADER]
#tdata[B.DATA_NODE_DATA] = data
return tdata
def deleteRows(self, table, job):
""" method to delete rows from a database
statement written in sql """
dry = 0
verify = -1+job.getDebugLevel("db_tool")
cmd = "DELETE FROM "+table
print("deleteRows "+cmd)
sqls = self.comp.composeSqlClauses(job, cmd)
print("deleteRows "+cmd)
print(sqls)
for k in sqls.keys():
sql = sqls[k]
if dry == 1:
#spark = self.getConnector()
#df = spark.sql(cmd)
pass
else:
print("select "+sql)
#self.comp.m.logInfo(cmd)
def insertRows(self, table, rows, job):
""" method to insert rows into a database
the rows will be interpreted by the ddl of the component
"""
job = self.job # basic.program.Job.getInstance()
verify = -1+job.getDebugLevel("db_tool")
spark = self.getConnector()
df = spark.createDataFrame(rows)
self.comp.m.logInfo("cmd")
def getConnector(self):
""" add-on-method to get the connector
this method should only be called by the class itself """
job = self.job # basic.program.Job.getInstance()
attr = self.getDbAttributes(B.SVAL_NULL)
spark = None
if B.ATTR_DB_CONN_JAR in attr:
spark = pyspark.SparkSession\
.builder\
.appName("datest")\
.config("sparkjar", "")\
.getOrCreate()
return spark

106
utils/dbshive_tool.py

@@ -1,106 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------------------------------------
# Author : Ulrich Carmesin
# Source : gitea.ucarmesin.de
# ---------------------------------------------------------------------------------------------------------
"""
This class is a technical implementation of a Hive connection with Spark - typically used in a
machine-learning environment, for example in Hadoop.
"""
import json
import os
import basic.program
import utils.config_tool
import utils.db_abstract
import pyspark
import basic.constants as B
class DbFcts(utils.db_abstract.DbFcts):
"""
This interface defines each necessary connection to any kind of database.
The specific technique for connecting to the concrete DBMS has to be implemented in the specific tool.
"""
def __init__(self):
pass
def selectRows(self, table, job):
""" method to select rows from a database
statement written in sql """
tdata = {}
dry = 0
verify = -1+job.getDebugLevel("db_tool")
attr = self.getDbAttributes(B.SVAL_NULL)
cmd = "SELECT "+",".join(self.comp.conf[B.DATA_NODE_DDL][table][B.DATA_NODE_HEADER])
cmd += " FROM "+table
sqls = self.comp.composeSqlClauses(job, cmd)
data = []
for k in sqls.keys():
sql = sqls[k]
if dry == 1:
try:
spark = self.getConnector()
df = spark.sql(sql)
dfj = df.toJSON()
for r in dfj.collect():
data.append(json.loads(r))
except:
self.comp.m.setError("Table couldnt read "+table)
else:
print("select "+sql)
self.comp.m.logInfo(sql)
tdata[B.DATA_NODE_HEADER] = self.comp.conf[B.DATA_NODE_DDL][table][B.DATA_NODE_HEADER]
tdata[B.DATA_NODE_DATA] = data
return tdata
def deleteRows(self, table, job):
""" method to delete rows from a database
statement written in sql """
dry = 0
verify = -1+job.getDebugLevel("db_tool")
cmd = "DELETE FROM "+table
print("deleteRows "+cmd)
sqls = self.comp.composeSqlClauses(job, cmd)
print("deleteRows "+cmd)
print(sqls)
for k in sqls.keys():
sql = sqls[k]
if dry == 1:
#spark = self.getConnector()
#df = spark.sql(cmd)
pass
else:
print("select "+sql)
#self.comp.m.logInfo(cmd)
def insertRows(self, table, rows, job):
""" method to insert rows into a database
the rows will be interpreted by the ddl of the component
"""
job = self.job # basic.program.Job.getInstance()
verify = -1+job.getDebugLevel("db_tool")
spark = self.getConnector()
df = spark.createDataFrame(rows)
self.comp.m.logInfo("cmd")
def getConnector(self):
""" add-on-method to get the connector
this method should only be called by the class itself """
job = self.job # basic.program.Job.getInstance()
attr = self.getDbAttributes(B.SVAL_NULL)
spark = None
if B.ATTR_DB_CONN_JAR in attr:
connectorJar = os.environ.get(attr[B.ATTR_DB_CONN_JAR])
spark = pyspark.SparkSession\
.builder\
.appName("datest")\
.config("spark.jars", f"{connectorJar}")\
.getOrCreate()
return spark

73
utils/dbspark_tool.py

@@ -1,73 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------------------------------------
# Author : Ulrich Carmesin
# Source : gitea.ucarmesin.de
# ---------------------------------------------------------------------------------------------------------
import basic.program
import utils.config_tool
import utils.db_abstract
import pyspark
import basic.constants as B
class DbFcts(utils.db_abstract.DbFcts):
"""
This interface defines each necessary connection to any kind of database.
The specific technique for connecting to the concrete DBMS has to be implemented in the specific tool.
"""
def __init__(self):
pass
def getDbAttributes(self):
out = {}
return out
def selectRows(self, table, job):
""" method to select rows from a database
statement written in sql """
tdata = {}
verify = -1+job.getDebugLevel("db_tool")
cmd = "SELECT "+",".join(self.comp.conf[B.DATA_NODE_DDL][table][B.DATA_NODE_HEADER])
cmd += " FROM "+table+""+self.getWhere()+""+self.getOrder()
spark = self.getConnector()
df = spark.sql(cmd)
data = []
for r in df:
data.append(r)
tdata[B.DATA_NODE_HEADER] = self.comp.conf[B.DATA_NODE_DDL][table][B.DATA_NODE_HEADER]
tdata[B.DATA_NODE_DATA] = data
return tdata
def deleteRows(self, table):
""" method to delete rows from a database
statement written in sql """
job = self.job # basic.program.Job.getInstance()
verify = -1+job.getDebugLevel("db_tool")
cmd = "DELETE FROM "+table+";"
self.comp.m.logInfo(cmd)
def insertRows(self, table, rows):
""" method to insert rows into a database
the rows will be interpreted by the ddl of the component
"""
job = self.job # basic.program.Job.getInstance()
verify = -1+job.getDebugLevel("db_tool")
spark = self.getConnector()
df = spark.createDataFrame(rows)
self.comp.m.logInfo("cmd")
def getConnector(self):
""" add-on-method to get the connector
this method should only be called by the class itself """
job = self.job # basic.program.Job.getInstance()
spark = pyspark.SparkSession\
.builder\
.appName("datest")\
.getOrCreate()
return spark