Data-Test-Executer Framework speziell zum Test von Datenverarbeitungen mit Datengenerierung, Systemvorbereitungen, Einspielungen, ganzheitlicher diversifizierender Vergleich
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

101 lines
3.3 KiB

#!/usr/bin/python
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------------------------------------
# Author : Ulrich Carmesin
# Source : gitea.ucarmesin.de
# ---------------------------------------------------------------------------------------------------------
"""
This class is a technical implementation for Hive-connection with spark - typically used in a
Machine Learning environment for example in hadoop
"""
import json
import basic.program
import utils.config_tool
import utils.db_abstract
import pyspark
import basic.constants as B
class DbFcts(utils.db_abstract.DbFcts):
    """
    This interface defines each necessary connection to any kind of database.
    The specific technique how to connect to the concrete DBMS has to be implemented
    in the specific tool — here: parquet files on s3 read through a Spark session.
    """

    def __init__(self):
        pass

    def selectRows(self, table, job):
        """ method to select rows from a database
        statement written in sql

        :param table: name of the table to select from
        :param job: current job object; used for the debug-level lookup
        :return: test-data dict; header/data assembly is still commented out (WIP),
                 so currently an empty dict is returned
        """
        tdata = {}
        dry = 0  # 0 = only print the statement, 1 = actually read via spark
        # attr = self.getDbAttributes(table)
        verify = -1+job.getDebugLevel("db_tool")
        # pattern of the parquet locations in the s3 bucket
        # fixed typo: "usecae" -> "usecase" so the placeholder can be resolved
        pattern = "s3a://{hostname}/data/{tenant}/mt/sandboxes/{job.par.usecase}/{job.par.workspace}/{outfile}/VR_+reg+/"
        files = self.comp.composeFileClauses(pattern)
        data = []
        for k in files.keys():
            sql = files[k]
            if dry == 1:
                # read each parquet path and collect its rows as plain dicts
                spark = self.getConnector()
                df = spark.read.parquet(sql)
                dfj = df.toJSON()
                for r in dfj.collect():
                    data.append(json.loads(r))
            else:
                print("select "+sql)
        #self.comp.m.logInfo(cmd)
        #tdata[B.DATA_NODE_HEADER] = self.comp.conf[B.DATA_NODE_DDL][table][B.DATA_NODE_HEADER]
        #tdata[B.DATA_NODE_DATA] = data
        return tdata

    def deleteRows(self, table, job):
        """ method to delete rows from a database
        statement written in sql

        :param table: name of the table whose rows are deleted
        :param job: current job object; used for the debug-level lookup
        """
        dry = 0  # 0 = only print the statement, 1 = actually execute via spark
        verify = -1+job.getDebugLevel("db_tool")
        cmd = "DELETE FROM "+table
        # removed an accidental duplicate print of the same command
        print("deleteRows "+cmd)
        sqls = self.comp.composeSqlClauses(cmd)
        print(sqls)
        for k in sqls.keys():
            sql = sqls[k]
            if dry == 1:
                #spark = self.getConnector()
                #df = spark.sql(cmd)
                pass
            else:
                # fixed misleading message: this branch previews a delete, not a select
                print("delete "+sql)
        #self.comp.m.logInfo(cmd)

    def insertRows(self, table, rows, job):
        """ method to insert rows into a database
        the rows will be interpreted by the ddl of the component

        :param table: name of the target table (currently unused - WIP)
        :param rows: list of row dicts to be turned into a Spark dataframe
        :param job: current job object; used for the debug-level lookup
        """
        # use the job passed by the caller instead of re-fetching the singleton,
        # consistent with selectRows/deleteRows
        verify = -1+job.getDebugLevel("db_tool")
        spark = self.getConnector()
        df = spark.createDataFrame(rows)
        # NOTE(review): the dataframe is created but never written anywhere - WIP
        self.comp.m.logInfo("cmd")

    def getConnector(self):
        """ add-on-method to get the connector
        this method should only called by the class itself

        :return: a SparkSession if a connection jar is configured, else None
        """
        # bugfix: SparkSession is not exported at the pyspark package level,
        # it has to be imported from pyspark.sql
        from pyspark.sql import SparkSession
        job = basic.program.Job.getInstance()
        attr = self.getDbAttributes(B.SVAL_NULL)
        spark = None
        if B.ATTR_DB_CONN_JAR in attr:
            spark = SparkSession\
                .builder\
                .appName("datest")\
                .config("sparkjar", "")\
                .getOrCreate()
        return spark