diff --git a/test/test_file.py b/test/test_file.py index 7ebb1bd..4057765 100644 --- a/test/test_file.py +++ b/test/test_file.py @@ -5,7 +5,7 @@ import basic.program class MyTestCase(unittest.TestCase): - def test_getFiles(self): + def xtest_getFiles(self): job = basic.program.Job("unit") args = {"application": "TEST", "application": "ENV01", "modus": "unit", "loglevel": "debug", "tool": "job_tool", "modus": "unit"} @@ -19,13 +19,22 @@ class MyTestCase(unittest.TestCase): r = t.getFilesRec(job.m, job.conf.confs.get("paths").get("program"), ".*?file.*.py") print (r) - def test_pathTool(self): + def xtest_pathTool(self): job = basic.program.Job("unit") args = {"application": "TEST", "application": "ENV01", "modus": "unit", "loglevel": "debug", "tool": "job_tool", "modus": "unit"} job.par.setParameterArgs(args) - self.assertEqual(utils.path_tool.generatePath("program", "komp", "testA", "CONFIG.yml"), - "/home/basic/6_Projekte/PythonProject/komponents/testA/COFIG.yml") + #self.assertEqual(utils.path_tool.generatePath("program", "komp", "testA", "CONFIG.yml"), + # "/home/basic/6_Projekte/PythonProject/komponents/testA/COFIG.yml") + def test_encoding(self): + print("------- test_encoding") + encodings = ['utf-8', 'windows-1250', 'iso-8859-1'] + res = utils.file_tool.getFileEncoding("/home/ulrich/6_Projekte/Programme/holtz/test/tdata/encoded_iso8859.txt") + self.assertEqual(res, "iso-8859-1") + res = utils.file_tool.getFileEncoding("/home/ulrich/6_Projekte/Programme/holtz/test/tdata/encoded_win1250.txt") + self.assertEqual(res, "iso-8859-1") + res = utils.file_tool.getFileEncoding("/home/ulrich/6_Projekte/Programme/holtz/test/tdata/encoded_utf8.txt") + self.assertEqual(res, "utf-8") if __name__ == '__main__': diff --git a/utils/file_tool.py b/utils/file_tool.py index ef4fa84..0d997df 100644 --- a/utils/file_tool.py +++ b/utils/file_tool.py @@ -3,11 +3,12 @@ """ """ +import codecs import os import os.path import re -from basic.message import Message -from basic.program 
import Job +import basic.message +import basic.program from pprint import pp def getDump(obj): result="" @@ -16,7 +17,7 @@ def getDump(obj): return str(result) # if type(obj) == "__dict__" -def getFiles(msg: Message, path, pattern, conn): +def getFiles(msg, path, pattern, conn): """ search filenames in the directory - if conn is set search remote :param msg: -- msg-Objekt @@ -27,7 +28,7 @@ def getFiles(msg: Message, path, pattern, conn): """ if conn is not None: return getRemoteFiles(msg, path, pattern, conn) - job = Job.getInstance() + job = basic.program.Job.getInstance() verify = int(job.getDebugLevel("file_tool")) out = [] msg.debug(verify, "getFiles " + path + " , " + pattern) @@ -38,7 +39,7 @@ def getFiles(msg: Message, path, pattern, conn): out.append(f) return out -def getRemoteFiles(msg: Message, path, pattern, conn): +def getRemoteFiles(msg, path, pattern, conn): """ search filenames in the directory - if conn is set search remote :param msg: -- msg-Objekt @@ -49,7 +50,7 @@ def getRemoteFiles(msg: Message, path, pattern, conn): """ -def getFilesRec(msg: Message, path, pattern): +def getFilesRec(msg, path, pattern): """ Sucht Dateien im Verzeichnis rekursiv :param msg: -- msg-Objekt @@ -57,7 +58,7 @@ def getFilesRec(msg: Message, path, pattern): :param pattern: -- Dateiname als Pattern :return: Array mit gefundenen Dateien, absoluter Pfad """ - job = Job.getInstance() + job = basic.program.Job.getInstance() verify = int(job.getDebugLevel("file_tool")) out = [] msg.debug(verify, "getFilesRec " + path + " , " + pattern) @@ -69,8 +70,8 @@ def getFilesRec(msg: Message, path, pattern): out.append(os.path.join(r, f)) return out -def getTree(msg: Message, pfad): - job = Job.getInstance() +def getTree(msg, pfad): + job = basic.program.Job.getInstance() verify = int(job.getDebugLevel("file_tool")) msg.debug(verify, "getTree " + pfad ) tree = {} @@ -84,7 +85,45 @@ def getTree(msg: Message, pfad): return tree def mkPaths(msg, pfad): - job = Job.getInstance() + job = 
def mkPaths(msg, pfad):
    """
    Create the directory ``pfad`` together with any missing parent directories.

    :param msg: -- message object (not used by this function)
    :param pfad: -- directory path to create
    :return: None; an already existing directory is not an error
    """
    job = basic.program.Job.getInstance()
    # Neither value is used below; the lookups are kept unchanged (the
    # dictionary access raises KeyError when "paths"/"mode" is not configured).
    verify = int(job.getDebugLevel("file_tool"))
    modus = job.conf.confs["paths"]["mode"]
    os.makedirs(pfad, exist_ok=True)


def getFileEncoding(path, encodings=('utf-8', 'iso-8859-1')):
    """
    Determine which of the candidate encodings decodes the whole file.

    The candidates are tried in the given order; the first one that decodes
    the complete content without an error is returned.  'iso-8859-1' maps
    every byte value to a character, so it never fails and acts as a
    catch-all - keep it last.  With the default candidates, files in other
    single-byte encodings (e.g. windows-1250) are therefore reported as
    'iso-8859-1'.

    :param path: -- file to inspect
    :param encodings: -- candidate encoding names in order of preference
    :return: name of the first fitting encoding, or None if the file cannot
        be read or no candidate fits
    :raises LookupError: if a candidate is not a known codec name
    """
    try:
        # Read the raw bytes once; the with-block closes the handle on every
        # path instead of leaking one handle per candidate encoding.
        with open(path, 'rb') as fh:
            raw = fh.read()
    except OSError:
        # A missing or unreadable file yields None rather than an exception.
        return None
    for candidate in encodings:
        try:
            raw.decode(candidate)
        except UnicodeDecodeError:
            continue
        return candidate
    return None


# Byte values counted as evidence for ISO-8859-1 by rest(): in that encoding
# they are the characters Ä ä Ö ö Ü ü and ¿.
_ISO_HINT_BYTES = frozenset((196, 228, 214, 246, 220, 252, 191))


def rest(path):
    """
    Guess between 'iso-8859-1' and 'utf-8' by counting byte values.

    Every byte of the file is classified: a value from the ISO hint set
    counts for ISO-8859-1, any other value above 127 counts for UTF-8.
    Bytes up to 127 are ignored.

    :param path: -- file to inspect
    :return: 'iso-8859-1' if the ISO hint bytes outnumber the other high
        bytes, otherwise 'utf-8' (also for empty or pure-ASCII files)
    """
    iso_hits = 0
    other_high = 0
    with open(path, 'rb') as fh:
        # Chunked reads keep memory bounded without one read call per byte.
        while chunk := fh.read(65536):
            for value in chunk:
                if value in _ISO_HINT_BYTES:
                    iso_hits += 1
                elif value > 127:
                    other_high += 1
    if iso_hits > other_high:
        return 'iso-8859-1'
    return 'utf-8'