diff --git a/bananaSPLIT/libbananasplit/libsplit.py b/bananaSPLIT/libbananasplit/libsplit.py index 0dd08d8..b561651 100644 --- a/bananaSPLIT/libbananasplit/libsplit.py +++ b/bananaSPLIT/libbananasplit/libsplit.py @@ -3,13 +3,12 @@ Created on 2 nov 2019 @author: Emanuele Trabattoni ''' -from libfancylogger import fancyLogger from slugify.slugify import slugify -import threading, time, parse, re, copy, os +import time, parse, re, copy, os, multiprocessing -class bananaSPLITTER(threading.Thread): +class bananaSPLITTER(multiprocessing.Process): def __init__(self, fileParams=None, logger=None): - threading.Thread.__init__(self) + multiprocessing.Process.__init__(self) self.fileParams = fileParams self.log = logger self.rawFile = None @@ -44,7 +43,7 @@ class bananaSPLITTER(threading.Thread): for idx, ff in enumerate(self.fileList): ff['duplicate']=False self.fileList[idx]=ff - print('Salto il controllo dei duplicati..') + self.log.warn('Salto il controllo dei duplicati..') if self.settings['saveSeparateFiles']: self.saveSeparate() if self.settings['saveBodyFile']: @@ -128,11 +127,11 @@ class bananaSPLITTER(threading.Thread): newsPaperName = self.settings['nameNotFoundStr'] except: self.log.warn("E' successo qualcosa mentre stavo cercando il nome della pubblicazione,\ - controlla i file di uscita! \n\t[{}]".format(prevLine.strip())) + controlla i file di uscita! [{}]".format(prevLine.strip())) else: newsPaperName = self.settings['nameNotFoundStr'] except: - self.log.warn("Ho trovato una riga ambigua.. potrebbe essere una data ma non so: \n\t[{}]". format(l.strip('\r\n'))) + self.log.warn("Ho trovato una riga ambigua.. potrebbe essere una data ma non so: [{}]". format(l.strip('\r\n'))) pass elif lineWords[0] in self.docStruct['headWords']: #cambio stato e inizializzo un nuovo documento da riempire @@ -237,17 +236,17 @@ class bananaSPLITTER(threading.Thread): def saveBody(self): self.log.info('Salvo gli articoli in un singolo file vicino agli originali...') + self.log.debug('Persorso: {0}'.format(self.outPath)) os.chdir(self.outPath) - print ('Persorso: {0}'.format(self.outPath)) try: fName=slugify(self.fileName) fName='BODYFILE_{0}.txt'.format(fName[:self.settings['maxTitleLen']]) fileContent = os.linesep.join([cc['content'] for cc in self.fileList]) - out=open(self.paths['OUTworkPath']+'{0}'.format(fName),'wb') + out=open('{0}'.format(fName),'wb') out.write(fileContent.encode(self.settings['encoding'])) out.close() except IOError as e: - print("OOPS! Qualcosa e\' andato storto, non riesco a scrivere il file: {}".format(e)) + self.log.error("Qualcosa e\' andato storto, non riesco a scrivere il file: {}".format(e)) pass diff --git a/bananaSPLIT/libbananasplit/testEN.json b/bananaSPLIT/libbananasplit/testEN.json index 049bb91..7c3c712 100644 --- a/bananaSPLIT/libbananasplit/testEN.json +++ b/bananaSPLIT/libbananasplit/testEN.json @@ -67,7 +67,7 @@ "saveSeparateFiles": true, "saveBodyFile": true, "saveBodyNumber": true, - "delLF": true, + "delLF": false, "delWordBreak": true, "delChars": [ "'", diff --git a/bananaSPLIT/libtestmain.py b/bananaSPLIT/libtestmain.py index 8b0d7fb..24f4ff0 100644 --- a/bananaSPLIT/libtestmain.py +++ b/bananaSPLIT/libtestmain.py @@ -10,25 +10,24 @@ from libsplit import bananaSPLITTER from libconfload import bananaCONF from libfancylogger import fancyLogger -print("CWD-> "+os.getcwd()) -logger = fancyLogger(fileLog = False) -confl = bananaCONF(workdir=r"./libbananasplit", logger=logger) -confl.open() - -confl.use("testEN.json") -splconf = confl.getParams("splitter") -splist = [] -os.chdir(splconf["paths"]["INworkPath"]) -for f in glob("*.txt"): - splconf["name"] = f - logger.info("-"*80) - splitter = bananaSPLITTER(fileParams=deepcopy(splconf), logger=logger) - splist.append(splitter) - splitter.start() - splitter.join() - del splitter - -logger.info("\n"+"="*50+"\n\tFINITO!!!\n"+"="*50) +if __name__ == "__main__": + print("CWD-> "+os.getcwd()) + logger = fancyLogger(fileLog = False) + confl = bananaCONF(workdir=r"./libbananasplit", logger=logger) + confl.open() + + confl.use("testEN.json") + splconf = confl.getParams("splitter") + splist = [] + os.chdir(splconf["paths"]["INworkPath"]) + for f in glob("*.txt"): + splconf["name"] = f + logger.info("-"*80) + splitter = bananaSPLITTER(fileParams=deepcopy(splconf), logger=logger) + splist.append(splitter) + splitter.start() + + logger.info("\n"+"="*50+"\n\tFINITO!!!\n"+"="*50)