diff --git a/bananaSPLIT/libbananasplit/libsplit.py b/bananaSPLIT/libbananasplit/libsplit.py index 5c95994..4c5d1cb 100644 --- a/bananaSPLIT/libbananasplit/libsplit.py +++ b/bananaSPLIT/libbananasplit/libsplit.py @@ -57,6 +57,9 @@ class bananaSPLITTER(threading.Thread): except IOError as e: self.log.critical("Impossibile aprire il file: {}! [{}]".format(self.fileName,e)) raise BaseException("OpenFile") + except UnicodeDecodeError as ee: + self.log.critical("Il file [{}] contiene caratteri non compatibili con la codifica scelta! [{}]" + .format(self.fileParams['name'],ee)) pass def remEmptyLines(self): @@ -205,6 +208,8 @@ class bananaSPLITTER(threading.Thread): pass def saveSeparate(self): + os.mkdir(self.paths['OUTworkPath']+slugify(self.fileName)) + self.paths['OUTworkPath']=self.paths['OUTworkPath']+slugify(self.fileName) self.log.info("Salvo gli articoli in file separati...") self.log.debug("Persorso: {0}".format(self.paths['OUTworkPath'].format('nomeFile'))) for ff in self.fileList: diff --git a/bananaSPLIT/libtestmain.py b/bananaSPLIT/libtestmain.py index 6183d4b..34e5b98 100644 --- a/bananaSPLIT/libtestmain.py +++ b/bananaSPLIT/libtestmain.py @@ -4,6 +4,7 @@ Created on 1 dic 2019 @author: Emanuele Trabattoni ''' import os +from glob import glob from libsplit import bananaSPLITTER from libconfload import bananaCONF from libfancylogger import fancyLogger @@ -15,14 +16,15 @@ confl.open() confl.use("testEN.json") splconf = confl.getParams("splitter") -splconf["name"] = splconf["paths"]["INworkPath"]+"GUARDIAN 1989.txt" -splitter = bananaSPLITTER(fileParams=splconf, logger=logger) -splitter.openFile() -splitter.remEmptyLines() -splitter.splitFile() -splitter.removeDuplicates() -splitter.saveBody() -splitter.saveSeparate() +for f in glob(splconf["paths"]["INworkPath"]+"*.txt"): + splconf["name"] = f + splitter = bananaSPLITTER(fileParams=splconf, logger=logger) + splitter.openFile() + splitter.remEmptyLines() + splitter.splitFile() + splitter.removeDuplicates() + splitter.saveBody() + splitter.saveSeparate()