diff --git a/bananaSPLIT/libbananasplit/libsplit.py b/bananaSPLIT/libbananasplit/libsplit.py index a421828..e87441b 100644 --- a/bananaSPLIT/libbananasplit/libsplit.py +++ b/bananaSPLIT/libbananasplit/libsplit.py @@ -44,26 +44,31 @@ class bananaSPLITTER(threading.Thread): ff['duplicate']=False self.fileList[idx]=ff self.log.warn('Salto il controllo dei duplicati..') - # se il parse e la rimozione dei duplicati e' andata bene # preparo e inizio il salvataggio if os.path.exists(self.outPath): + if self.settings['removeOldFiles']: + os.chdir(self.outPath) + for f in os.listdir(self.outPath): + os.remove(f) + else: + raise FileExistsError("Non posso sovrascrivere i vecchi file, eliminali manualmente!") + else: + os.mkdir(self.outPath) os.chdir(self.outPath) - for f in os.listdir(self.outPath): - os.remove(f) - os.chdir("..") - os.rmdir(self.outPath) - if self.settings['saveSeparateFiles']: self.saveSeparate() if self.settings['saveBodyFile']: self.saveBody() + self.log.info("L'elaborazione del file ha richiesto {:4.2f} sec".format(time.time()-self.beginTime)) + except UnicodeDecodeError as ee: self.log.critical("Il file [{}] contiene caratteri non compatibili con la codifica scelta! [{}]" .format(self.fileParams['name'],ee)) - os.rmdir(self.outPath) + except FileExistsError as fe: + self.log.critical(fe) except BaseException as ee: - self.log.critical(ee) + self.log.warning(ee) pass def openFile(self): @@ -227,7 +232,6 @@ class bananaSPLITTER(threading.Thread): pass def saveSeparate(self): - os.chdir(self.outPath) outFileCounter = 0 self.paths['OUTworkPath']=self.paths['OUTworkPath']+slugify(self.fileName) self.log.info("Salvo gli articoli in file separati...") diff --git a/bananaSPLIT/libbananasplit/testEN.json b/bananaSPLIT/libbananasplit/testEN.json index b3de8d2..b9e9f25 100644 --- a/bananaSPLIT/libbananasplit/testEN.json +++ b/bananaSPLIT/libbananasplit/testEN.json @@ -57,7 +57,7 @@ "getNewsPaperName": true, "nameNotFoundStr": "ND", "includeTitle": true, - "removeDuplicates": false, + "removeDuplicates": true, "showSkipped": true, "showRemovedDuplicates": false, "maxTitleLen": 32,