diff --git a/bananaSPLIT/libbabanasplit/libsplit.py b/bananaSPLIT/libbabanasplit/libsplit.py index 1ed46fd..a452d92 100644 --- a/bananaSPLIT/libbabanasplit/libsplit.py +++ b/bananaSPLIT/libbabanasplit/libsplit.py @@ -15,6 +15,7 @@ class bananaSPLITTER(threading.Thread): self.status = "first" self.fileList = list() self.bodyCounter=0 + self.duplicateNumber=0 if fileParams is not None: self.log.info("Sto operando sul file: {}..".format(self.fileParams['name'])) self.paths = self.fileParams['paths'] @@ -28,8 +29,22 @@ class bananaSPLITTER(threading.Thread): pass def run(self): + self.log.info("Nuovo SPLITTER su file: {}".format(self.fileName)) self.openFile() - + self.remEmptyLines() + self.splitFile() + if self.settings['removeDuplicates']: + self.log.info("Controllo se ci sono dei duplicati..") + self.removeDuplicates() + else: + for idx, ff in enumerate(self.fileList): + ff['duplicate']=False + self.fileList[idx]=ff + print('Salto il controllo dei duplicati..') + if self.settings['saveSeparateFiles']: + self.saveSeparate() + if self.settings['saveBodyFile']: + self.saveBody() pass def openFile(self): @@ -42,7 +57,7 @@ class bananaSPLITTER(threading.Thread): self.log.critical("Impossibile aprire il file: {}! [{}]".format(self.fileName,e)) raise BaseException("OpenFile") pass - + def remEmptyLines(self): self.log.info("Elimino righe vuote e caratteri inutili..") tempContent = [] @@ -168,10 +183,8 @@ class bananaSPLITTER(threading.Thread): pass def removeDuplicates(self): - self.log.info("Controllo se ci sono dei duplicati..") titleList=[] duplicateList=[] - duplicateNumber=0 for idx, ff in enumerate(self.fileList): if ff['title'] not in titleList: titleList.append(ff['title']) @@ -185,8 +198,8 @@ class bananaSPLITTER(threading.Thread): self.log.info("Duplicato: {}".format(ff['title'].strip())) ff['duplicate'] = True self.fileList[idx]=ff - duplicateNumber+=1 - self.log.info("Ho rimosso {} duplicati di {} articoli..\n". format(duplicateNumber, len(duplicateList))) + self.duplicateNumber+=1 + self.log.info("Ho rimosso {} duplicati di {} articoli..\n". format(self.duplicateNumber, len(duplicateList))) pass def saveSeparate(self):