Cambiamenti che non mi ricordo; mi hanno distratto nel frattempo

This commit is contained in:
2019-11-24 12:48:30 +01:00
parent 04a32c7bd1
commit 6e83ce559d

View File

@@ -15,6 +15,7 @@ class bananaSPLITTER(threading.Thread):
self.status = "first" self.status = "first"
self.fileList = list() self.fileList = list()
self.bodyCounter=0 self.bodyCounter=0
self.duplicateNumber=0
if fileParams is not None: if fileParams is not None:
self.log.info("Sto operando sul file: {}..".format(self.fileParams['name'])) self.log.info("Sto operando sul file: {}..".format(self.fileParams['name']))
self.paths = self.fileParams['paths'] self.paths = self.fileParams['paths']
@@ -28,8 +29,22 @@ class bananaSPLITTER(threading.Thread):
pass pass
def run(self): def run(self):
self.log.info("Nuovo SPLITTER su file: {}".format(self.fileName))
self.openFile() self.openFile()
self.remEmptyLines()
self.splitFile()
if self.settings['removeDuplicates']:
self.log.info("Controllo se ci sono dei duplicati..")
self.removeDuplicates()
else:
for idx, ff in enumerate(self.fileList):
ff['duplicate']=False
self.fileList[idx]=ff
print('Salto il controllo dei duplicati..')
if self.settings['saveSeparateFiles']:
self.saveSeparate()
if self.settings['saveBodyFile']:
self.saveBody()
pass pass
def openFile(self): def openFile(self):
@@ -42,7 +57,7 @@ class bananaSPLITTER(threading.Thread):
self.log.critical("Impossibile aprire il file: {}! [{}]".format(self.fileName,e)) self.log.critical("Impossibile aprire il file: {}! [{}]".format(self.fileName,e))
raise BaseException("OpenFile") raise BaseException("OpenFile")
pass pass
def remEmptyLines(self): def remEmptyLines(self):
self.log.info("Elimino righe vuote e caratteri inutili..") self.log.info("Elimino righe vuote e caratteri inutili..")
tempContent = [] tempContent = []
@@ -168,10 +183,8 @@ class bananaSPLITTER(threading.Thread):
pass pass
def removeDuplicates(self): def removeDuplicates(self):
self.log.info("Controllo se ci sono dei duplicati..")
titleList=[] titleList=[]
duplicateList=[] duplicateList=[]
duplicateNumber=0
for idx, ff in enumerate(self.fileList): for idx, ff in enumerate(self.fileList):
if ff['title'] not in titleList: if ff['title'] not in titleList:
titleList.append(ff['title']) titleList.append(ff['title'])
@@ -185,8 +198,8 @@ class bananaSPLITTER(threading.Thread):
self.log.info("Duplicato: {}".format(ff['title'].strip())) self.log.info("Duplicato: {}".format(ff['title'].strip()))
ff['duplicate'] = True ff['duplicate'] = True
self.fileList[idx]=ff self.fileList[idx]=ff
duplicateNumber+=1 self.duplicateNumber+=1
self.log.info("Ho rimosso {} duplicati di {} articoli..\n". format(duplicateNumber, len(duplicateList))) self.log.info("Ho rimosso {} duplicati di {} articoli..\n". format(self.duplicateNumber, len(duplicateList)))
pass pass
def saveSeparate(self): def saveSeparate(self):