corretta numerazione sequenziale dei file

This commit is contained in:
2019-12-13 14:26:07 +01:00
parent 275101eed3
commit 82710a73a3
2 changed files with 12 additions and 4 deletions

View File

@@ -52,6 +52,8 @@ class bananaSPLITTER(threading.Thread):
self.log.critical("Il file [{}] contiene caratteri non compatibili con la codifica scelta! [{}]"
.format(self.fileParams['name'],ee))
os.rmdir(self.outPath)
except BaseException as ee:
self.log.critical(ee)
pass
def openFile(self):
@@ -85,6 +87,8 @@ class bananaSPLITTER(threading.Thread):
def splitFile(self): #porting del codice dal programma originale
self.log.info("Individuo il contenuto..")
self.bodyCounter=0
self.duplicateNumber=0
docNumber = 0
docSkipped = 0
docDate = {}
@@ -179,6 +183,7 @@ class bananaSPLITTER(threading.Thread):
newDoc['content']=copy.deepcopy(''.join(tempBody))
self.fileList.append(copy.deepcopy(newDoc))
tempBody=list()
self.log.info(self.fileName + " - {}".format(self.bodyCounter))
self.bodyCounter +=1
pass
else:
@@ -214,6 +219,7 @@ class bananaSPLITTER(threading.Thread):
def saveSeparate(self):
os.chdir(self.outPath)
outFileCounter = 0
self.paths['OUTworkPath']=self.paths['OUTworkPath']+slugify(self.fileName)
self.log.info("Salvo gli articoli in file separati...")
self.log.debug("Persorso: {0}".format(self.paths['OUTworkPath'].format('nomeFile')))
@@ -222,7 +228,7 @@ class bananaSPLITTER(threading.Thread):
if ff['duplicate'] == False:
fName=self.paths['OUTnameFormat'].format(title=slugify(ff['title'][:self.settings['maxTitleLen']]),\
filename=slugify(self.fileName),\
docnum=self.bodyCounter,\
docnum=outFileCounter,\
papername=ff['newsPaperName'].strip(),\
**ff['date'])
out=open('{0}'.format(fName),'wb')
@@ -230,10 +236,12 @@ class bananaSPLITTER(threading.Thread):
ff['content'] = ff['title']+os.linesep+ff['content']
out.write(ff['content'].encode(self.settings['encoding']))
out.close()
self.bodyCounter+=1
outFileCounter+=1
except IOError as e:
self.log.error("Qualcosa e\' andato storto, non riesco a scrivere il file: {}".format(e))
continue
if outFileCounter < self.bodyCounter:
raise BaseException("Ho salvato meno file rispetto a quelli trovati!")
pass
def saveBody(self):

View File

@@ -57,9 +57,9 @@
"getNewsPaperName": true,
"nameNotFoundStr": "ND",
"includeTitle": true,
"removeDuplicates": true,
"removeDuplicates": false,
"showSkipped": true,
"showRemovedDuplicates": true,
"showRemovedDuplicates": false,
"maxTitleLen": 32,
"loadTXT": true,
"loadDOCX": false,