corretta numerazione sequenziale dei file
This commit is contained in:
@@ -52,6 +52,8 @@ class bananaSPLITTER(threading.Thread):
|
||||
self.log.critical("Il file [{}] contiene caratteri non compatibili con la codifica scelta! [{}]"
|
||||
.format(self.fileParams['name'],ee))
|
||||
os.rmdir(self.outPath)
|
||||
except BaseException as ee:
|
||||
self.log.critical(ee)
|
||||
pass
|
||||
|
||||
def openFile(self):
|
||||
@@ -85,6 +87,8 @@ class bananaSPLITTER(threading.Thread):
|
||||
|
||||
def splitFile(self): #porting del codice dal programma originale
|
||||
self.log.info("Individuo il contenuto..")
|
||||
self.bodyCounter=0
|
||||
self.duplicateNumber=0
|
||||
docNumber = 0
|
||||
docSkipped = 0
|
||||
docDate = {}
|
||||
@@ -179,6 +183,7 @@ class bananaSPLITTER(threading.Thread):
|
||||
newDoc['content']=copy.deepcopy(''.join(tempBody))
|
||||
self.fileList.append(copy.deepcopy(newDoc))
|
||||
tempBody=list()
|
||||
self.log.info(self.fileName + " - {}".format(self.bodyCounter))
|
||||
self.bodyCounter +=1
|
||||
pass
|
||||
else:
|
||||
@@ -214,6 +219,7 @@ class bananaSPLITTER(threading.Thread):
|
||||
|
||||
def saveSeparate(self):
|
||||
os.chdir(self.outPath)
|
||||
outFileCounter = 0
|
||||
self.paths['OUTworkPath']=self.paths['OUTworkPath']+slugify(self.fileName)
|
||||
self.log.info("Salvo gli articoli in file separati...")
|
||||
self.log.debug("Persorso: {0}".format(self.paths['OUTworkPath'].format('nomeFile')))
|
||||
@@ -222,7 +228,7 @@ class bananaSPLITTER(threading.Thread):
|
||||
if ff['duplicate'] == False:
|
||||
fName=self.paths['OUTnameFormat'].format(title=slugify(ff['title'][:self.settings['maxTitleLen']]),\
|
||||
filename=slugify(self.fileName),\
|
||||
docnum=self.bodyCounter,\
|
||||
docnum=outFileCounter,\
|
||||
papername=ff['newsPaperName'].strip(),\
|
||||
**ff['date'])
|
||||
out=open('{0}'.format(fName),'wb')
|
||||
@@ -230,10 +236,12 @@ class bananaSPLITTER(threading.Thread):
|
||||
ff['content'] = ff['title']+os.linesep+ff['content']
|
||||
out.write(ff['content'].encode(self.settings['encoding']))
|
||||
out.close()
|
||||
self.bodyCounter+=1
|
||||
outFileCounter+=1
|
||||
except IOError as e:
|
||||
self.log.error("Qualcosa e\' andato storto, non riesco a scrivere il file: {}".format(e))
|
||||
continue
|
||||
if outFileCounter < self.bodyCounter:
|
||||
raise BaseException("Ho salvato meno file rispetto a quelli trovati!")
|
||||
pass
|
||||
|
||||
def saveBody(self):
|
||||
|
||||
@@ -57,9 +57,9 @@
|
||||
"getNewsPaperName": true,
|
||||
"nameNotFoundStr": "ND",
|
||||
"includeTitle": true,
|
||||
"removeDuplicates": true,
|
||||
"removeDuplicates": false,
|
||||
"showSkipped": true,
|
||||
"showRemovedDuplicates": true,
|
||||
"showRemovedDuplicates": false,
|
||||
"maxTitleLen": 32,
|
||||
"loadTXT": true,
|
||||
"loadDOCX": false,
|
||||
|
||||
Reference in New Issue
Block a user