corretta numerazione sequenziale dei file
This commit is contained in:
@@ -52,6 +52,8 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
self.log.critical("Il file [{}] contiene caratteri non compatibili con la codifica scelta! [{}]"
|
self.log.critical("Il file [{}] contiene caratteri non compatibili con la codifica scelta! [{}]"
|
||||||
.format(self.fileParams['name'],ee))
|
.format(self.fileParams['name'],ee))
|
||||||
os.rmdir(self.outPath)
|
os.rmdir(self.outPath)
|
||||||
|
except BaseException as ee:
|
||||||
|
self.log.critical(ee)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def openFile(self):
|
def openFile(self):
|
||||||
@@ -85,6 +87,8 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
|
|
||||||
def splitFile(self): #porting del codice dal programma originale
|
def splitFile(self): #porting del codice dal programma originale
|
||||||
self.log.info("Individuo il contenuto..")
|
self.log.info("Individuo il contenuto..")
|
||||||
|
self.bodyCounter=0
|
||||||
|
self.duplicateNumber=0
|
||||||
docNumber = 0
|
docNumber = 0
|
||||||
docSkipped = 0
|
docSkipped = 0
|
||||||
docDate = {}
|
docDate = {}
|
||||||
@@ -179,6 +183,7 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
newDoc['content']=copy.deepcopy(''.join(tempBody))
|
newDoc['content']=copy.deepcopy(''.join(tempBody))
|
||||||
self.fileList.append(copy.deepcopy(newDoc))
|
self.fileList.append(copy.deepcopy(newDoc))
|
||||||
tempBody=list()
|
tempBody=list()
|
||||||
|
self.log.info(self.fileName + " - {}".format(self.bodyCounter))
|
||||||
self.bodyCounter +=1
|
self.bodyCounter +=1
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
@@ -214,6 +219,7 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
|
|
||||||
def saveSeparate(self):
|
def saveSeparate(self):
|
||||||
os.chdir(self.outPath)
|
os.chdir(self.outPath)
|
||||||
|
outFileCounter = 0
|
||||||
self.paths['OUTworkPath']=self.paths['OUTworkPath']+slugify(self.fileName)
|
self.paths['OUTworkPath']=self.paths['OUTworkPath']+slugify(self.fileName)
|
||||||
self.log.info("Salvo gli articoli in file separati...")
|
self.log.info("Salvo gli articoli in file separati...")
|
||||||
self.log.debug("Persorso: {0}".format(self.paths['OUTworkPath'].format('nomeFile')))
|
self.log.debug("Persorso: {0}".format(self.paths['OUTworkPath'].format('nomeFile')))
|
||||||
@@ -222,7 +228,7 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
if ff['duplicate'] == False:
|
if ff['duplicate'] == False:
|
||||||
fName=self.paths['OUTnameFormat'].format(title=slugify(ff['title'][:self.settings['maxTitleLen']]),\
|
fName=self.paths['OUTnameFormat'].format(title=slugify(ff['title'][:self.settings['maxTitleLen']]),\
|
||||||
filename=slugify(self.fileName),\
|
filename=slugify(self.fileName),\
|
||||||
docnum=self.bodyCounter,\
|
docnum=outFileCounter,\
|
||||||
papername=ff['newsPaperName'].strip(),\
|
papername=ff['newsPaperName'].strip(),\
|
||||||
**ff['date'])
|
**ff['date'])
|
||||||
out=open('{0}'.format(fName),'wb')
|
out=open('{0}'.format(fName),'wb')
|
||||||
@@ -230,10 +236,12 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
ff['content'] = ff['title']+os.linesep+ff['content']
|
ff['content'] = ff['title']+os.linesep+ff['content']
|
||||||
out.write(ff['content'].encode(self.settings['encoding']))
|
out.write(ff['content'].encode(self.settings['encoding']))
|
||||||
out.close()
|
out.close()
|
||||||
self.bodyCounter+=1
|
outFileCounter+=1
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
self.log.error("Qualcosa e\' andato storto, non riesco a scrivere il file: {}".format(e))
|
self.log.error("Qualcosa e\' andato storto, non riesco a scrivere il file: {}".format(e))
|
||||||
continue
|
continue
|
||||||
|
if outFileCounter < self.bodyCounter:
|
||||||
|
raise BaseException("Ho salvato meno file rispetto a quelli trovati!")
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def saveBody(self):
|
def saveBody(self):
|
||||||
|
|||||||
@@ -57,9 +57,9 @@
|
|||||||
"getNewsPaperName": true,
|
"getNewsPaperName": true,
|
||||||
"nameNotFoundStr": "ND",
|
"nameNotFoundStr": "ND",
|
||||||
"includeTitle": true,
|
"includeTitle": true,
|
||||||
"removeDuplicates": true,
|
"removeDuplicates": false,
|
||||||
"showSkipped": true,
|
"showSkipped": true,
|
||||||
"showRemovedDuplicates": true,
|
"showRemovedDuplicates": false,
|
||||||
"maxTitleLen": 32,
|
"maxTitleLen": 32,
|
||||||
"loadTXT": true,
|
"loadTXT": true,
|
||||||
"loadDOCX": false,
|
"loadDOCX": false,
|
||||||
|
|||||||
Reference in New Issue
Block a user