correzioni procedura salvataggio file e cartelle

This commit is contained in:
2019-12-05 20:26:50 +01:00
parent 01dd92e4da
commit e3307d8db5
4 changed files with 43 additions and 35 deletions

View File

@@ -23,7 +23,9 @@ class bananaSPLITTER(threading.Thread):
self.docStruct = self.fileParams['docStruct'] self.docStruct = self.fileParams['docStruct']
self.settings = self.fileParams['settings'] self.settings = self.fileParams['settings']
self.fileName = self.fileParams['name'] self.fileName = self.fileParams['name']
self.outPath = self.paths['OUTworkPath']+slugify(self.fileName)
self.beginTime = time.time() self.beginTime = time.time()
os.mkdir(self.outPath)
pass pass
else: else:
self.log.critical("Non e' stato fornito il nome di alcun file da splittare!") self.log.critical("Non e' stato fornito il nome di alcun file da splittare!")
@@ -31,6 +33,7 @@ class bananaSPLITTER(threading.Thread):
def run(self): def run(self):
self.log.info("Nuovo SPLITTER su file: {}".format(self.fileName)) self.log.info("Nuovo SPLITTER su file: {}".format(self.fileName))
try:
self.openFile() self.openFile()
self.remEmptyLines() self.remEmptyLines()
self.splitFile() self.splitFile()
@@ -46,10 +49,14 @@ class bananaSPLITTER(threading.Thread):
self.saveSeparate() self.saveSeparate()
if self.settings['saveBodyFile']: if self.settings['saveBodyFile']:
self.saveBody() self.saveBody()
except UnicodeDecodeError as ee:
self.log.critical("Il file [{}] contiene caratteri non compatibili con la codifica scelta! [{}]"
.format(self.fileParams['name'],ee))
pass pass
def openFile(self): def openFile(self):
try: try:
os.chdir(self.paths["INworkPath"])
self.log.info("Carico il contenuto..") self.log.info("Carico il contenuto..")
fp = open(self.fileParams['name'], mode='r', encoding=self.settings['encoding']) fp = open(self.fileParams['name'], mode='r', encoding=self.settings['encoding'])
self.rawFile = fp.readlines() self.rawFile = fp.readlines()
@@ -57,9 +64,6 @@ class bananaSPLITTER(threading.Thread):
except IOError as e: except IOError as e:
self.log.critical("Impossibile aprire il file: {}! [{}]".format(self.fileName,e)) self.log.critical("Impossibile aprire il file: {}! [{}]".format(self.fileName,e))
raise BaseException("OpenFile") raise BaseException("OpenFile")
except UnicodeDecodeError as ee:
self.log.critical("Il file [{}] contiene caratteri non compatibili con la codifica scelta! [{}]"
.format(self.fileParams['name'],ee))
pass pass
def remEmptyLines(self): def remEmptyLines(self):
@@ -204,11 +208,11 @@ class bananaSPLITTER(threading.Thread):
ff['duplicate'] = True ff['duplicate'] = True
self.fileList[idx]=ff self.fileList[idx]=ff
self.duplicateNumber+=1 self.duplicateNumber+=1
self.log.info("Ho rimosso {} duplicati di {} articoli..\n". format(self.duplicateNumber, len(duplicateList))) self.log.info("Ho rimosso {} duplicati di {} articoli..". format(self.duplicateNumber, len(duplicateList)))
pass pass
def saveSeparate(self): def saveSeparate(self):
os.mkdir(self.paths['OUTworkPath']+slugify(self.fileName)) os.chdir(self.outPath)
self.paths['OUTworkPath']=self.paths['OUTworkPath']+slugify(self.fileName) self.paths['OUTworkPath']=self.paths['OUTworkPath']+slugify(self.fileName)
self.log.info("Salvo gli articoli in file separati...") self.log.info("Salvo gli articoli in file separati...")
self.log.debug("Persorso: {0}".format(self.paths['OUTworkPath'].format('nomeFile'))) self.log.debug("Persorso: {0}".format(self.paths['OUTworkPath'].format('nomeFile')))
@@ -220,7 +224,7 @@ class bananaSPLITTER(threading.Thread):
docnum=self.bodyCounter,\ docnum=self.bodyCounter,\
papername=ff['newsPaperName'].strip(),\ papername=ff['newsPaperName'].strip(),\
**ff['date']) **ff['date'])
out=open(self.paths['OUTworkPath']+'{0}'.format(fName),'wb') out=open('{0}'.format(fName),'wb')
if self.settings['includeTitle']: if self.settings['includeTitle']:
ff['content'] = ff['title']+os.linesep+ff['content'] ff['content'] = ff['title']+os.linesep+ff['content']
out.write(ff['content'].encode(self.settings['encoding'])) out.write(ff['content'].encode(self.settings['encoding']))
@@ -232,8 +236,9 @@ class bananaSPLITTER(threading.Thread):
pass pass
def saveBody(self): def saveBody(self):
print('Salvo gli articoli in un singolo file vicino agli originali...') self.log.info('Salvo gli articoli in un singolo file vicino agli originali...')
print ('Persorso: {0}'.format(self.paths['OUTworkPath'].format('nomeFile'))) os.chdir(self.outPath)
print ('Persorso: {0}'.format(self.outPath))
try: try:
fName=slugify(self.fileName) fName=slugify(self.fileName)
fName='BODYFILE_{0}.txt'.format(fName[:self.settings['maxTitleLen']]) fName='BODYFILE_{0}.txt'.format(fName[:self.settings['maxTitleLen']])

View File

@@ -9,7 +9,7 @@
"name": "", "name": "",
"paths": { "paths": {
"INworkPath": "D:\\Emanuele\\Documenti\\workspace\\bananaSPLIT\\TestFiles\\", "INworkPath": "D:\\Emanuele\\Documenti\\workspace\\bananaSPLIT\\TestFiles\\",
"OUTworkPath": "D:\\Test\\Separati\\", "OUTworkPath": "H:\\",
"OUTnameFormat": "TEST_{docnum}_{year:04d}{month:02d}{day:02d}_{title}.txt" "OUTnameFormat": "TEST_{docnum}_{year:04d}{month:02d}{day:02d}_{title}.txt"
}, },
"docStruct": { "docStruct": {

View File

@@ -9,7 +9,7 @@
"name": "", "name": "",
"paths": { "paths": {
"INworkPath": "D:\\Emanuele\\Documenti\\workspace\\bananaSPLIT\\TestFiles\\", "INworkPath": "D:\\Emanuele\\Documenti\\workspace\\bananaSPLIT\\TestFiles\\",
"OUTworkPath": "D:\\Test\\Separati\\", "OUTworkPath": "H:\\",
"OUTnameFormat": "TEST_{docnum}_{year:04d}{month:02d}{day:02d}_{title}.txt" "OUTnameFormat": "TEST_{docnum}_{year:04d}{month:02d}{day:02d}_{title}.txt"
}, },
"docStruct": { "docStruct": {

View File

@@ -5,6 +5,7 @@ Created on 1 dic 2019
''' '''
import os import os
from glob import glob from glob import glob
from copy import deepcopy
from libsplit import bananaSPLITTER from libsplit import bananaSPLITTER
from libconfload import bananaCONF from libconfload import bananaCONF
from libfancylogger import fancyLogger from libfancylogger import fancyLogger
@@ -16,16 +17,18 @@ confl.open()
confl.use("testEN.json") confl.use("testEN.json")
splconf = confl.getParams("splitter") splconf = confl.getParams("splitter")
for f in glob(splconf["paths"]["INworkPath"]+"*.txt"): splist = []
os.chdir(splconf["paths"]["INworkPath"])
for f in glob("*.txt"):
splconf["name"] = f splconf["name"] = f
splitter = bananaSPLITTER(fileParams=splconf, logger=logger) logger.info("-"*80)
splitter.openFile() splitter = bananaSPLITTER(fileParams=deepcopy(splconf), logger=logger)
splitter.remEmptyLines() splist.append(splitter)
splitter.splitFile() splitter.start()
splitter.removeDuplicates() splitter.join()
splitter.saveBody() del splitter
splitter.saveSeparate()
logger.info("\n"+"="*50+"\n\tFINITO!!!\n"+"="*50)