correzioni procedura salvataggio file e cartelle
This commit is contained in:
@@ -23,7 +23,9 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
self.docStruct = self.fileParams['docStruct']
|
self.docStruct = self.fileParams['docStruct']
|
||||||
self.settings = self.fileParams['settings']
|
self.settings = self.fileParams['settings']
|
||||||
self.fileName = self.fileParams['name']
|
self.fileName = self.fileParams['name']
|
||||||
|
self.outPath = self.paths['OUTworkPath']+slugify(self.fileName)
|
||||||
self.beginTime = time.time()
|
self.beginTime = time.time()
|
||||||
|
os.mkdir(self.outPath)
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
self.log.critical("Non e' stato fornito il nome di alcun file da splittare!")
|
self.log.critical("Non e' stato fornito il nome di alcun file da splittare!")
|
||||||
@@ -31,6 +33,7 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
self.log.info("Nuovo SPLITTER su file: {}".format(self.fileName))
|
self.log.info("Nuovo SPLITTER su file: {}".format(self.fileName))
|
||||||
|
try:
|
||||||
self.openFile()
|
self.openFile()
|
||||||
self.remEmptyLines()
|
self.remEmptyLines()
|
||||||
self.splitFile()
|
self.splitFile()
|
||||||
@@ -46,10 +49,14 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
self.saveSeparate()
|
self.saveSeparate()
|
||||||
if self.settings['saveBodyFile']:
|
if self.settings['saveBodyFile']:
|
||||||
self.saveBody()
|
self.saveBody()
|
||||||
|
except UnicodeDecodeError as ee:
|
||||||
|
self.log.critical("Il file [{}] contiene caratteri non compatibili con la codifica scelta! [{}]"
|
||||||
|
.format(self.fileParams['name'],ee))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def openFile(self):
|
def openFile(self):
|
||||||
try:
|
try:
|
||||||
|
os.chdir(self.paths["INworkPath"])
|
||||||
self.log.info("Carico il contenuto..")
|
self.log.info("Carico il contenuto..")
|
||||||
fp = open(self.fileParams['name'], mode='r', encoding=self.settings['encoding'])
|
fp = open(self.fileParams['name'], mode='r', encoding=self.settings['encoding'])
|
||||||
self.rawFile = fp.readlines()
|
self.rawFile = fp.readlines()
|
||||||
@@ -57,9 +64,6 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
except IOError as e:
|
except IOError as e:
|
||||||
self.log.critical("Impossibile aprire il file: {}! [{}]".format(self.fileName,e))
|
self.log.critical("Impossibile aprire il file: {}! [{}]".format(self.fileName,e))
|
||||||
raise BaseException("OpenFile")
|
raise BaseException("OpenFile")
|
||||||
except UnicodeDecodeError as ee:
|
|
||||||
self.log.critical("Il file [{}] contiene caratteri non compatibili con la codifica scelta! [{}]"
|
|
||||||
.format(self.fileParams['name'],ee))
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def remEmptyLines(self):
|
def remEmptyLines(self):
|
||||||
@@ -204,11 +208,11 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
ff['duplicate'] = True
|
ff['duplicate'] = True
|
||||||
self.fileList[idx]=ff
|
self.fileList[idx]=ff
|
||||||
self.duplicateNumber+=1
|
self.duplicateNumber+=1
|
||||||
self.log.info("Ho rimosso {} duplicati di {} articoli..\n". format(self.duplicateNumber, len(duplicateList)))
|
self.log.info("Ho rimosso {} duplicati di {} articoli..". format(self.duplicateNumber, len(duplicateList)))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def saveSeparate(self):
|
def saveSeparate(self):
|
||||||
os.mkdir(self.paths['OUTworkPath']+slugify(self.fileName))
|
os.chdir(self.outPath)
|
||||||
self.paths['OUTworkPath']=self.paths['OUTworkPath']+slugify(self.fileName)
|
self.paths['OUTworkPath']=self.paths['OUTworkPath']+slugify(self.fileName)
|
||||||
self.log.info("Salvo gli articoli in file separati...")
|
self.log.info("Salvo gli articoli in file separati...")
|
||||||
self.log.debug("Persorso: {0}".format(self.paths['OUTworkPath'].format('nomeFile')))
|
self.log.debug("Persorso: {0}".format(self.paths['OUTworkPath'].format('nomeFile')))
|
||||||
@@ -220,7 +224,7 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
docnum=self.bodyCounter,\
|
docnum=self.bodyCounter,\
|
||||||
papername=ff['newsPaperName'].strip(),\
|
papername=ff['newsPaperName'].strip(),\
|
||||||
**ff['date'])
|
**ff['date'])
|
||||||
out=open(self.paths['OUTworkPath']+'{0}'.format(fName),'wb')
|
out=open('{0}'.format(fName),'wb')
|
||||||
if self.settings['includeTitle']:
|
if self.settings['includeTitle']:
|
||||||
ff['content'] = ff['title']+os.linesep+ff['content']
|
ff['content'] = ff['title']+os.linesep+ff['content']
|
||||||
out.write(ff['content'].encode(self.settings['encoding']))
|
out.write(ff['content'].encode(self.settings['encoding']))
|
||||||
@@ -232,8 +236,9 @@ class bananaSPLITTER(threading.Thread):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def saveBody(self):
|
def saveBody(self):
|
||||||
print('Salvo gli articoli in un singolo file vicino agli originali...')
|
self.log.info('Salvo gli articoli in un singolo file vicino agli originali...')
|
||||||
print ('Persorso: {0}'.format(self.paths['OUTworkPath'].format('nomeFile')))
|
os.chdir(self.outPath)
|
||||||
|
print ('Persorso: {0}'.format(self.outPath))
|
||||||
try:
|
try:
|
||||||
fName=slugify(self.fileName)
|
fName=slugify(self.fileName)
|
||||||
fName='BODYFILE_{0}.txt'.format(fName[:self.settings['maxTitleLen']])
|
fName='BODYFILE_{0}.txt'.format(fName[:self.settings['maxTitleLen']])
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"name": "",
|
"name": "",
|
||||||
"paths": {
|
"paths": {
|
||||||
"INworkPath": "D:\\Emanuele\\Documenti\\workspace\\bananaSPLIT\\TestFiles\\",
|
"INworkPath": "D:\\Emanuele\\Documenti\\workspace\\bananaSPLIT\\TestFiles\\",
|
||||||
"OUTworkPath": "D:\\Test\\Separati\\",
|
"OUTworkPath": "H:\\",
|
||||||
"OUTnameFormat": "TEST_{docnum}_{year:04d}{month:02d}{day:02d}_{title}.txt"
|
"OUTnameFormat": "TEST_{docnum}_{year:04d}{month:02d}{day:02d}_{title}.txt"
|
||||||
},
|
},
|
||||||
"docStruct": {
|
"docStruct": {
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
"name": "",
|
"name": "",
|
||||||
"paths": {
|
"paths": {
|
||||||
"INworkPath": "D:\\Emanuele\\Documenti\\workspace\\bananaSPLIT\\TestFiles\\",
|
"INworkPath": "D:\\Emanuele\\Documenti\\workspace\\bananaSPLIT\\TestFiles\\",
|
||||||
"OUTworkPath": "D:\\Test\\Separati\\",
|
"OUTworkPath": "H:\\",
|
||||||
"OUTnameFormat": "TEST_{docnum}_{year:04d}{month:02d}{day:02d}_{title}.txt"
|
"OUTnameFormat": "TEST_{docnum}_{year:04d}{month:02d}{day:02d}_{title}.txt"
|
||||||
},
|
},
|
||||||
"docStruct": {
|
"docStruct": {
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ Created on 1 dic 2019
|
|||||||
'''
|
'''
|
||||||
import os
|
import os
|
||||||
from glob import glob
|
from glob import glob
|
||||||
|
from copy import deepcopy
|
||||||
from libsplit import bananaSPLITTER
|
from libsplit import bananaSPLITTER
|
||||||
from libconfload import bananaCONF
|
from libconfload import bananaCONF
|
||||||
from libfancylogger import fancyLogger
|
from libfancylogger import fancyLogger
|
||||||
@@ -16,16 +17,18 @@ confl.open()
|
|||||||
|
|
||||||
confl.use("testEN.json")
|
confl.use("testEN.json")
|
||||||
splconf = confl.getParams("splitter")
|
splconf = confl.getParams("splitter")
|
||||||
for f in glob(splconf["paths"]["INworkPath"]+"*.txt"):
|
splist = []
|
||||||
|
os.chdir(splconf["paths"]["INworkPath"])
|
||||||
|
for f in glob("*.txt"):
|
||||||
splconf["name"] = f
|
splconf["name"] = f
|
||||||
splitter = bananaSPLITTER(fileParams=splconf, logger=logger)
|
logger.info("-"*80)
|
||||||
splitter.openFile()
|
splitter = bananaSPLITTER(fileParams=deepcopy(splconf), logger=logger)
|
||||||
splitter.remEmptyLines()
|
splist.append(splitter)
|
||||||
splitter.splitFile()
|
splitter.start()
|
||||||
splitter.removeDuplicates()
|
splitter.join()
|
||||||
splitter.saveBody()
|
del splitter
|
||||||
splitter.saveSeparate()
|
|
||||||
|
logger.info("\n"+"="*50+"\n\tFINITO!!!\n"+"="*50)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user