uso di multiprocessing al posto di threading per una concorrenza vera

serve? da decidere
This commit is contained in:
2019-12-05 22:55:49 +01:00
parent e3307d8db5
commit c5662c8397
3 changed files with 28 additions and 30 deletions

View File

@@ -3,13 +3,12 @@ Created on 2 nov 2019
@author: Emanuele Trabattoni
'''
from libfancylogger import fancyLogger
from slugify.slugify import slugify
import threading, time, parse, re, copy, os
import time, parse, re, copy, os, multiprocessing
class bananaSPLITTER(threading.Thread):
class bananaSPLITTER(multiprocessing.Process):
def __init__(self, fileParams=None, logger=None):
threading.Thread.__init__(self)
multiprocessing.Process.__init__(self)
self.fileParams = fileParams
self.log = logger
self.rawFile = None
@@ -44,7 +43,7 @@ class bananaSPLITTER(threading.Thread):
for idx, ff in enumerate(self.fileList):
ff['duplicate']=False
self.fileList[idx]=ff
print('Salto il controllo dei duplicati..')
self.log.warn('Salto il controllo dei duplicati..')
if self.settings['saveSeparateFiles']:
self.saveSeparate()
if self.settings['saveBodyFile']:
@@ -128,11 +127,11 @@ class bananaSPLITTER(threading.Thread):
newsPaperName = self.settings['nameNotFoundStr']
except:
self.log.warn("E' successo qualcosa mentre stavo cercando il nome della pubblicazione,\
controlla i file di uscita! \n\t[{}]".format(prevLine.strip()))
controlla i file di uscita! [{}]".format(prevLine.strip()))
else:
newsPaperName = self.settings['nameNotFoundStr']
except:
self.log.warn("Ho trovato una riga ambigua.. potrebbe essere una data ma non so: \n\t[{}]". format(l.strip('\r\n')))
self.log.warn("Ho trovato una riga ambigua.. potrebbe essere una data ma non so: [{}]". format(l.strip('\r\n')))
pass
elif lineWords[0] in self.docStruct['headWords']:
#cambio stato e inizializzo un nuovo documento da riempire
@@ -237,17 +236,17 @@ class bananaSPLITTER(threading.Thread):
def saveBody(self):
self.log.info('Salvo gli articoli in un singolo file vicino agli originali...')
self.log.debug('Persorso: {0}'.format(self.outPath))
os.chdir(self.outPath)
print ('Persorso: {0}'.format(self.outPath))
try:
fName=slugify(self.fileName)
fName='BODYFILE_{0}.txt'.format(fName[:self.settings['maxTitleLen']])
fileContent = os.linesep.join([cc['content'] for cc in self.fileList])
out=open(self.paths['OUTworkPath']+'{0}'.format(fName),'wb')
out=open('{0}'.format(fName),'wb')
out.write(fileContent.encode(self.settings['encoding']))
out.close()
except IOError as e:
print("OOPS! Qualcosa e\' andato storto, non riesco a scrivere il file: {}".format(e))
self.log.error("Qualcosa e\' andato storto, non riesco a scrivere il file: {}".format(e))
pass

View File

@@ -67,7 +67,7 @@
"saveSeparateFiles": true,
"saveBodyFile": true,
"saveBodyNumber": true,
"delLF": true,
"delLF": false,
"delWordBreak": true,
"delChars": [
"'",

View File

@@ -10,25 +10,24 @@ from libsplit import bananaSPLITTER
from libconfload import bananaCONF
from libfancylogger import fancyLogger
print("CWD-> "+os.getcwd())
logger = fancyLogger(fileLog = False)
confl = bananaCONF(workdir=r"./libbananasplit", logger=logger)
confl.open()
confl.use("testEN.json")
splconf = confl.getParams("splitter")
splist = []
os.chdir(splconf["paths"]["INworkPath"])
for f in glob("*.txt"):
splconf["name"] = f
logger.info("-"*80)
splitter = bananaSPLITTER(fileParams=deepcopy(splconf), logger=logger)
splist.append(splitter)
splitter.start()
splitter.join()
del splitter
logger.info("\n"+"="*50+"\n\tFINITO!!!\n"+"="*50)
if __name__ == "__main__":
print("CWD-> "+os.getcwd())
logger = fancyLogger(fileLog = False)
confl = bananaCONF(workdir=r"./libbananasplit", logger=logger)
confl.open()
confl.use("testEN.json")
splconf = confl.getParams("splitter")
splist = []
os.chdir(splconf["paths"]["INworkPath"])
for f in glob("*.txt"):
splconf["name"] = f
logger.info("-"*80)
splitter = bananaSPLITTER(fileParams=deepcopy(splconf), logger=logger)
splist.append(splitter)
splitter.start()
logger.info("\n"+"="*50+"\n\tFINITO!!!\n"+"="*50)