Uso di multiprocessing al posto di threading per una concorrenza vera

Serve davvero? Da decidere.
This commit is contained in:
2019-12-05 22:55:49 +01:00
parent e3307d8db5
commit c5662c8397
3 changed files with 28 additions and 30 deletions

View File

@@ -3,13 +3,12 @@ Created on 2 nov 2019
@author: Emanuele Trabattoni @author: Emanuele Trabattoni
''' '''
from libfancylogger import fancyLogger
from slugify.slugify import slugify from slugify.slugify import slugify
import threading, time, parse, re, copy, os import time, parse, re, copy, os, multiprocessing
class bananaSPLITTER(threading.Thread): class bananaSPLITTER(multiprocessing.Process):
def __init__(self, fileParams=None, logger=None): def __init__(self, fileParams=None, logger=None):
threading.Thread.__init__(self) multiprocessing.Process.__init__(self)
self.fileParams = fileParams self.fileParams = fileParams
self.log = logger self.log = logger
self.rawFile = None self.rawFile = None
@@ -44,7 +43,7 @@ class bananaSPLITTER(threading.Thread):
for idx, ff in enumerate(self.fileList): for idx, ff in enumerate(self.fileList):
ff['duplicate']=False ff['duplicate']=False
self.fileList[idx]=ff self.fileList[idx]=ff
print('Salto il controllo dei duplicati..') self.log.warn('Salto il controllo dei duplicati..')
if self.settings['saveSeparateFiles']: if self.settings['saveSeparateFiles']:
self.saveSeparate() self.saveSeparate()
if self.settings['saveBodyFile']: if self.settings['saveBodyFile']:
@@ -128,11 +127,11 @@ class bananaSPLITTER(threading.Thread):
newsPaperName = self.settings['nameNotFoundStr'] newsPaperName = self.settings['nameNotFoundStr']
except: except:
self.log.warn("E' successo qualcosa mentre stavo cercando il nome della pubblicazione,\ self.log.warn("E' successo qualcosa mentre stavo cercando il nome della pubblicazione,\
controlla i file di uscita! \n\t[{}]".format(prevLine.strip())) controlla i file di uscita! [{}]".format(prevLine.strip()))
else: else:
newsPaperName = self.settings['nameNotFoundStr'] newsPaperName = self.settings['nameNotFoundStr']
except: except:
self.log.warn("Ho trovato una riga ambigua.. potrebbe essere una data ma non so: \n\t[{}]". format(l.strip('\r\n'))) self.log.warn("Ho trovato una riga ambigua.. potrebbe essere una data ma non so: [{}]". format(l.strip('\r\n')))
pass pass
elif lineWords[0] in self.docStruct['headWords']: elif lineWords[0] in self.docStruct['headWords']:
#cambio stato e inizializzo un nuovo documento da riempire #cambio stato e inizializzo un nuovo documento da riempire
@@ -237,17 +236,17 @@ class bananaSPLITTER(threading.Thread):
def saveBody(self): def saveBody(self):
self.log.info('Salvo gli articoli in un singolo file vicino agli originali...') self.log.info('Salvo gli articoli in un singolo file vicino agli originali...')
self.log.debug('Persorso: {0}'.format(self.outPath))
os.chdir(self.outPath) os.chdir(self.outPath)
print ('Persorso: {0}'.format(self.outPath))
try: try:
fName=slugify(self.fileName) fName=slugify(self.fileName)
fName='BODYFILE_{0}.txt'.format(fName[:self.settings['maxTitleLen']]) fName='BODYFILE_{0}.txt'.format(fName[:self.settings['maxTitleLen']])
fileContent = os.linesep.join([cc['content'] for cc in self.fileList]) fileContent = os.linesep.join([cc['content'] for cc in self.fileList])
out=open(self.paths['OUTworkPath']+'{0}'.format(fName),'wb') out=open('{0}'.format(fName),'wb')
out.write(fileContent.encode(self.settings['encoding'])) out.write(fileContent.encode(self.settings['encoding']))
out.close() out.close()
except IOError as e: except IOError as e:
print("OOPS! Qualcosa e\' andato storto, non riesco a scrivere il file: {}".format(e)) self.log.error("Qualcosa e\' andato storto, non riesco a scrivere il file: {}".format(e))
pass pass

View File

@@ -67,7 +67,7 @@
"saveSeparateFiles": true, "saveSeparateFiles": true,
"saveBodyFile": true, "saveBodyFile": true,
"saveBodyNumber": true, "saveBodyNumber": true,
"delLF": true, "delLF": false,
"delWordBreak": true, "delWordBreak": true,
"delChars": [ "delChars": [
"'", "'",

View File

@@ -10,6 +10,7 @@ from libsplit import bananaSPLITTER
from libconfload import bananaCONF from libconfload import bananaCONF
from libfancylogger import fancyLogger from libfancylogger import fancyLogger
if __name__ == "__main__":
print("CWD-> "+os.getcwd()) print("CWD-> "+os.getcwd())
logger = fancyLogger(fileLog = False) logger = fancyLogger(fileLog = False)
confl = bananaCONF(workdir=r"./libbananasplit", logger=logger) confl = bananaCONF(workdir=r"./libbananasplit", logger=logger)
@@ -25,8 +26,6 @@ for f in glob("*.txt"):
splitter = bananaSPLITTER(fileParams=deepcopy(splconf), logger=logger) splitter = bananaSPLITTER(fileParams=deepcopy(splconf), logger=logger)
splist.append(splitter) splist.append(splitter)
splitter.start() splitter.start()
splitter.join()
del splitter
logger.info("\n"+"="*50+"\n\tFINITO!!!\n"+"="*50) logger.info("\n"+"="*50+"\n\tFINITO!!!\n"+"="*50)