Il test lancia correttamente i moduli ma siamo tornati agli errori di parsing, debug necessario...
correzioni alle librerie
2019-12-01 17:44:14 +01:00 · 2019-12-01 17:17:48 +01:00 · 2019-12-01 17:17:37 +01:00 · 2019-12-01 17:17:16 +01:00
10 changed files with 101815 additions and 15 deletions
@@ -25,7 +25,7 @@ class bananaCONF(object):
 	
 	def open(self):
 		self.log.info("Carico i file di configurazione")
-		self.fileList = glob.glob(self.workdir+"\\*.json")
+		self.fileList = glob.glob(r"*.json")
 		if len(self.fileList) > 0:
 			for f in self.fileList:
 				try:
@@ -4,11 +4,12 @@ Created on 2 nov 2019
@author: Emanuele Trabattoni
 '''
 from libfancylogger import fancyLogger
-import threading, time, parse, re, copy, slugify, os
+from slugify.slugify import slugify
+import threading, time, parse, re, copy, os

 class bananaSPLITTER(threading.Thread):
-
 	def __init__(self, fileParams=None, logger=None):
+		threading.Thread.__init__(self)
 		self.fileParams = fileParams
 		self.log = logger
 		self.rawFile = None
@@ -49,7 +50,7 @@ class bananaSPLITTER(threading.Thread):
 		
 	def openFile(self):
 		try:
-			self.info("Carico il contenuto..")
+			self.log.info("Carico il contenuto..")
 			fp = open(self.fileParams['name'], mode='r', encoding=self.settings['encoding'])
 			self.rawFile = fp.readlines()
 			fp.close()
@@ -79,6 +80,7 @@ class bananaSPLITTER(threading.Thread):
 		docNumber = 0
 		docSkipped = 0
 		docDate = {}
+		title = ''
 		prevLine = ''
 		newsPaperName = ''
 		titleBegin = False
@@ -96,14 +98,14 @@ class bananaSPLITTER(threading.Thread):
 							pass
 						else:
 							if self.settings["showSkipped"]:
-								self.log.warning("Il conto dei documenti non torna! LexisNexis \
+								self.log.warn("Il conto dei documenti non torna! LexisNexis \
 								ne ha saltato qualcuno!\nPrecedente:{0}-Attuale:{1}".format(docNumber,nn["current"]))
 							docSkipped+=1
 						docNumber = nn["current"]
 					except:
 						pass #non segnalare eccezione se il parse fallisce
 					# ricerco la data
-					if (lineWords[self.settings['monthPosition']]).capitalize() in self.docParams['dateWords']:
+					if (lineWords[self.settings['monthPosition']]).capitalize() in self.docStruct['dateWords']:
 						try:
 							docDate=parse.parse(self.docParams['dateFormat'],l).named
 							docDate['month']=docDate['month'].lstrip().rstrip().capitalize()
@@ -118,12 +120,12 @@ class bananaSPLITTER(threading.Thread):
 									else:
 										newsPaperName = self.settings['nameNotFoundStr']
 								except:
-									self.log.warning("E' successo qualcosa mentre stavo cercando il nome della pubblicazione,\
+									self.log.warn("E' successo qualcosa mentre stavo cercando il nome della pubblicazione,\
 									controlla i file di uscita! \n\t[{}]".format(prevLine.strip()))
 							else:
 								newsPaperName = self.settings['nameNotFoundStr']
 						except:
-							self.log.warning("Ho trovato una riga ambigua.. potrebbe essere una data ma non so: \n\t[{}]". format(l.strip('\r\n')))
+							self.log.warn("Ho trovato una riga ambigua.. potrebbe essere una data ma non so: \n\t[{}]". format(l.strip('\r\n')))
 							pass
 					elif lineWords[0] in self.docStruct['headWords']:
 						#cambio stato e inizializzo un nuovo documento da riempire
@@ -155,7 +157,7 @@ class bananaSPLITTER(threading.Thread):
 					self.status = 'tail'
 					anomaly = False
 				if docSep.match(l) is not None: #controlla se ci sono articoli che non hanno le parole chiave finali 
-					self.log.warning("Ho individuato una separatore valido prima che si chiusesse l'articolo precedente, controlla i tuoi file in uscita!\n\
+					self.log.warn("Ho individuato una separatore valido prima che si chiusesse l'articolo precedente, controlla i tuoi file in uscita!\n\
 					L'errore dovrebbe essere intorno all'articolo {} ma non sono sicuro! \n\t\t[{}]".format(docNumber, l.strip()))
 					self.status = 'tail' 
 					anomaly = True
@@ -178,7 +180,7 @@ class bananaSPLITTER(threading.Thread):
 		#ricerca terminata, espongo i risultati
 		self.log.info("Nel file ho trovato {0} articoli..".format(self.bodyCounter))
 		if docSkipped > 0:
-			self.log.warning("Attentione, LexisNexis ne ha saltati {} !!!".format(docSkipped))
+			self.log.warn("Attentione, LexisNexis ne ha saltati {} !!!".format(docSkipped))
 			pass
 		pass

@@ -6,8 +6,9 @@
 		"logTimeFormat": "%m-%d %H:%M:%S"
 	},
 	"splitter": {
+	"name": "",
 		"paths": {
-			"INworkPath": "D:\\Test\\",
+			"INworkPath": "D:\\Emanuele\\Documenti\\workspace\\bananaSPLIT\\TestFiles\\",
 			"OUTworkPath": "D:\\Test\\Separati\\",
 			"OUTnameFormat": "TEST_{docnum}_{year:04d}{month:02d}{day:02d}_{title}.txt"
 		},
@@ -6,8 +6,9 @@
 		"logTimeFormat": "%m-%d %H:%M:%S"
 	},
 	"splitter": {
+		"name": "",
 		"paths": {
-			"INworkPath": "D:\\Test\\",
+			"INworkPath": "D:\\Emanuele\\Documenti\\workspace\\bananaSPLIT\\TestFiles\\",
 			"OUTworkPath": "D:\\Test\\Separati\\",
 			"OUTnameFormat": "TEST_{docnum}_{year:04d}{month:02d}{day:02d}_{title}.txt"
 		},
@@ -8,11 +8,19 @@ from libsplit import bananaSPLITTER
 from libconfload import bananaCONF
 from libfancylogger import fancyLogger

-print(os.getcwd())
+print("CWD-> "+os.getcwd())
 logger = fancyLogger(fileLog = False)
 confl = bananaCONF(workdir=r"./libbananasplit", logger=logger)
-splconf = confl.use("testEN")
-splitter = bananaSPLITTER()
+confl.open()
+
+confl.use("testEN.json")
+splconf = confl.getParams("splitter")
+splconf["name"] = splconf["paths"]["INworkPath"]+"GUARDIAN 1989.txt"
+splitter = bananaSPLITTER(fileParams=splconf, logger=logger)
+splitter.start()
+splitter.join()
+
+
Author	SHA1	Message	Date
Obbart	b6000c49fd	Il test lancia correttamente i moduli ma siamo tornati agli errori di parsing, debug necessario...	2019-12-01 17:44:14 +01:00
Obbart	851dcf103d	correzioni alle librerie	2019-12-01 17:17:48 +01:00
Obbart	4118ed82c3	aggiornate le configurazioni con path file di test	2019-12-01 17:17:37 +01:00
Obbart	435bb144ab	aggiunti file di test	2019-12-01 17:17:16 +01:00