non riesco a provare le modifiche, il debugger di python mi prende per il culo
2020-03-30 14:52:09 +02:00
parent 989aa755dd
commit 6d43c564b8
6 changed files with 133 additions and 39 deletions
--- a/bananaSPLIT/conf/conftest.json
+++ b/bananaSPLIT/conf/conftest.json
@@ -0,0 +1,109 @@
 {
  "version": "v1.1a",
  "paths": {
    "lastUsed": "defaults.json",
    "configurationPath": "D:/Emanuele/Documenti/workspace/bananaSPLIT/bananaSPLIT/conf/",
    "INworkPath": "D:/Emanuele/Documenti/workspace/bananaSPLIT/TestFiles/File nuovo formato",
    "OUTworkPath": "H:/",
    "fileList": [
      "_ Women, Drugs And Depression.txt",
      "_96 Election Represents Last Hurrah of Generational Politics.txt",
      "_A 60_S GENERATION MADE BY MYTH-MAKERS.txt",
      "_MIDDLE AGE_ NO END TO AN UPWARD CREEP.txt",
      "A DEMOCRATIC SOCIETY IN NEEDOF A MILITARY.txt",
      "A Dog Who Taught the Lesson of Hope.txt",
      "A Fading Bohemia, Gritty but Beloved.txt"
    ]
  },
  "docStruct": {
    "fileVersNew": true,
    "language": {
      "dateWords": [
        "January",
        "February",
        "March",
        "April",
        "May",
        "June",
        "July",
        "August",
        "September",
        "October",
        "November",
        "December"
      ],
      "headWords": [
        "BYLINE:",
        "SECTION:",
        "LENGTH:",
        "DATELINE:",
        "HIGHLIGHT:",
        "Email:"
      ],
      "tailWords": [
        "Newstex ID:",
        "NOTES:",
        "LANGUAGE:",
        "GRAPHIC:",
        "TYPE:",
        "URL:",
        "LOAD-DATE:",
        "PUBLICATION-TYPE:",
        "DOCUMENT-TYPE:",
        "CHARTS:",
        "JOURNAL-CODE:"
      ]
    },
    "docSep": "\\s*Copyright [(0-9)]+",
    "beginOfDocument": "\\s*Body",
    "endOfDocument": "\\s*End of Document",
    "dateFormat": "{month} {day:d}, {year:d}{}",
    "outPrefix": "PRE",
    "outSuffix": "SUF",
    "outExt": ".txt",
    "outDateType": "jpn",
    "outNameFormat": "PRE+{docnum}+{year:04d}{month:02d}{day:02d}+{title}+SUF.txt",
    "outDate": true,
    "outTitle": true,
    "outNumber": true,
    "numberPos": 1,
    "datePos": 2,
    "titlePos": 3,
    "maxTitleLen": 10,
    "outNameSep": "+",
    "customSep": "=",
    "dateFormats": {
      "jpn": "{year:04d}{month:02d}{day:02d}",
      "it": "{day:02d}{month:02d}{year:04d}",
      "usa": "{month:02d}{year:04d}{day:02d}"
    }
  },
  "settings": {
    "encoding": "ansi",
    "monthPosition": 0,
    "getNewsPaperName": true,
    "nameNotFoundStr": "ND",
    "includeTitle": true,
    "removeDuplicates": true,
    "showSkipped": false,
    "showRemovedDuplicates": false,
    "loadTXT": true,
    "loadDOCX": false,
    "removeOldFiles": true,
    "saveSeparateFiles": true,
    "saveBodyFile": true,
    "saveBodyNumber": true,
    "delLF": false,
    "delWordBreak": true,
    "delChars": [
      "'",
      "@",
      "#",
      "$",
      "%",
      "^",
      "&"
    ]
  },
  "name": "_ Women, Drugs And Depression.txt"
 }
--- a/bananaSPLIT/conf/defaults.json
+++ b/bananaSPLIT/conf/defaults.json
@@ -24,16 +24,15 @@
 			"NYT 2009.txt",
 			"NYT 2013.txt",
 			"NYT 2015.txt",
-			"NYT 2017.txt",
+			"NYT 2017.txt"
 			"README"
 		]
 	},
 	"docStruct": {
 		"fileVersNew": true,
-		"language": "English",
+		"language": "Italiano",
 		"docSep": "\\s*Copyright [(0-9)]+",
-		"beginOfDocument":"\\s*Body",
+		"beginOfDocument": "\\s*Body",
-		"endOfDocument":"\\s*End of Document",
+		"endOfDocument": "\\s*End of Document",
 		"dateFormat": "{month} {day:d}, {year:d}{}",
 		"outPrefix": "PRE",
 		"outSuffix": "SUF",
--- a/bananaSPLIT/convert.py
+++ b/bananaSPLIT/convert.py
@@ -12,8 +12,8 @@ if __name__ == '__main__':
 		print(f)
 		try:
 			txt=docx2txt.process(f)
-			with open(f.replace('.docx', '.txt'), 'w') as fp:
+			with open(f.replace('.docx', '.txt'), 'wb') as fp:
-				fp.write(txt)
+				fp.write(txt.encode('ansi'))
 				fp.close()
 		except Exception as e:
 			print(e)
--- a/bananaSPLIT/guimain.py
+++ b/bananaSPLIT/guimain.py
@@ -92,11 +92,11 @@ class bananaMain(PyQt5.QtWidgets.QMainWindow):
 	def fillFileList(self):
 		fl = QDir(self.conf.getParam('paths','INworkPath'))
 		fl.setNameFilters(['*.txt','*.TXT'])
-		fl.entryList(QDir.NoDotAndDotDot | QDir.Files)
+		fl = fl.entryList(QDir.NoDotAndDotDot | QDir.Files)
 		self.ui.lst_files.clear()
 		for f in fl:
 			self.ui.lst_files.addItem(PyQt5.QtWidgets.QListWidgetItem(f))
-			self.conf.setParam(('paths', 'fileList'), fl)
+		self.conf.setParam(('paths', 'fileList'), fl)
 	def nextTab(self):
 		self.ui.wgt_main.setCurrentIndex(self.ui.wgt_main.currentIndex()+1)
@@ -455,11 +455,12 @@ class bananaSelezOut(PyQt5.QtWidgets.QWidget):
 		# costruisco i thread
 		tDict={}
 		try:
-			for f in splconf['paths']['fileList']:
+			for f in [splconf['paths']['fileList'][0]]:
 				splconf['name']=f
 				tDict[f] = bananaSPLITTER(fileParams=copy.deepcopy(splconf), logger=self.log)
 				tDict[f].run()
 				#tDict[f].sendStatus.connect(updateState)
-				QThreadPool.globalInstance().start(tDict[f])
+				#QThreadPool.globalInstance().start(tDict[f])
 		except Exception as e:
 			self.log.error(f"Impossibile avviare lo splitter: {e}")
 		pass
--- a/bananaSPLIT/libbananasplit/libsplit.py
+++ b/bananaSPLIT/libbananasplit/libsplit.py
@@ -3,19 +3,18 @@ Created on 2 nov 2019
@author: Emanuele Trabattoni
 '''
-from PyQt5.QtCore import QThread, QRunnable
+from PyQt5.QtCore import QRunnable, QObject, pyqtSignal
 from PyQt5.Qt import pyqtSignal
 from slugify import slugify
-import time, parse, re, copy, os, json
+import time, parse, re, copy, os,json
 import traceback
-class bananaSPLITTER(QRunnable):
+class bananaSPLITTER():
 	#sendStatus = pyqtSignal(str)
 	def __init__(self, fileParams=None, logger=None):
-		QRunnable.__init__(self)
+		#QRunnable.__init__(self)
 		self.fileParams = fileParams
 		self.log = logger
 		self.rawFile = None
@@ -23,7 +22,7 @@ class bananaSPLITTER(QRunnable):
 		self.contentList = list()
 		self.bodyCounter=0
 		self.duplicateNumber=0
-		#self.log.debug(f"Configurazione: \n {json.dumps(fileParams, indent=2)}")
+		self.log.debug(f"Configurazione: \n {json.dumps(fileParams, indent=2)}")
 		if fileParams is not None:
 			self.log.info("Sto operando sul file: {}..".format(self.fileParams['name']))
 			self.paths = self.fileParams['paths']
@@ -104,7 +103,7 @@ class bananaSPLITTER(QRunnable):
 			for ll in self.rawFile:
 				for c in self.settings['delChars']:
 					ll = ll.replace(c,'')
-				if ll not in ['\n', '\r']:
+				if ll not in ['\n', '\r', '\r\n']:
 					tempContent.append(ll)
 			self.rawFile = copy.deepcopy(tempContent)
 			return True
@@ -174,7 +173,7 @@ class bananaSPLITTER(QRunnable):
 			elif self.status == 'head':
 				tempContent = list()
 				#doppio check per trovare línizio del corpo documento
-				if re.match(self.docStruct['beginOfDocument'],l,re.i):
+				if re.match(self.docStruct['beginOfDocument'],l):
 					self.status='body' 
 				if lineWords[0] not in self.docStruct['language']['headWords']:  #se la prima parola non e' tra quelle di inizio	
 					tempBody.append(l)					   # vuol dire che ho trovato l'articolo e aggiungo la prima riga al contenuto del documento
--- a/bananaSPLIT/libtestmain.py
+++ b/bananaSPLIT/libtestmain.py
@@ -3,30 +3,16 @@ Created on 1 dic 2019
@author: Emanuele Trabattoni
 '''
-import os
+import json
 from glob import glob
 from copy import deepcopy
 from libsplit import bananaSPLITTER
 from libconfload import bananaCONF
 from libfancylogger import fancyLogger
 if __name__ == "__main__":
-	print("CWD-> "+os.getcwd())
+	logger = fancyLogger(filepath=r"./conf/loggerconf.json",fileLog=False)
-	logger = fancyLogger(fileLog = False)
+	fp = open('./conf/conftest.json', 'r')
-	confl = bananaCONF(workdir=r"./libbananasplit", logger=logger)
+	splitter = bananaSPLITTER(fileParams=json.load(fp), logger=logger)
-	confl.open()
+	fp.close()
-	
+	splitter.run()
 	confl.use("testEN.json")
 	splconf = confl.getParams("splitter")
 	splist = []
 	os.chdir(splconf["paths"]["INworkPath"])
 	for f in glob("*.txt"):
 		splconf["name"] = f
 		logger.info("-"*80)
 		splitter = bananaSPLITTER(fileParams=deepcopy(splconf), logger=logger)
 		splist.append(splitter)
 		splitter.start()
 		splitter.join()
 	logger.info("\n"+"="*50+"\n\tFINITO!!!\n"+"="*50)