diff --git a/bananaSPLIT/confGiulia/bananaconfEN.json b/bananaSPLIT/confGiulia/bananaconfEN.json new file mode 100644 index 0000000..cf8b837 --- /dev/null +++ b/bananaSPLIT/confGiulia/bananaconfEN.json @@ -0,0 +1,55 @@ +{ + "INworkPath": "C:\\Users\\Utente\\Desktop\\Dottorato\\Corpus\\test\\", + "OUTworkPath": "C:\\Users\\Utente\\Desktop\\Dottorato\\Corpus\\destinazione\\", + "OUTnameFormat": "{year:04d} {month:02d} {day:02d} {docnum} {title}.txt", + "docStruct": { + "docSep": "Copyright [(0-9)]+", + "dateFormat": "{month} {day:d}, {year:d}{}", + "dateWords": [ + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December" + ], + "headWords": [ + "BYLINE:", + "SECTION:", + "LENGTH:", + "DATELINE:", + "HIGHLIGHT:", + "Email:" + ], + "tailWords": [ + "LANGUAGE:", + "GRAPHIC:", + "TYPE:", + "URL:", + "LOAD-DATE:", + "PUBLICATION-TYPE:", + "DOCUMENT-TYPE:", + "CHARTS:" + ] + }, + "settings": { + "showDuplicates": true, + "removeDuplicates": true, + "maxTitleLen": 40, + "loadTXT": true, + "loadDOCX": false, + "removeOLDFiles": false, + "saveSeparateFiles": true, + "saveBodyFile": true, + "saveBodyNumber": true, + "delLF": false, + "delWordBreak": true, + "delChars": "'|@|#" + } +} diff --git a/bananaSPLIT/confGiulia/bananaconfITA.json b/bananaSPLIT/confGiulia/bananaconfITA.json new file mode 100644 index 0000000..e36da2d --- /dev/null +++ b/bananaSPLIT/confGiulia/bananaconfITA.json @@ -0,0 +1,53 @@ +{ + "INworkPath": "/Volumes/Emanuele/Desktop/testi/", + "OUTworkPath": "/Volumes/Emanuele/Desktop/divisi/", + "OUTnameFormat": "{docnum}_{year:04d}{month:02d}{day:02d}_{filename}_{title}.txt", + "docStruct": { + "docSep": "Copyright [(0-9)]+", + "dateFormat": "{month} {day:d}, {year:d}{}", + "dateWords": [ + "Gennaio", + "Febbraio", + "Marzo", + "Aprile", + "Maggio", + "Giugno", + "Luglio", + "Agosto", + "Settembre", + "Ottobre", + "Novembre", + "Dicembre" + ], + "headWords": [ + "BYLINE:", +
"SECTION:", + "LENGTH:", + "DATELINE:", + "HIGHLIGHT:", + "Email:" + ], + "tailWords": [ + "LANGUAGE:", + "GRAPHIC:", + "TYPE:", + "URL:", + "LOAD-DATE:", + "PUBLICATION-TYPE:", + "DOCUMENT-TYPE:", + "CHARTS:" + ] + }, + "settings": { + "showDuplicates": false, + "maxTitleLen": 32, + "loadTXT": true, + "loadDOCX": false, + "saveSeparateFiles": true, + "saveBodyFile": true, + "saveBodyNumber": true, + "delLF": false, + "delWordBreak": true, + "delChars": "'|@|#" + } +}