import {useEffect, useState} from "react";
import {useStore} from "../store.js";
import {addMembership} from "../lib/membership.jsx";
import {translation} from "../lang/classes.jsx"

export const Preprocess = () => {
	
	// Raw catalogue records; the mount effect below normalises and sorts them in place.
	const data_unsorted = useStore(state => state.data);

	// Membership partitions: `storedCats` doubles as the "already preprocessed" flag
	// checked at the top of the mount effect.
	const storedCats = useStore(state => state.storedCats);
	const setStoredCats = useStore(state => state.setStoredCats);
	const setStoredSubCats = useStore(state => state.setStoredSubCats);

	// Per-place tallies: written by the mount effect, read by the geo-matching effect.
	const storedPlaces = useStore(state => state.storedPlaces);
	const setStoredPlaces = useStore(state => state.setStoredPlaces);
	const setUnknownPlacesData = useStore(state => state.setUnknownPlacesData);

	// Raw place list (its `city`, `latitude` and `longitude` fields are read below)
	// and the setter for the joined result. The previous `geo` subscription was
	// never read in this component and has been dropped.
	const rawplaces = useStore(state => state.places);
	const setGeo = useStore(state => state.setGeo);

	// Setters for the remaining derived data sets.
	const setTimeline = useStore(state => state.setTimeline);
	const setStats = useStore(state => state.setStats);
	const setTaxonomy = useStore(state => state.setTaxonomy);
	const setTaxonomyd3 = useStore(state => state.setTaxonomyd3);
	
	useEffect(() => {
		if (storedCats) return
		
		const shelves = ["BE.1.", "BE.2.", "BE.3.", "BE.4.", "BE.5.", "BE.6.", "BE.7.", "BE.8.", "BE.9.", "BE.10.", "BE.11.", "BE.12.", "INK"] //, "BE.2", "BE.3", "BE.4", "BE.5", "BE.6", "BE.7", "BE.8", "BE.9", "BE.10", "BE.11", "BE.12"]
		
		var pool = [];
		pool.all = {};
		pool.eugeniana = {};
		pool.noneugeniana = {};
		pool.uncertain = {};
		
		var flattened = [];
		flattened.all = []
		flattened.eugeniana = [];
		flattened.noneugeniana = [];
		flattened.uncertain = [];
		
		shelves.map(shelf => {
			pool.all[shelf] = [];
			pool.eugeniana[shelf] = [];
			pool.noneugeniana[shelf] = [];
			pool.uncertain[shelf] = [];
		})
		
		/**
		 * Replaces known spelling variants of taxonomy headings with their
		 * canonical spelling.
		 *
		 * Matching is case-insensitive and applies to every occurrence; the input
		 * is trimmed on both ends first.
		 *
		 * @param {string} taxonomyString - Heading text to normalise.
		 * @returns {string} The trimmed text with all known variants replaced.
		 */
		function substituteTaxonomyLines(taxonomyString) {
			// [variant as found in the data, canonical spelling]
			const substitutions = [
				[
					"Tractatus Heroici ac Heraldici, seu de Nobilitate ac Insignibus",
					"Tractatus Heroïci ac Heraldici, seu de Nobilitate ac Insignibus",
				],
				[
					"Ritus Veterum, Seu de Rebus eorum Sacris, Civilibus, Militaribus, ac Domesticis",
					"Ritus Veterum, seu de Rebus eorum Sacris, Civilibus, Militaribus ac Domesticis"
				],
				[
					"Icones & Vitæ Recentiorum Principum, & Militiâ Dignitatibusque Illustrium",
					"Icones ac Vitæ Recentiorum Principum, & Militiâ Dignitatibusque Illustrium"
				]
			];

			// Escape regex metacharacters so the variant is matched literally.
			const escapeForRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

			let result = taxonomyString.trim();
			for (const [variant, canonical] of substitutions) {
				const pattern = new RegExp(escapeForRegExp(variant), 'gi');
				// A replacer function inserts `canonical` verbatim, so `$`-sequences in
				// a future table entry cannot be read as replacement patterns. No
				// `.test()` pre-check is needed: replace() leaves the text untouched
				// when nothing matches.
				result = result.replace(pattern, () => canonical);
			}
			return result;
		}
		
		// Year fields become "" when missing/falsy, otherwise the base-10 integer
		// part of the value as a string.
		const normaliseYear = (value) => (value ? String(parseInt(value, 10)) : "");

		// Normalise every record in place. NOTE(review): this mutates the objects
		// held in the store's `data` array; other readers of that array see the
		// normalised values.
		data_unsorted.forEach((f) => {
			let currentClass = String(f['Wissensklasse'] ?? "");
			let currentSubClass = String(f['Wissensunterklasse'] ?? "");

			// "A|B" offers alternatives: keep the first one, or the second when the
			// first is blank. The subclass is split from its OWN value (it used to be
			// split from the class string, which overwrote it with the class) and the
			// alternative at the same position is taken; if the subclass has no entry
			// at that position its first entry is kept.
			if (currentClass.includes("|")) {
				const classParts = currentClass.split("|");
				const subClassParts = currentSubClass.split("|");
				const pick = classParts[0].trim() === "" ? 1 : 0;
				currentClass = classParts[pick];
				currentSubClass = subClassParts[pick] ?? subClassParts[0];
			}

			// Only reachable when the subclass carries alternatives but the class does not.
			if (currentClass.includes("|") || currentSubClass.includes("|")) {
				console.log('still not solved', f, currentClass, currentSubClass);
			}

			f['Wissensklasse'] = substituteTaxonomyLines(currentClass.trimEnd());
			f['Wissensunterklasse'] = substituteTaxonomyLines(currentSubClass.trimEnd());

			f['Anfang Veröffentlichungsdatum'] = normaliseYear(f['Anfang Veröffentlichungsdatum']);
			f['Ende Veröffentlichungsdatum'] = normaliseYear(f['Ende Veröffentlichungsdatum']);

			if (f['Zz_year'] === 0) {
				f['Zz_year'] = "none";
			}
			f['signaturIndex'] = parseInt(f['signaturIndex'], 10);
			f['Orte_corr'] = f['Orte_corr1'];
		});

		// Array.prototype.sort works in place, so `data` is the same (now sorted)
		// array as `data_unsorted`.
		const data = data_unsorted.sort((a, b) => a['signaturIndex'] - b['signaturIndex']);
		
		
		// ///////////////////
		//  M E M B E R S H I P
		// ///////////////////
		
		// NOTE(review): `addMembership` is assumed to set `membership` to 0, 1 or 2
		// on each book — confirm in ../lib/membership.jsx.
		const newdata = addMembership(data);

		// Strict numeric codes -> partition name. A Map keeps the original `===`
		// semantics (the string "1" does not match the number 1).
		const membershipGroup = new Map([
			[0, 'noneugeniana'],
			[1, 'eugeniana'],
			[2, 'uncertain'],
		]);

		// Sort each book into the shelf pool and the flat partitions.
		// Previously the shelf lookup only ran for marks starting with "BE", so
		// the "INK" shelf could never receive a book, and a "BE…" mark that matched
		// none of the listed prefixes was dropped from `flattened` altogether.
		// Now the shelf is looked up for every mark, and the flat partitions are
		// filled independently of whether a shelf matched.
		newdata.forEach((book) => {
			const shelfMark = book['Shelf'] != null ? String(book['Shelf']).toUpperCase() : null;
			const shelf = shelfMark === null
				? undefined
				: shelves.find(prefix => shelfMark.startsWith(prefix));
			const group = membershipGroup.get(book['membership']);

			if (shelf !== undefined) {
				// `pool.all` counts every book on the shelf, with or without a
				// recognised membership code.
				pool.all[shelf].push(book);
				if (group !== undefined) {
					pool[group][shelf].push(book);
				}
			}

			if (group !== undefined) {
				flattened[group].push(book);
				flattened.all.push(book);
			}
		});
		
		// ///////////////////
		//  P L A C E S
		// ///////////////////
		
		
		// Tallies per place name, filled from the ';'-separated `Orte_corr` field.
		const places = {};

		// Books without a usable place. Only the 'empty' and '[ohne Ort]' categories
		// are filled here; 'other' and 'unknown' are created but left at zero.
		const unknownPlacesData = {
			count: 0,
			books: [],
			categories: {
				'empty': {count: 0, books: []},
				'[ohne Ort]': {count: 0, books: []},
				'other': {count: 0, books: []},
				'unknown': {count: 0, books: []}
			}
		};

		// Adds one occurrence of `book` to the tally of `placeName`, creating the
		// tally on first sight.
		const recordPlace = (placeName, book) => {
			if (!places[placeName]) {
				places[placeName] = {
					cnt: 0,
					books: [],
					cnt_eugeniana: 0,
					cnt_noneugeniana: 0,
					cnt_uncertain: 0,
					cnt_yellow: 0,
					cnt_red: 0,
					cnt_blue: 0
				};
			}
			const tally = places[placeName];
			tally.cnt += 1;
			tally.books.push(book);

			// Membership counters
			switch (book.membership) {
				case 1:
					tally.cnt_eugeniana += 1;
					break;
				case 0:
					tally.cnt_noneugeniana += 1;
					break;
				case 2:
					tally.cnt_uncertain += 1;
					break;
				default:
					break;
			}

			// Colour-classification counters
			switch (book['Farbklassifizierung']) {
				case 'yellow':
					tally.cnt_yellow += 1;
					break;
				case 'red':
					tally.cnt_red += 1;
					break;
				case 'blue':
					tally.cnt_blue += 1;
					break;
				default:
					break;
			}
		};

		newdata.forEach((book) => {
			const rawPlaces = book['Orte_corr'];

			// Missing or blank place field
			if (!rawPlaces || rawPlaces.trim() === '') {
				unknownPlacesData.count += 1;
				unknownPlacesData.books.push(book);
				unknownPlacesData.categories.empty.count += 1;
				unknownPlacesData.categories.empty.books.push(book);
				return;
			}

			let hasNoPlaceMarker = false;
			for (const part of rawPlaces.split(';')) {
				const placeName = part.trim();
				if (!placeName) {
					continue;
				}
				if (placeName === '[ohne Ort]') {
					// Counted once per occurrence in the category ...
					hasNoPlaceMarker = true;
					const category = unknownPlacesData.categories[placeName];
					category.count += 1;
					category.books.push(book);
				} else {
					recordPlace(placeName, book);
				}
			}

			// ... but only once per book in the overall unknown list.
			if (hasNoPlaceMarker) {
				unknownPlacesData.count += 1;
				unknownPlacesData.books.push(book);
			}
		});

		setUnknownPlacesData(unknownPlacesData);
		
		// Flat list of per-place counters (without the book arrays); the '[u.a.]'
		// pseudo place is left out.
		const topPlaces = Object.keys(places)
			.filter(placeName => places[placeName].cnt > 0 && placeName !== '[u.a.]')
			.map((placeName) => {
				const tally = places[placeName];
				return {
					name: placeName,
					cnt: tally.cnt,
					cnt_eugeniana: tally.cnt_eugeniana,
					cnt_noneugeniana: tally.cnt_noneugeniana,
					cnt_uncertain: tally.cnt_uncertain,
					cnt_yellow: tally.cnt_yellow,
					cnt_red: tally.cnt_red,
					cnt_blue: tally.cnt_blue
				};
			});

		// Sorted in place, descending by count: `placesList` below is this same
		// array and is therefore sorted as well.
		topPlaces.sort((a, b) => b.cnt - a.cnt);

		// NOTE(review): `topTen` keeps the first 11 entries — confirm that is intended.
		const placesObj = {
			all: places,
			placesList: topPlaces,
			topTen: topPlaces.slice(0, 11)
		};

		setStoredPlaces(placesObj);
		
		// Debug aid: report books that have a Wissensklasse but whose
		// 'Komplexe Klassifizierung' is loosely equal to 0 (the loose comparison
		// is kept on purpose so 0, "0" and "" behave as before).
		const withoutComplexClassification = flattened.all.filter(
			book => book['Komplexe Klassifizierung'] == 0 && book.Wissensklasse
		);
		for (const book of withoutComplexClassification) {
			console.log('ohne kpmplexe klassifizierung: ', book);
		}

		// Publish the flat partitions and the per-shelf pools.
		setStoredCats(flattened);
		setStoredSubCats(pool);
		
		// ///////////////////
		//  S T A T S
		// ///////////////////
		
		// Maps every shelf prefix to the number of books stored under it in `d`
		// (an object keyed by shelf prefix, each value an array).
		const getShelves = (d) => Object.fromEntries(
			shelves.map((shelf) => [shelf, d[shelf].length])
		);
		
		// Like getShelves, but counts only the books whose 'Farbklassifizierung'
		// equals `color`.
		const getShelvesColor = (d, color) => {
			const hasColor = (book) => book['Farbklassifizierung'] === color;
			return Object.fromEntries(
				shelves.map((shelf) => [shelf, d[shelf].filter(hasColor).length])
			);
		};
		
		// Number of books in `books` carrying the given colour classification.
		const countColor = (books, color) =>
			books.filter(book => book['Farbklassifizierung'] === color).length;

		// Totals per membership partition (`categories`) and per shelf within each
		// partition (`subcategories`); colour break-downs exist for eugeniana only.
		const newStats = {
			categories: {
				all: flattened.all.length,
				eugeniana: flattened.eugeniana.length,
				eugeniana_yellow: countColor(flattened.eugeniana, 'yellow'),
				eugeniana_red: countColor(flattened.eugeniana, 'red'),
				eugeniana_blue: countColor(flattened.eugeniana, 'blue'),
				noneugeniana: flattened.noneugeniana.length,
				uncertain: flattened.uncertain.length,
			},
			subcategories: {
				all: getShelves(pool.all),
				eugeniana: getShelves(pool.eugeniana),
				eugeniana_yellow: getShelvesColor(pool.eugeniana, 'yellow'),
				eugeniana_red: getShelvesColor(pool.eugeniana, 'red'),
				eugeniana_blue: getShelvesColor(pool.eugeniana, 'blue'),
				noneugeniana: getShelves(pool.noneugeniana),
				uncertain: getShelves(pool.uncertain),
			}
		};

		setStats(newStats);
		
		// console.log(newStats)
		
		// ///////////////////
		//  L A N G U A G E
		// ///////////////////
		
		// language name -> {cnt, books} over all books
		const languages = {};

		// category -> language name -> {cnt, books}
		const categoryLanguages = {
			all: {},
			eugeniana: {},
			eugeniana_blue: {},
			eugeniana_red: {},
			eugeniana_yellow: {},
			noneugeniana: {},
			uncertain: {}
		};

		// Registers `book` under the trimmed language name, first in the overall
		// table and then in the table of `category`. `cnt` goes up on every call,
		// while `books` is a Set and holds each book once.
		const processLanguage = (lang, book, category) => {
			const name = lang.trim();
			const tables = [languages, categoryLanguages[category]];
			for (const table of tables) {
				if (!table[name]) {
					table[name] = {cnt: 0, books: new Set()};
				}
				const entry = table[name];
				entry.cnt += 1;
				entry.books.add(book);
			}
		};
		
		// Colour classification -> colour-specific eugeniana category.
		const colorCategory = new Map([
			['blue', 'eugeniana_blue'],
			['red', 'eugeniana_red'],
			['yellow', 'eugeniana_yellow'],
		]);

		// Categories a book's languages are registered under, in registration
		// order: always 'all', then its membership category, then (eugeniana only)
		// its colour category.
		const categoriesOf = (book) => {
			const result = ['all'];
			switch (book.membership) {
				case 1: {
					result.push('eugeniana');
					const colored = colorCategory.get(book['Farbklassifizierung']);
					if (colored !== undefined) {
						result.push(colored);
					}
					break;
				}
				case 0:
					result.push('noneugeniana');
					break;
				case 2:
					result.push('uncertain');
					break;
				default:
					break;
			}
			return result;
		};

		flattened.all.forEach((book) => {
			const rawLanguages = book['Sprache'];
			if (!rawLanguages) {
				return;
			}
			const targets = categoriesOf(book);
			// The Set makes a language listed twice on one book count once.
			const names = new Set(rawLanguages.split(';').map(part => part.trim()));
			names.forEach((name) => {
				if (!name) {
					return; // skip empty segments such as a trailing ';'
				}
				targets.forEach(category => processLanguage(name, book, category));
			});
		});
		
		// Turns a {language: {books: Set}} table into [{name, cnt}] rows sorted by
		// descending count, where `cnt` is the number of distinct books (the Set
		// size), not the table's own `cnt` field.
		const processLanguageStats = (languageObj) => {
			const rows = [];
			for (const [name, entry] of Object.entries(languageObj)) {
				rows.push({name, cnt: entry.books.size});
			}
			rows.sort((first, second) => second.cnt - first.cnt);
			return rows;
		};
		
		// Overall ranking of languages by number of distinct books.
		const allLanguages = processLanguageStats(languages);

		const languagesObj = {
			all: languages,
			languagesList: allLanguages,
			topTen: allLanguages.slice(0, 10),
			categories: {}
		};

		// Same ranking per category; each ranking is computed once and the top
		// ten is sliced from it.
		for (const [category, table] of Object.entries(categoryLanguages)) {
			const rankedLanguages = processLanguageStats(table);
			languagesObj.categories[category] = {
				allLanguages: rankedLanguages,
				topTen: rankedLanguages.slice(0, 10)
			};
		}

		// Publish a NEW stats object instead of mutating `newStats`, which has
		// already been handed to the store above: re-setting the same mutated
		// reference can go unnoticed by subscribers that compare by identity.
		setStats({...newStats, languages: languagesObj});
		// console.log("languages",newStats)
		
		
		// ///////////////////
		//  T I M E
		// ///////////////////
		
		// Group books by their start-of-publication year. The year values are
		// strings at this point; the comparisons coerce them to numbers, and only
		// values strictly between 2 and 9999 are kept. The result is a sparse
		// array indexed by year.
		const timeline = [];

		flattened.all.forEach((book) => {
			const year = book["Anfang Veröffentlichungsdatum"];
			const inRange = year > 2 && year < 9999;
			if (!inRange || !year) {
				return;
			}
			const bucket = timeline[year] || (timeline[year] = []);
			bucket.push(book);
		});

		setTimeline(timeline);
		// console.log(arr)
		
		// ///////////////////
		//  T A X O N O M Y - C L A S S E S
		// ///////////////////
		
		// "<class>_<subclass>" -> tally of the books filed under that pair.
		const classes = {};

		flattened.all.forEach((d) => {
			const current = d['Wissensklasse'] ?? '';
			const currentsub = d['Wissensunterklasse'] ?? '';
			if (!currentsub) {
				return; // books without a subclass are not part of the taxonomy
			}

			const isEugeniana = d.membership === 1;
			const color = d['Farbklassifizierung'];

			// A book may list several subclasses separated by ';;;'; it is counted
			// once under each of them.
			currentsub.split(';;;').forEach((p) => {
				const key = `${current}_${p}`;
				if (!classes[key]) {
					classes[key] = {
						cnt: 0,
						class: current,
						subclass: p,
						books: [],
						cnt_eugeniana: 0,
						cnt_noneugeniana: 0,
						cnt_uncertain: 0,
						cnt_yellow: 0,
						cnt_red: 0,
						cnt_blue: 0,
					};
				}

				// Update in place; the previous version rebuilt the entry and copied
				// its whole `books` array for every single book.
				const entry = classes[key];
				entry.cnt += 1;
				entry.class = current;
				// Store the individual subclass `p`, not the unsplit ';;;' string, so
				// the entry's name matches the key it is filed under.
				entry.subclass = p;
				entry.books.push(d);

				// Membership counts
				if (isEugeniana) entry.cnt_eugeniana += 1;
				if (d.membership === 0) entry.cnt_noneugeniana += 1;
				if (d.membership === 2) entry.cnt_uncertain += 1;

				// Colour counts are kept for eugeniana books only
				if (isEugeniana && color === 'yellow') entry.cnt_yellow += 1;
				if (isEugeniana && color === 'red') entry.cnt_red += 1;
				if (isEugeniana && color === 'blue') entry.cnt_blue += 1;
			});
		});

		setTaxonomy(classes);
		
		// setTaxonomyd3(convertd3(classes))
		
		// add german texts
		/**
		 * Lists Latin texts that occur more than once in a translation table.
		 *
		 * A text is reported once for every occurrence after its first, so a text
		 * appearing three times is listed twice.
		 *
		 * @param {Array<{latin: string}>} translation - Translation entries.
		 * @returns {string[]} The repeated `latin` values, in encounter order.
		 */
		function checkForDuplicates(translation) {
			// A Set instead of a plain object: texts such as "constructor" or
			// "toString" would otherwise hit inherited Object.prototype members and
			// be reported as duplicates on first sight.
			const seen = new Set();
			const duplicates = [];

			translation.forEach((item) => {
				const latin = item.latin;
				if (seen.has(latin)) {
					duplicates.push(latin);
				} else {
					seen.add(latin);
				}
			});

			return duplicates;
		}
		
		// Sanity check of the imported translation table: log every Latin text
		// that appears more than once; stay silent otherwise.
		const duplicates = checkForDuplicates(translation);

		if (duplicates.length > 0) {
			console.log("Duplicate Latin texts found:");
			for (const text of duplicates) {
				console.log(text);
			}
		}
		
		// translate and store unknowns in its own array
		// Latin texts for which no translation entry was found.
		const unknown = [];

		// Latin text -> translation entry. On duplicate Latin texts the first
		// entry wins, matching what a first-match search over the table returns.
		const translationByLatin = new Map();
		translation.forEach((entry) => {
			if (!translationByLatin.has(entry.latin)) {
				translationByLatin.set(entry.latin, entry);
			}
		});

		// Looks up the German, "brevia" and "kurz" texts for a Latin heading.
		// Missing entries or missing fields yield 'unknown'; missing entries are
		// additionally logged and collected in `unknown`.
		const t = (latin) => {
			const result = translationByLatin.get(latin);
			if (!result) {
				unknown.push({"latin": latin, "german": "missing", "brevia": "missing", "kurz": "missing"});
				console.log(`"${latin}"`, result);
				return {german: 'unknown', brevia: 'unknown', kurz: 'unknown'};
			}
			return {
				german: result.german || 'unknown',
				brevia: result.brevia || 'unknown',
				kurz: result.kurz || 'unknown'
			};
		};

		// Converts the flat "<class>_<subclass>" tallies into a two-level tree
		// ({name: 'root', children: [class nodes, each with subclass children]}),
		// attaching the translated labels to every node.
		const convertd3multiling = (obj) => {
			const data = {name: 'root', children: []};
			// class name -> its node in data.children, replacing a linear search per entry
			const classNodes = new Map();

			for (const entry of Object.values(obj)) {
				let classNode = classNodes.get(entry.class);
				if (!classNode) {
					// The class is translated only when its node is created, so a
					// missing class translation is reported once rather than once per
					// subclass.
					const classTranslation = t(entry.class);
					classNode = {
						name: entry.class,
						german: classTranslation.german,
						brevia: classTranslation.brevia,
						kurz: classTranslation.kurz,
						children: []
					};
					classNodes.set(entry.class, classNode);
					data.children.push(classNode);
				}

				const subclassTranslation = t(entry.subclass);
				classNode.children.push({
					name: entry.subclass,
					german: subclassTranslation.german,
					brevia: subclassTranslation.brevia,
					kurz: subclassTranslation.kurz,
					value: entry.cnt,
					// Per-membership and per-colour counts
					value_eugeniana: entry.cnt_eugeniana,
					value_noneugeniana: entry.cnt_noneugeniana,
					value_uncertain: entry.cnt_uncertain,
					value_yellow: entry.cnt_yellow,
					value_red: entry.cnt_red,
					value_blue: entry.cnt_blue,
					books: entry.books
				});
			}
			return data;
		};

		setTaxonomyd3(convertd3multiling(classes));

		// Report missing translations AFTER the conversion: `unknown` is only
		// filled by the t() calls made during it, so checking earlier never logs.
		if (unknown.length > 0) {
			console.log(unknown);
		}
		
		// console.log(convertd3multiling(classes))
		// console.log(unknown)
		
	}, []); // eslint-disable-line
	
	
	// Joins the per-place tallies with the coordinates from the raw place list
	// and stores one entry per place that has coordinates. Runs whenever either
	// input changes.
	useEffect(() => {

		if (!storedPlaces || !rawplaces) return;

		// Place names that are expected to have no coordinates and are therefore
		// not reported as missing.
		const unknownPlaces = new Set([
			"[ohne Ort]",
			"[u.a.]",
			"s.l.",
			"Doregnal (fing.)",
			"Crisopolis (fing.)",
			"Bengodi (fing.)",
			"Albionspolis", // fing.
			"Fridenstad", // fing.
			"Saint-Jean", // ambiguous
		]);

		// city -> raw place entry, built once instead of searching the list for
		// every place. On duplicate city names the first entry wins, as with a
		// first-match search.
		const coordsByCity = new Map();
		for (const place of rawplaces) {
			if (!coordsByCity.has(place.city)) {
				coordsByCity.set(place.city, place);
			}
		}

		const newplaces = [];
		const missingCoords = [];

		for (const [city, placeData] of Object.entries(storedPlaces.all)) {
			const coords = coordsByCity.get(city);

			if (coords) {
				newplaces.push({
					'city': city,
					'lat': coords.latitude,
					'lon': coords.longitude,
					'radius': placeData.cnt || 0,
					'radius_eugeniana': placeData.cnt_eugeniana || 0,
					'radius_noneugeniana': placeData.cnt_noneugeniana || 0,
					'radius_uncertain': placeData.cnt_uncertain || 0,
					'radius_yellow': placeData.cnt_yellow || 0,
					'radius_red': placeData.cnt_red || 0,
					'radius_blue': placeData.cnt_blue || 0,
					// Number of books, kept for verification
					'books_count': placeData.books.length
				});
			} else if (!unknownPlaces.has(city)) {
				missingCoords.push(city);
			}
		}

		// List places that lack coordinates and are not known exceptions; nothing
		// is logged when every place was matched.
		if (missingCoords.length > 0) {
			console.log("\n" + missingCoords.join("\n"));
		}

		setGeo(newplaces);

	}, [storedPlaces, rawplaces]);
	
	
	return <></>
}


