package de.xam.textsearch.text;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.xydra.index.IEntrySet;
import org.xydra.index.impl.trie.SmallTrieStringMapSetIndex;
import org.xydra.index.iterator.ClosableIterator;
import org.xydra.index.iterator.IFilter;
import org.xydra.index.query.Constraint;
import org.xydra.index.query.KeyEntryTuple;
import org.xydra.index.query.Pair;
import org.xydra.log.api.Logger;
import org.xydra.log.api.LoggerFactory;

import com.google.gwt.regexp.shared.SplitResult;

import de.xam.textsearch.compare.LongestStringComparator;
import de.xam.textsearch.fragment.FragmentIndex;
import de.xam.textsearch.fragment.FragmentQuery;
import de.xam.textsearch.query.AndQuery;
import de.xam.textsearch.query.IQuery;
import de.xam.textsearch.tokenize.ITokenizer;
import de.xam.textsearch.tokenize.Tokenizers;
import de.xam.textsearch.util.INormaliser;

/**
 * Responsible for tokenisation and normalisation. Beyond that, this class is merely a decorator of
 * {@link FragmentIndex}
 *
 * @author xamde
 *
 * @param <V> id type
 */
public class TextIndex<V extends Serializable> implements Serializable {

	/** Logger of this class; used for trace output when a phrase yields no fragments. */
	private static final Logger log = LoggerFactory.getLogger(TextIndex.class);

	/** Version identifier of the serialised form of this {@link Serializable} class. */
	private static final long serialVersionUID = 1L;

	/**
	 * Creates a query for a single fragment against the given index by delegating to
	 * {@code FragmentQuery.create}.
	 *
	 * @param fragmentIndex the index the query is created for
	 * @param fragment the fragment to look for; passed on unchanged (no normalisation happens here)
	 * @param prefixMatch if true, returns more matches by answering 'fragment*' instead of 'fragment'
	 * @return the query
	 */
	private static <V extends Serializable> FragmentQuery<V> createFragmentQuery(final FragmentIndex<V> fragmentIndex, final String fragment,
			final boolean prefixMatch) {
		return FragmentQuery.<V> create(fragmentIndex, fragment, prefixMatch);
	}

	/**
	 * Creates an OR-query over the words and fragments of a query phrase. Each distinct normalised word and fragment
	 * becomes one fragment query; whether these are prefix-matches is controlled by {@code prefixQuery}. The result
	 * can be large, use full scoring to find the good results.
	 *
	 * @param fragmentIndex the index to query
	 * @param phrase2wordTokenizer splits the phrase into words
	 * @param phrase2fragmentTokenizer splits the phrase into fragments
	 * @param normaliser applied to every word and fragment before it is used as a query term; terms for which it
	 *        returns null are skipped
	 * @param queryPhrase the raw phrase as entered
	 * @param optionalValueFilter @CanBeNull set as value filter on every fragment query
	 * @param prefixQuery if true, every term is answered as 'term*' instead of 'term'
	 * @return a combined query
	 */
	public static <V extends Serializable> ITextQuery<V> createTextQuery(final FragmentIndex<V> fragmentIndex,
			final ITokenizer phrase2wordTokenizer, final ITokenizer phrase2fragmentTokenizer,
			final INormaliser normaliser, final String queryPhrase, final IFilter<V> optionalValueFilter,
			final boolean prefixQuery) {
		final List<String> wordList = tokenizeToWordList(phrase2wordTokenizer, queryPhrase);

		// distinct raw fragments in order of first occurrence; the set gives constant-time duplicate detection
		final SplitResult fragments = phrase2fragmentTokenizer.split(queryPhrase);
		final List<String> fragmentList = new ArrayList<String>();
		final Set<String> seenFragments = new HashSet<String>();
		for (int i = 0; i < fragments.length(); i++) {
			final String fragment = fragments.get(i);
			if (seenFragments.add(fragment)) {
				fragmentList.add(fragment);
			}
		}

		/* having the phrase and words allows for more precise scoring of results with regards to user intent */
		final TextQuery<V> textquery = new TextQuery<V>(queryPhrase, wordList, fragmentList);

		// normalised terms for which a sub-query has already been added
		final Set<String> usedParts = new HashSet<String>();

		// using complete words first
		for (final String word : wordList) {
			final String normalisedWord = normaliser.normalise(word);
			// Set.add returns false for a term that was already used
			if (normalisedWord != null && usedParts.add(normalisedWord)) {
				final FragmentQuery<V> fragmentQuery = createFragmentQuery(fragmentIndex, normalisedWord, prefixQuery)
						.setValueFilter(optionalValueFilter);
				textquery.or(fragmentQuery);
			}
		}

		// then the fragments, in stable order; same null and duplicate handling as for the words
		for (final String fragment : fragmentList) {
			final String normalisedFragment = normaliser.normalise(fragment);
			if (normalisedFragment != null && usedParts.add(normalisedFragment)) {
				final FragmentQuery<V> fragmentQuery = createFragmentQuery(fragmentIndex, normalisedFragment,
						prefixQuery).setValueFilter(optionalValueFilter);
				textquery.or(fragmentQuery);
			}
		}
		return textquery;
	}

	/**
	 * Builds a query that requires all fragments of the phrase to match. If the phrase is empty or the tokenizer
	 * yields no fragments, a single query for the empty fragment {@code ""} is returned instead.
	 *
	 * @param fragmentIndex the index to query
	 * @param phrase2fragmentTokenizer splits the phrase into fragments
	 * @param normaliser applied to every fragment before it is used as a query term
	 * @param queryPhrase the raw phrase; an empty phrase is not tokenized at all
	 * @param optionalValueFilter is added as deep down in a query hierarchy as possible to minimize intermediate result
	 *        set size
	 * @param prefixMatch if true, returns more matches by answering 'fragment*' instead of 'fragment'
	 * @return either a single fragment query for {@code ""} or an AND-query over all fragments
	 */
	@SuppressWarnings("unused")
	private static <V extends Serializable> IQuery<V> toAndQuery(final FragmentIndex<V> fragmentIndex,
			final ITokenizer phrase2fragmentTokenizer, final INormaliser normaliser, final String queryPhrase,
			final IFilter<V> optionalValueFilter, final boolean prefixMatch) {
		// null stands for "phrase was empty, nothing tokenized"
		final SplitResult parts = queryPhrase.length() == 0 ? null : phrase2fragmentTokenizer.split(queryPhrase);

		final boolean hasFragments = parts != null && parts.length() > 0;
		if (!hasFragments) {
			if (parts != null) {
				/* weird queries: non-empty phrase without any fragment */
				log.trace("no fragments, but queryString '" + queryPhrase
						+ "' present. QueryString seems to consist only of special characters such as '_'");
			}
			// query for everything via the empty fragment
			final FragmentQuery<V> everything = createFragmentQuery(fragmentIndex, "", prefixMatch);
			everything.setValueFilter(optionalValueFilter);
			return everything;
		}

		final AndQuery<V> conjunction = AndQuery.create();
		int pos = 0;
		while (pos < parts.length()) {
			final String term = normaliser.normalise(parts.get(pos));
			final FragmentQuery<V> termQuery = createFragmentQuery(fragmentIndex, term, prefixMatch);
			termQuery.setValueFilter(optionalValueFilter);
			conjunction.and(termQuery);
			pos++;
		}
		return conjunction;
	}

	/**
	 * Splits a phrase into its distinct, non-empty words, keeping the order of first occurrence.
	 *
	 * @param phrase2wordTokenizer splits the phrase into words
	 * @param queryPhrase the phrase to split
	 * @return a new, modifiable list of the distinct non-empty words; words are not normalised
	 */
	public static List<String> tokenizeToWordList(final ITokenizer phrase2wordTokenizer, final String queryPhrase) {
		final SplitResult words = phrase2wordTokenizer.split(queryPhrase);
		final List<String> wordList = new ArrayList<String>();
		// Set.add reports whether the word is new; avoids a linear List.contains scan per word
		final Set<String> seen = new HashSet<String>();
		for (int i = 0; i < words.length(); i++) {
			final String word = words.get(i);
			if (word.length() > 0 && seen.add(word)) {
				wordList.add(word);
			}
		}
		return wordList;
	}

	/** The decorated index; all index, de-index and query operations of this class delegate to it. */
	private FragmentIndex<V> fragmentIndex;

	/**
	 * Normalises the fragments of a phrase before they are indexed, de-indexed or queried. Transient, i.e. not part
	 * of the serialised form. NOTE(review): nothing in this class restores it after deserialisation -- confirm how
	 * deserialised instances get their normaliser.
	 */
	private transient INormaliser normaliser;

	/** Splits phrases into fragments. Transient, i.e. not part of the serialised form. */
	private transient ITokenizer phrase2fragmentTokenizer;

	/** required only for ranking of results. Transient, i.e. not part of the serialised form. */
	private transient ITokenizer phrase2wordTokenizer;

	/**
	 * Creates an index with default settings: normalises to lowercase ({@code INormaliser.LOWERCASE}) and uses the
	 * range-based word and fragment tokenizers defined in {@code Tokenizers}.
	 */
	public TextIndex() {
		this(INormaliser.LOWERCASE, Tokenizers.PHRASE_2_WORD_TOKENIZER__RANGEBASED,
				Tokenizers.WORD_OR_PHRASE_2_FRAGMENT_TOKENIZER__RANGEBASED);
	}

	/**
	 * Creates an index backed by a new {@link FragmentIndex}. The arguments are stored as given; no null checks are
	 * performed here.
	 *
	 * @param normaliser @NeverNull applied to fragments of phrases before indexing, de-indexing and querying
	 * @param phrase2words @NeverNull Only used for ranking auto-completion matches (prefixes at begin of word are
	 *        better than at begin of fragment only)
	 * @param phrase2fragments @NeverNull splits phrases into the fragments that get indexed
	 */
	public TextIndex(final INormaliser normaliser, final ITokenizer phrase2words, final ITokenizer phrase2fragments) {
		this.fragmentIndex = new FragmentIndex<V>();
		this.normaliser = normaliser;
		this.phrase2wordTokenizer = phrase2words;
		this.phrase2fragmentTokenizer = phrase2fragments;
	}

	/** Clears the underlying fragment index. */
	public void clear() {
		this.fragmentIndex.clear();
	}

	/**
	 * Delegates to {@code FragmentIndex.constraintIterator}.
	 *
	 * @param fragmentConstraint constraint on the fragments; passed on unchanged
	 * @return an iterator over ids, as produced by the fragment index. NOTE(review): presumably the ids indexed
	 *         under fragments matching the constraint -- confirm against FragmentIndex
	 */
	public ClosableIterator<V> constraintFragmentIterator(final Constraint<String> fragmentConstraint) {
		return this.fragmentIndex.constraintIterator(fragmentConstraint);
	}

	/**
	 * Checks whether the given fragment is a key of the underlying fragment index.
	 *
	 * @param fragment the fragment to look up; used as given, no normalisation is applied here
	 * @return the result of {@code FragmentIndex.containsKey(fragment)}
	 */
	public boolean containsFragment(final String fragment) {
		return this.fragmentIndex.containsKey(fragment);
	}

	/**
	 * Delegates to {@code FragmentIndex.contains}.
	 *
	 * @param fragmentConstraint constraint on the fragments; passed on unchanged
	 * @param idConstraint constraint on the ids; passed on unchanged
	 * @return the result of the fragment index. NOTE(review): presumably true if at least one (fragment, id) entry
	 *         matches both constraints -- confirm against FragmentIndex
	 */
	public boolean containsFragmentsMatching(final Constraint<String> fragmentConstraint,
			final Constraint<V> idConstraint) {
		return this.fragmentIndex.contains(fragmentConstraint, idConstraint);
	}

	/**
	 * Creates a query for a single fragment against the fragment index of this text index.
	 *
	 * @param fragment the fragment to look for; used as given, no normalisation is applied here
	 * @param prefixMatch if true, returns more matches by answering 'fragment*' instead of 'fragment'
	 * @return the query
	 */
	public FragmentQuery<V> createFragmentQuery(final String fragment, final boolean prefixMatch) {
		return FragmentQuery.<V> create(this.fragmentIndex, fragment, prefixMatch);
	}

	/**
	 * Creates a query over all given fragments combined with AND. A set with exactly one fragment yields a plain
	 * fragment query. Otherwise the fragments are sorted with {@link LongestStringComparator} and added to an
	 * {@link AndQuery} in that order.
	 *
	 * @param fragments the fragments to combine; used as given, no normalisation is applied here
	 * @param prefixMatch if true, each fragment is answered as 'fragment*' instead of 'fragment'
	 * @return a single fragment query or an AND-query
	 */
	public IQuery<V> createAndQuery(final Set<String> fragments, final boolean prefixMatch) {
		// a single fragment needs no conjunction
		if (fragments.size() == 1) {
			final String only = fragments.iterator().next();
			return createFragmentQuery(only, prefixMatch);
		}

		// optimize: longer fragments first
		final List<String> ordered = new ArrayList<String>(fragments);
		Collections.sort(ordered, new LongestStringComparator());

		final AndQuery<V> conjunction = new AndQuery<V>();
		final Iterator<String> it = ordered.iterator();
		while (it.hasNext()) {
			final FragmentQuery<V> part = createFragmentQuery(it.next(), prefixMatch);
			conjunction.and(part);
		}
		return conjunction;
	}

	/**
	 * Creates a text query for the phrase, using the fragment index, tokenizers and normaliser of this instance.
	 * Delegates to the static {@code createTextQuery} method of this class.
	 *
	 * @param queryPhrase the raw phrase to search for
	 * @param optionalValueFilter @CanBeNull passed on unchanged
	 * @param prefixQUery passed on as the prefix flag of the created query
	 * @return the combined query
	 */
	public ITextQuery<V> createTextQuery(final String queryPhrase, final IFilter<V> optionalValueFilter,
			final boolean prefixQUery) {
		return createTextQuery(this.fragmentIndex, this.phrase2wordTokenizer, this.phrase2fragmentTokenizer,
				this.normaliser, queryPhrase, optionalValueFilter, prefixQUery);
	}

	/**
	 * Delegates to {@code FragmentIndex.deIndex(fragment)}. NOTE(review): presumably removes every id indexed under
	 * the fragment -- confirm against FragmentIndex.
	 *
	 * @param fragment the fragment to de-index; used as given, no normalisation is applied here
	 */
	public void deIndexFragment(final String fragment) {
		this.fragmentIndex.deIndex(fragment);
	}

	/**
	 * Delegates to {@code FragmentIndex.deIndex(fragment, id)}.
	 *
	 * @param fragment the fragment; used as given, no normalisation is applied here
	 * @param id the id to remove from the fragment
	 * @return the result of the fragment index. NOTE(review): presumably true if an entry was removed -- confirm
	 */
	public boolean deIndexFragment(final String fragment, final V id) {
		return this.fragmentIndex.deIndex(fragment, id);
	}

	/**
	 * Splits the phrase into fragments, normalises each fragment and de-indexes the id from it.
	 *
	 * @param phrase the phrase whose fragments are de-indexed
	 * @param id the id to remove from each normalised fragment
	 */
	public void deIndexText(final String phrase, final V id) {
		final SplitResult parts = this.phrase2fragmentTokenizer.split(phrase);
		int pos = 0;
		while (pos < parts.length()) {
			deIndexFragment(this.normaliser.normalise(parts.get(pos)), id);
			pos++;
		}
	}

	/** Delegates to {@code FragmentIndex.dump()}. NOTE(review): presumably debug output -- confirm. */
	public void dump() {
		this.fragmentIndex.dump();
	}

	/**
	 * Two text indexes are equal if their fragment indexes are equal. Normaliser and tokenizers are not compared.
	 *
	 * @param obj the object to compare with
	 * @return true if obj is a TextIndex with an equal fragment index
	 */
	@Override
	public boolean equals(final Object obj) {
		if (!(obj instanceof TextIndex)) {
			return false;
		}
		// wildcard cast: the id type is irrelevant for delegating to equals(Object)
		final TextIndex<?> other = (TextIndex<?>) obj;
		return this.fragmentIndex.equals(other.fragmentIndex);
	}

	/**
	 * Delegates to {@code FragmentIndex.tupleIterator}.
	 *
	 * @param fragmentConstraint constraint on the fragments; passed on unchanged
	 * @param idConstraint constraint on the ids; passed on unchanged
	 * @return an iterator over (fragment, id) tuples as produced by the fragment index
	 */
	public ClosableIterator<KeyEntryTuple<String, V>> fragmentIdTupleIterator(
			final Constraint<String> fragmentConstraint, final Constraint<V> idConstraint) {
		return this.fragmentIndex.tupleIterator(fragmentConstraint, idConstraint);
	}

	/**
	 * Executes the query on the underlying fragment index.
	 *
	 * @param fragmentQuery the query to run
	 * @return an iterator over the ids returned by {@code FragmentIndex.executeQuery}
	 */
	public Iterator<V> fragmentQuery(final FragmentQuery<V> fragmentQuery) {
		return this.fragmentIndex.executeQuery(fragmentQuery);
	}

	/**
	 * Executes the query on the underlying fragment index, returning the result as sets of ids.
	 *
	 * @param fragmentQuery the query to run
	 * @return an iterator over id sets as returned by {@code FragmentIndex.executeQueryAsSets}. NOTE(review):
	 *         presumably one set per matching fragment -- confirm against FragmentIndex
	 */
	public ClosableIterator<Set<V>> fragmentQueryAsSets(final FragmentQuery<V> fragmentQuery) {
		return this.fragmentIndex.executeQueryAsSets(fragmentQuery);
	}

	/**
	 * @return an iterator over the keys of the underlying fragment index, i.e. the indexed fragments
	 */
	public ClosableIterator<String> fragmentsIterator() {
		return this.fragmentIndex.keyIterator();
	}

	/**
	 * Tries to match text.substring(start) within the prefix tree. Returns the length, in characters, of the longest
	 * full match (for which an ID is indexed). Delegates to {@code FragmentIndex.getLongestMatch}.
	 *
	 * @param text the text to match against the indexed fragments
	 * @param start the position in text at which matching starts
	 * @return a pair: 1) the number of characters, starting from start, that form the longest match so that at
	 *         least one result is returned; 2) the entry set of result values @CanBeNull
	 */
	public Pair<Integer, Set<V>> getLongestMatch(final String text, final int start) {
		return this.fragmentIndex.getLongestMatch(text, start);
	}

	/**
	 * @return the hash code of the underlying fragment index; matches {@link #equals(Object)}, which compares only
	 *         the fragment index
	 */
	@Override
	public int hashCode() {
		return this.fragmentIndex.hashCode();
	}

	/**
	 * Indexes the id under the given fragment by delegating to {@code FragmentIndex.index}.
	 *
	 * @param fragment the fragment; used as given, no normalisation is applied here
	 * @param id the id to index under the fragment
	 * @return the result of the fragment index. NOTE(review): presumably true if the entry was newly added --
	 *         confirm against FragmentIndex
	 */
	public boolean indexFragment(final String fragment, final V id) {
		return this.fragmentIndex.index(fragment, id);
	}

	/**
	 * Splits the phrase into fragments with the fragment tokenizer, normalises each fragment and indexes the id
	 * under it.
	 *
	 * @param phrase will be tokenized into words (separated by space), tokens (separated by @@) and fragments
	 *        (separated by casing and character kind, i.e. digit vs. letter). Must not be null (asserted).
	 * @param id the id to index under each normalised fragment
	 */
	public void indexText(final String phrase, final V id) {
		assert phrase != null;
		final SplitResult parts = this.phrase2fragmentTokenizer.split(phrase);
		int pos = 0;
		while (pos < parts.length()) {
			indexFragment(this.normaliser.normalise(parts.get(pos)), id);
			pos++;
		}
	}

	/**
	 * @return true if the underlying fragment index reports itself as empty
	 */
	public boolean isEmpty() {
		return this.fragmentIndex.isEmpty();
	}

	/**
	 * Looks up the entries of a fragment directly in the underlying fragment index.
	 *
	 * @param fragment the fragment to look up; used as given, no normalisation is applied here
	 * @return the raw lookup result of {@code FragmentIndex.lookup}
	 */
	public IEntrySet<V> lookupFragment(final String fragment) {
		return this.fragmentIndex.lookup(fragment);
	}

	/**
	 * Batch query: runs every query and collects the ids of each query into one set.
	 *
	 * @param queries the queries to run @CanBeNull
	 * @return the results of running the queries; result[i] belongs to queries[i]. An empty array if queries is
	 *         null or empty. The individual sets may be unmodifiable.
	 */
	@SuppressWarnings("unchecked")
	public Set<V>[] query(final FragmentQuery<V>... queries) {

		if (queries == null || queries.length == 0) {
			return new Set[0];
		}

		final Set<V>[] result = new Set[queries.length];

		for (int i = 0; i < queries.length; i++) {
			final ClosableIterator<Set<V>> it = this.fragmentQueryAsSets(queries[i]);
			try {
				result[i] = toSingleSet(it);
			} finally {
				// release the iterator even if collecting the sets fails
				it.close();
			}
		}
		return result;
	}

	/**
	 * @return the debug string of the underlying fragment index
	 */
	public String toDebugString() {
		return this.fragmentIndex.toDebugString();
	}

	/**
	 * Splits a phrase into fragments and normalises each of them. If the phrase yields no fragments at all, the
	 * phrase itself (not normalised) becomes the only element of the result.
	 *
	 * @param phrase @CanBeNull
	 * @return @NeverNull the distinct normalised fragments; an empty, unmodifiable set if phrase is null, otherwise a
	 *         modifiable set with at least one element
	 */
	public Set<String> toNormalisedFragments(final String phrase) {
		if (phrase == null) {
			return Collections.emptySet();
		}

		final SplitResult parts = this.phrase2fragmentTokenizer.split(phrase);
		final Set<String> normalised = new HashSet<String>();
		int pos = 0;
		while (pos < parts.length()) {
			normalised.add(this.normaliser.normalise(parts.get(pos)));
			pos++;
		}

		// fall back to the raw phrase so that there is always something to index
		if (normalised.isEmpty()) {
			log.trace("Phrase '" + phrase + "' is weird, results in no fragments. Indexing as-is.");
			normalised.add(phrase);
		}
		return normalised;
	}

	/**
	 * Merges all sets of the iterator into one set. Efficient if iterator contains in fact only one set: that set is
	 * then wrapped instead of copied.
	 *
	 * @param it iterator over the sets to merge; it is consumed completely but not closed here
	 * @return an unmodifiable empty set if the iterator has no elements; an unmodifiable view of the single set if
	 *         there is exactly one; otherwise a new modifiable set holding the union of all sets
	 */
	private Set<V> toSingleSet(final Iterator<Set<V>> it) {
		if (!it.hasNext()) {
			return Collections.emptySet();
		}

		final Set<V> first = it.next();
		if (!it.hasNext()) {
			// only one set: no need to copy
			return Collections.unmodifiableSet(first);
		}

		// deal with multiple sets; the union must start with the set that was already taken from the iterator
		final Set<V> union = new HashSet<V>(first);
		while (it.hasNext()) {
			union.addAll(it.next());
		}
		return union;
	}

	/**
	 * @return the string representation of the underlying fragment index
	 */
	@Override
	public String toString() {
		return this.fragmentIndex.toString();
	}

	/**
	 * Updates the index entries of an id after its text changed from oldPhrase to newPhrase. Only the difference is
	 * applied: fragments occurring only in the old phrase are de-indexed first, then fragments occurring only in the
	 * new phrase are indexed. One of oldPhrase or newPhrase is not null.
	 *
	 * @param oldPhrase @CanBeNull
	 * @param newPhrase @CanBeNull
	 * @param id @NeverNull
	 */
	public void updateIndexText(final String oldPhrase, final String newPhrase, final V id) {
		assert oldPhrase != null || newPhrase != null : "old&new are null";
		final Set<String> before = toNormalisedFragments(oldPhrase);
		final Set<String> after = toNormalisedFragments(newPhrase);

		// create diff: fragments that are new
		final Set<String> toAdd = new HashSet<String>(after.size());
		for (final String candidate : after) {
			if (!before.contains(candidate)) {
				toAdd.add(candidate);
			}
		}

		// use diff: drop what is gone ...
		for (final String candidate : before) {
			if (!after.contains(candidate)) {
				deIndexFragment(candidate, id);
			}
		}
		// ... then add what is new
		for (final String fragment : toAdd) {
			indexFragment(fragment, id);
			assert lookupFragment(fragment).contains(id);
		}
	}

	/**
	 * Replaces the root node of the internal index state of the underlying fragment index.
	 *
	 * @param rootNode the node to set as root; passed on unchanged
	 */
	public void setInternalIndexState(final SmallTrieStringMapSetIndex<V>.Node rootNode) {
		this.fragmentIndex.getInternalIndexState().setRootNote(rootNode);
	}

	/**
	 * @return the root node of the internal index state of the underlying fragment index
	 */
	public SmallTrieStringMapSetIndex<V>.Node getInternalIndexState() {
		return this.fragmentIndex.getInternalIndexState().getRootNode();
	}

	/**
	 * @return the size reported by the underlying fragment index
	 */
	public int size() {
		return this.fragmentIndex.size();
	}

}
