package de.xam.textsearch.score;

import java.util.List;

import org.xydra.log.api.Logger;
import org.xydra.log.api.LoggerFactory;

import de.xam.texthtml.text.TextTool;
import de.xam.textsearch.Match;
import de.xam.textsearch.util.TextIndexTool;

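/**
 * Default scorer: rates how well a content string matches a search phrase and
 * stores the result in {@code match.q}.
 *
 * A phrase hit short-circuits everything else. The checks run in the order
 * below and q is the weight of the first one that applies:
 *
 * 120: phrase is a whitespace-delimited part of the content, with casing
 *
 * 105: phrase occurs anywhere inside the content, with casing
 *
 * 110: phrase is a whitespace-delimited part of the content, case-insensitive
 *
 * 100: phrase occurs anywhere inside the content, case-insensitive
 *
 * Only if the phrase is not found at all, we use the following digits, each
 * normalised in 0..1. The ratios are taken by string length (sum of the
 * lengths of the matched strings / sum of the lengths of all strings), not by
 * count.
 *
 * c) case-matching words / words
 *
 * d) words matching only case-insensitively / words
 *
 * e) case-matching fragments / fragments
 *
 * f) fragments matching only case-insensitively / fragments
 *
 * q = 12 c + 10 d + 1.2 e + f
 *
 * @param <V> match value type
 */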
public class DefaultScorer<V> implements IScorer<V> {

	private static final Logger log = LoggerFactory.getLogger(DefaultScorer.class);

	/** Phrase is a whitespace-delimited part of the content, same casing. */
	private static final double WEIGHT_PHRASE_SOLITAIRE_CASE = 120d;
	/** Phrase is a whitespace-delimited part of the content, ignoring case. */
	private static final double WEIGHT_PHRASE_SOLITAIRE_NOCASE = 110d;
	/** Phrase occurs somewhere inside the content, same casing. */
	private static final double WEIGHT_PHRASE_INSIDE_CASE = 105d;
	/** Phrase occurs somewhere inside the content, ignoring case. */
	private static final double WEIGHT_PHRASE_INSIDE_NOCASE = 100d;
	/** Weight of the share of words found with the same casing. */
	private static final double WEIGHT_C = 12d;
	/** Weight of the share of words found only when ignoring case. */
	private static final double WEIGHT_D = 10d;
	/** Weight of the share of fragments found with the same casing. */
	private static final double WEIGHT_E = 1.2d;
	/** Weight of the share of fragments found only when ignoring case. */
	private static final double WEIGHT_F = 1d;

	/**
	 * Computes the quality of a match, stores it in {@code match.q} and
	 * registers every matched string via {@code match.addMatchedString}.
	 *
	 * The phrase is tried first (padded/exact case, inside/exact case,
	 * padded/ignore case, inside/ignore case). The first hit decides the score
	 * and words and fragments are not inspected at all. Only without a phrase
	 * hit, the score is {@code 12 c + 10 d + 1.2 e + f}, where each factor is
	 * the length-weighted share of matched words or fragments.
	 *
	 * NOTE(review): this class has no instance state; {@code synchronized} is
	 * kept only to preserve the existing behaviour.
	 *
	 * @param match receives the score and the matched strings
	 * @param content the text to search in; asserted non-null and non-blank
	 * @param phrase the complete search phrase; asserted non-null and non-blank
	 * @param words the words of the phrase; may be empty
	 * @param fragments the fragments of the phrase; may be empty
	 */
	@Override
	public synchronized void score(final Match<V> match, final String content, final String phrase,
			final List<String> words, final List<String> fragments) {
		assert content != null;
		assert TextTool.trim(content).length() > 0;
		assert phrase != null;
		assert TextTool.trim(phrase).length() > 0;

		// --- 1) whole phrase, best variant first in checking order
		if (containsPadded(content, phrase)) {
			match.q = WEIGHT_PHRASE_SOLITAIRE_CASE;
			match.addMatchedString(phrase);
			return;
		}
		if (content.contains(phrase)) {
			match.q = WEIGHT_PHRASE_INSIDE_CASE;
			match.addMatchedString(phrase);
			return;
		}

		// NOTE(review): toLowerCase() uses the default locale; left as is so
		// that casing stays consistent with code outside this class.
		final String contentLowercase = content.toLowerCase();
		final String phraseLowercase = phrase.toLowerCase();
		if (containsPadded(contentLowercase, phraseLowercase)) {
			match.q = WEIGHT_PHRASE_SOLITAIRE_NOCASE;
			match.addMatchedString(phrase);
			return;
		}
		if (contentLowercase.contains(phraseLowercase)) {
			match.q = WEIGHT_PHRASE_INSIDE_NOCASE;
			match.addMatchedString(phrase);
			return;
		}

		// --- 2) no phrase hit: score words and fragments
		// NOTE(review): a factor stays at 1 (full score) when its list is
		// empty; confirm that this is intended.
		double c = 1;
		double d = 1;
		double e = 1;
		double f = 1;

		int matchedWordsCaseSensitive = 0;
		int matchedWordsCaseInsensitive = 0;
		int matchedFragmentsCaseSensitive = 0;
		int matchedFragmentsCaseInsensitive = 0;
		// -1 = "not computed", only visible in the trace output
		int sumOfWordLengths = -1;
		int sumOfFragmentLengths = -1;

		if (!words.isEmpty()) {
			sumOfWordLengths = TextIndexTool.sumOfStringLengths(words);
			assert sumOfWordLengths > 0 : words;

			for (final String word : words) {
				assert word != null;
				assert TextTool.trim(word).length() > 0;
				// each word counts for exactly one of the two buckets
				if (content.contains(word)) {
					matchedWordsCaseSensitive += word.length();
					match.addMatchedString(word);
				} else if (contentLowercase.contains(word.toLowerCase())) {
					matchedWordsCaseInsensitive += word.length();
					match.addMatchedString(word);
				}
			}

			c = (double) matchedWordsCaseSensitive / (double) sumOfWordLengths;
			d = (double) matchedWordsCaseInsensitive / (double) sumOfWordLengths;
		}

		if (!fragments.isEmpty()) {
			sumOfFragmentLengths = TextIndexTool.sumOfStringLengths(fragments);
			assert sumOfFragmentLengths > 0;

			for (final String fragment : fragments) {
				// already credited, e.g. as a word
				if (match.hasMatchedString(fragment)) {
					continue;
				}

				if (content.contains(fragment)) {
					matchedFragmentsCaseSensitive += fragment.length();
					match.addMatchedString(fragment);
				} else if (contentLowercase.contains(fragment.toLowerCase())) {
					// lower-case both sides, same as for words; otherwise a
					// fragment with an upper-case letter could never match here
					matchedFragmentsCaseInsensitive += fragment.length();
					match.addMatchedString(fragment);
				}
			}

			e = (double) matchedFragmentsCaseSensitive / (double) sumOfFragmentLengths;
			f = (double) matchedFragmentsCaseInsensitive / (double) sumOfFragmentLengths;
		}

		match.q = WEIGHT_C * c + WEIGHT_D * d + WEIGHT_E * e + WEIGHT_F * f;

		if (log.isTraceEnabled()) {
			log.trace("q=" + match.q + " matchedWordsCaseSensitive="
					+ matchedWordsCaseSensitive

					+ " matchedWordsCaseInsensitive=" + matchedWordsCaseInsensitive

					+ " matchedfragmentsCaseSensitive=" + matchedFragmentsCaseSensitive

					+ " matchedfragmentsCaseInsensitive=" + matchedFragmentsCaseInsensitive

					+ " sumOfWordLengths=" + sumOfWordLengths

					+ " sumOfFragmentLengths=" + sumOfFragmentLengths + "\n" + " c=" + c
					+ " d=" + d + " e=" + e + " f=" + f);
		}
	}

	/**
	 * Checks if part occurs in s delimited by whitespace (or the start/end of
	 * s) on both sides. Part itself may contain whitespace.
	 *
	 * @param s @NeverNull
	 * @param part @NeverNull; an empty part never matches
	 * @return true iff some occurrence of part in s is preceded by whitespace
	 *         or the start of s and followed by whitespace or the end of s.
	 *         Whitespace is what {@link #isWhitespace(int)} accepts.
	 */
	private static boolean containsPadded(final String s, final String part) {
		assert s != null;
		assert part != null;
		if (part.isEmpty()) {
			// indexOf("", from) always succeeds and never advances past
			// s.length(), so scanning for an empty part might never terminate
			return false;
		}
		int i = s.indexOf(part);
		while (i >= 0) {
			final int end = i + part.length();
			final boolean paddedBefore = i == 0 || isWhitespace(s.codePointAt(i - 1));
			final boolean paddedAfter = end == s.length() || isWhitespace(s.codePointAt(end));
			if (paddedBefore && paddedAfter) {
				return true;
			}
			// this occurrence is glued to other characters, try the next one
			i = s.indexOf(part, i + 1);
		}
		return false;
	}

	/**
	 * @param c a code point
	 * @return true iff c is a space, line feed, tab or carriage return; other
	 *         Unicode whitespace is not recognised
	 */
	private static boolean isWhitespace(final int c) {
		return c == ' ' || c == '\n' || c == '\t' || c == '\r';
	}

	/**
	 * Manual smoke test for {@link #containsPadded(String, String)}; prints one
	 * result per line.
	 *
	 * @param args ignored
	 */
	public static void main(final String[] args) {
		System.out.println(containsPadded("Hello World", "Hello"));
		System.out.println(containsPadded("Hello World", "World"));
		System.out.println(containsPadded("Hello World", "ell"));
	}

}
