package de.xam.texthtml.text;

import java.util.ArrayList;
import java.util.Locale;
import java.util.regex.Pattern;

import org.xydra.annotations.CanBeNull;
import org.xydra.core.util.RegExUtil;
import org.xydra.index.impl.IntegerRangeIndex.ISplitHandler;

import com.google.common.base.Joiner;

/**
 * Static helpers for recognizing CamelCase words, splitting them into fragments and converting between CamelCase and
 * space-separated spellings.
 */
public class CamelCase {

	/** Characters at which {@link #convertTo__WordWordWord(String)} cuts its input: space, tab, newline, dash. */
	private static final Pattern pattern_WORD_SEPARATOR = Pattern.compile("[ \t\n-]");

	/**
	 * First-generation CamelCase matcher.
	 *
	 * A match starts at a word boundary with one or more word characters that are neither digits nor '_', continues
	 * with one or more repetitions of (a run of non-uppercase word characters followed by a run of non-lowercase word
	 * characters) and ends, at a word boundary, with one or more word characters other than '_'. Consequently a match
	 * may not start with a digit.
	 *
	 * @deprecated {@link #isCamelCase(String)} is based on {@link #pattern_CAMELCASE}, not on this pattern.
	 */
	@Deprecated
	public static final Pattern pattern_CAMELCASE_v1 = Pattern.compile(

			/* enable Unicode flag */
			"(?U)"

			/* match only at start of word (word boundary) */
			+ "\\b"

			/* start with word characters that are neither a digit nor '_' */
			+ "[\\w&&[^\\p{Digit}_]]+"

			/* REPEAT (at least once) */
			+ "(?:"

			/* word characters that are not uppercase */
			+ "[\\w&&[^\\p{javaUpperCase}]]+"

			/* word characters that are not lowercase */
			+ "[\\w&&[^\\p{javaLowerCase}]]+"

			+ ")+"

			/* end with upper OR lower OR digit, but not '_' */
			+ "[\\w&&[^_]]+"

			/* end of word (word boundary) */
			+ "\\b"

			);

	/**
	 * Current CamelCase matcher. It was derived from the following regular expressions, each listed with the number
	 * of lines it matched in a corpus of candidate links:
	 *
	 * <pre>
	From 10600 potential CamelCase link lines:

	7219	^(?!_)(?!.*__)[A-Z][a-zäöüßáàéèíìóòúù]+(_?([A-Z][a-zäöüßáàéèíìóòúù]+|[0-9]+|[A-Z]+)|_[a-zäöüßáàéèíìóòúù]+){0,5}(?<!_)$
	    magic

	811 	^[a-zäöüßáàéèíìóòúù]+([A-Z]+[a-zäöüßáàéèíìóòúù]*)+$
	    starts with lowercase and contains at least one Uppercase

	157 	^(?!_)(?!.*__)[A-Z]+[a-zäöüßáàéèíìóòúù]+(_?([A-Z]+[a-zäöüßáàéèíìóòúù]+|[A-Z]+)|_[a-zäöüßáàéèíìóòúù]+)+(?<!_)$
	    no numbers but any number of Camel Case Bumps


	137	^(?!_)(?!.*__)([A-Z]+[_a-zäöüßáàéèíìóòúù]*)+((19|20)[0-9]{2})[A-Z_a-zäöüßáàéèíìóòúù]*(?<!_)$
		events with a year in 19xx - 20xx, followed by crap
	 * </pre>
	 *
	 * The compiled pattern is an adaptation of these expressions, not a literal copy: it uses
	 * {@code \p{javaUpperCase}} / {@code \p{javaLowerCase}} instead of explicit letter ranges, it requires 1 to 5
	 * repetitions in the first variant, and the variant starting with a lowercase letter is disabled. The expression
	 * is wrapped by {@code RegExUtil.unicode()} and {@code RegExUtil.inWordBoundary(..)}; presumably these enable
	 * Unicode matching and add word-boundary anchors (not visible in this file, verify in RegExUtil).
	 */
	public static final Pattern pattern_CAMELCASE = Pattern.compile(

			RegExUtil.unicode() + RegExUtil.inWordBoundary(

					/* don't start with underscore */
					"(?!_)"

					/* no 2 consecutive underscores anywhere */
					+ "(?!.*__)"

					/* start of the alternative variants (3 active, 1 disabled) */
					+ "("

					/* variant 1: magic */
					+ "[\\p{javaUpperCase}]+[\\p{javaLowerCase}]+"

					/* repeat 1 to 5 times */
					+ "(_?"
					+ "([\\p{javaUpperCase}]+[\\p{javaLowerCase}]*" + "|" + "[0-9]+)"
					+ "|"
					+ "_[\\p{javaLowerCase}]+"
					+ "){1,5}"

					+ "|"

					/* variant 2: no numbers but any number of Camel Case Bumps */
					+ "[\\p{javaUpperCase}]+[\\p{javaLowerCase}]+"
					+ "(_?" + "(" + "[\\p{javaUpperCase}]+[\\p{javaLowerCase}]+" + "|" + "[\\p{javaUpperCase}]+" + ")" + "|"
					+ "_[\\p{javaLowerCase}]+" + ")+"

					+ "|"

					/* variant 3: events with a year in 19xx - 20xx, followed by crap */
					// ([A-Z]+[_a-zäöüßáàéèíìóòúù]*)+((19|20)[0-9]{2})[A-Z_a-zäöüßáàéèíìóòúù]*
					+ "("
					+ "[\\p{javaUpperCase}]+[_\\p{javaLowerCase}]*"
					+ ")+"
					+ "("
					+ "(19|20)"
					+ "[0-9]{2}"
					+ ")"
					+ "[\\p{javaUpperCase}_\\p{javaLowerCase}]*"

					// variant 4 (DISABLED): starts with lowercase and contains at least one Uppercase
					//
					//	+ "|"
					//	// [a-zäöüßáàéèíìóòúù]+([A-Z]+[a-zäöüßáàéèíìóòúù]*)+
					//	+ "[\\p{javaLowerCase}]+(\\p{javaUpperCase}+[\\p{javaLowerCase}]*)+"

					/* end of variants */
					+ ")"

					/* don't end with underscore */
					+ "(?<!_)"

					)

			);

	/**
	 * @param t the candidate word @NeverNull
	 * @return true iff the complete string t is matched by {@link #pattern_CAMELCASE}
	 */
	public static boolean isCamelCase(final String t) {
		return pattern_CAMELCASE.matcher(t).matches();
	}

	/**
	 * Split a query/index word into tokens. E.g. "Product2014LE" becomes "Product","2014","LE"
	 *
	 * Unicode-safe: the input is processed code point by code point.
	 *
	 * @param token the word to split @NeverNull
	 * @return @NeverNull the fragments in order of appearance; empty for an empty token
	 */
	public static String[] splitCamelCaseAndDigits(final String token) {
		final ArrayList<String> fragments = new ArrayList<String>();

		// collect every reported token range as a substring; the other callbacks are irrelevant here
		final ISplitHandler collector = new ISplitHandler() {

			@Override
			public void onToken(final int startInclusive, final int endExclusive) {
				fragments.add(token.substring(startInclusive, endExclusive));
			}

			@Override
			public void onSeparator(final int startInclusive, final int endExclusive) {
				// not used
			}

			@Override
			public void onDone() {
				// not used
			}

		};
		splitCamelCaseAndDigits(token, 0, token.length(), collector);

		return fragments.toArray(new String[0]);
	}

	/**
	 * Split a range of a query/index word into tokens and report each token to the handler. E.g. "Product2014LE"
	 * becomes "Product","2014","LE"
	 *
	 * A new token starts wherever the kind of character changes <em>to</em> a digit or <em>to</em> an uppercase
	 * letter. Changes to any other kind (e.g. from uppercase to lowercase) do not split, so an uppercase run stays
	 * together with the lowercase letters that follow it.
	 *
	 * Unicode-safe: the input is processed code point by code point.
	 *
	 * NOTE(review): only {@link ISplitHandler#onToken(int, int)} is invoked; neither onSeparator nor onDone is ever
	 * called by this method. Confirm that callers do not rely on onDone.
	 *
	 * @param s @NeverNull
	 * @param startInclusive in s
	 * @param endExclusive in s
	 * @param splitHandler receives the range of each token, as offsets into s
	 */
	public static void splitCamelCaseAndDigits(final String s, final int startInclusive, final int endExclusive,
			final ISplitHandler splitHandler) {
		assert s != null;
		if (endExclusive == startInclusive) {
			// empty range: nothing to report
			return;
		}

		Unicodes.KindOfCharacter previousKind = Unicodes.KindOfCharacter.None;
		int fragmentStart = startInclusive;
		int offset = startInclusive;
		while (offset < endExclusive) {
			final int codepoint = s.codePointAt(offset);
			final Unicodes.KindOfCharacter kind = Unicodes.getKindOfCharacter(codepoint);
			if (kind != previousKind) {
				// only a change TO digit or TO uppercase opens a new token
				final boolean startsNewToken = kind == Unicodes.KindOfCharacter.Digit
						|| kind == Unicodes.KindOfCharacter.Uppercase;
				// never emit an empty token, e.g. when the very first character is uppercase
				if (startsNewToken && offset > fragmentStart) {
					splitHandler.onToken(fragmentStart, offset);
					fragmentStart = offset;
				}
				previousKind = kind;
			}
			// advance by 1 or 2 chars, depending on whether the code point is supplementary
			offset += Character.charCount(codepoint);
		}
		// report the trailing token
		if (fragmentStart < endExclusive) {
			splitHandler.onToken(fragmentStart, endExclusive);
		}
	}

	/**
	 * @param camelCased the word to convert @NeverNull
	 * @return the fragments of camelCased joined by single spaces: first letter upper-case, all other words
	 *         lower-cased
	 */
	public static String convertTo__Word_word_word(final String camelCased) {
		return convertTo__Word_word_word(splitCamelCaseAndDigits(camelCased));
	}

	/**
	 * @param words @NeverNull, not modified
	 * @return the words joined by single spaces; the first word gets its first character upper-cased (rest
	 *         unchanged), all following words are lower-cased. The empty string for an empty array.
	 */
	public static String convertTo__Word_word_word(final String[] words) {
		if (words.length == 0) {
			return "";
		}
		final String[] converted = new String[words.length];
		converted[0] = capitalize(words[0]);
		for (int i = 1; i < words.length; i++) {
			converted[i] = TextTool.lowercase(words[i]);
		}
		return Joiner.on(" ").join(converted);
	}

	/**
	 * @param s @CanBeNull
	 * @return s with first character (code point) upper-cased, rest unchanged; null for null, "" for ""
	 */
	private static String capitalize(@CanBeNull final String s) {
		if (s == null) {
			return null;
		}
		if (s.length() == 0) {
			return s;
		}
		final int first = s.codePointAt(0);
		final StringBuilder b = new StringBuilder(s.length());
		b.appendCodePoint(Character.toUpperCase(first));
		// skip 1 or 2 chars, depending on whether the first code point is supplementary
		b.append(s, Character.charCount(first), s.length());
		return b.toString();
	}

	/**
	 * @param s @CanBeNull
	 * @return s with first character (code point) upper-cased, rest lower-cased; null for null, "" for ""
	 */
	private static String capitalizeFully(@CanBeNull final String s) {
		if (s == null) {
			return null;
		}
		if (s.length() == 0) {
			return s;
		}
		final int first = s.codePointAt(0);
		// Locale.ROOT keeps the result independent of the JVM default locale (e.g. no dotless i under Turkish),
		// in line with the locale-independent Character.toUpperCase(int) used for the first code point
		final String rest = s.substring(Character.charCount(first)).toLowerCase(Locale.ROOT);
		final StringBuilder b = new StringBuilder(s.length());
		b.appendCodePoint(Character.toUpperCase(first));
		b.append(rest);
		return b.toString();
	}

	/**
	 * @param camelCased the word to convert @NeverNull
	 * @return the fragments of camelCased joined by single spaces, all words title-cased
	 */
	public static String convertTo__Word_Word_Word(final String camelCased) {
		return convertTo__Word_Word_Word(splitCamelCaseAndDigits(camelCased));
	}

	/**
	 * @param words @NeverNull, not modified
	 * @return the words joined by single spaces, each with its first character upper-cased and the rest lower-cased.
	 *         The empty string for an empty array.
	 */
	public static String convertTo__Word_Word_Word(final String[] words) {
		if (words.length == 0) {
			return "";
		}
		final String[] converted = new String[words.length];
		for (int i = 0; i < words.length; i++) {
			converted[i] = capitalizeFully(words[i]);
		}
		return Joiner.on(" ").join(converted);
	}

	/**
	 * @param camelCased the word to convert @NeverNull
	 * @return the fragments of camelCased joined by single spaces, all words completely lower-cased
	 */
	public static String convertTo__word_word_word(final String camelCased) {
		// the array is freshly created by the split, so it can be lower-cased in place
		final String[] words = splitCamelCaseAndDigits(camelCased);
		for (int i = 0; i < words.length; i++) {
			words[i] = TextTool.lowercase(words[i]);
		}
		return Joiner.on(" ").join(words);
	}

	/**
	 * @param anyText @NeverNull
	 * @return anyText converted into CamelCase: the text is cut at every space, tab, newline or dash, the first
	 *         character of each resulting part is upper-cased (the rest of the part is left unchanged) and the parts
	 *         are concatenated without separator.
	 */
	public static String convertTo__WordWordWord(final String anyText) {
		final String[] parts = pattern_WORD_SEPARATOR.split(anyText);
		for (int i = 0; i < parts.length; i++) {
			parts[i] = capitalize(parts[i]);
		}
		return Joiner.on("").join(parts);
	}

}
