//
// tapliterate.js - Tapioca Transliteration module
//                  Convert Manglish to Malayalam Unicode
//
//          (c)  2003-2006 Thomas P John
//          See Tapioca copyright page for further details
// 

// Token type codes. They are plain `var`s because the author found no
// `const` keyword in JavaScript when this was written.

// no real type: ZWJ, ZWNJ and unmatched characters are grouped here
var T_NONE = 0;
// vowels
var T_VOWEL = 1;
// normal consonants
var T_CONSONANT = 2;
// chillu forming consonants; see ischillu() for the list
var T_CHILLUCONSONANT = 3;
// special signs (the '~' chandrakkala token is given this type)
var T_SPECIALS = 4;

// System variables / configuration switches.

// conf_midchillu: when true, chillu forming consonants are handled in their
// chillu form in the middle of a word as well. Varamozhi does it this way,
// which is extremely convenient. However, it needs additional logic (not
// worked out yet) to still allow koottaksharams that involve chillu
// consonants (e.g. NT), so the switch is off by default.
var conf_midchillu = false;

// ischillu(c): returns true if c is the entity string of a chillu forming
// consonant, false otherwise.
// Six code points are recognised:
//   0x0d23 - N, 0x0d28 - n, 0x0d2e - m, 0x0d30 - r, 0x0d32 - l, 0x0d33 - L
// m is a special case: anusvara is considered the chillu of m.
// c is compared against the lower-case "&#x....;" spellings listed below.

function ischillu(c)
{
    var chilluEntities = [
        "&#x0d28;", "&#x0d2e;", "&#x0d30;",
        "&#x0d32;", "&#x0d33;", "&#x0d23;"
    ];

    for (var k = 0; k < chilluEntities.length; k++)
    {
        // loose comparison on purpose: String objects compare by value
        if (c == chilluEntities[k]) return true;
    }
    return false;
}

// isspace(c): returns true if c is a word delimiter, false otherwise.
// Delimiters are: space, tab, line feed, carriage return, and the
// punctuation characters  . , ; : / \ - _
// Hyphen is considered a word delimiter. So is '_', the input token for ZWJ.
// c is expected to be a single character; anything else (including the
// empty string) is not a delimiter.

function isspace(c)
{
    // String() lets String objects be classified like primitive strings
    switch (String(c))
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '.':
        case ',':
        case ';':
        case ':':
        case '/':
        case '\\':
        case '-':
        case '_':
            return true;
        default:
            return false;
    }
}

// xlit_to_mal(s): transliterates the Manglish string s and returns the
// Malayalam text as a string of HTML numeric entities ("&#x0d15;" ...).
// Characters that match no input token are copied through unchanged, except
// unmapped Latin letters, which are dropped.
//
// The input is consumed token by token, always taking the longest token that
// matches at the current position (tokens are 1 to 3 characters long). Each
// token is looked up directly in a table, so the work per token does not
// depend on how much input is left.
//
// Rules applied to each token:
//   RULE #1  a vowel that is not at the start of a word is written with its
//            dependent sign, if it has one
//   RULE #2  a short 'a' that is not at the start of a word emits nothing
//            (presumably because the preceding consonant already carries it)
//   RULE #3  a consonant directly after a consonant is joined to it with a
//            chandrakkala; with conf_midchillu set, a ZWJ is added as well
//            when the previous consonant is a different, chillu forming one
//   RULE #4  anything else is emitted as is, unless it starts with a Latin
//            letter
//   RULE #5  a consonant at the end of a word gets a chandrakkala, followed
//            by a ZWJ if it is chillu forming (RULE #5A)
// Finally every m + chandrakkala + ZWJ sequence is replaced by anusvara.
//
// Relies on isspace(), ischillu(), conf_midchillu and the T_* constants.

function xlit_to_mal(s)
{
    var CHANDRAKKALA = "&#x0d4d;";
    var ZWJ = "&#x200d;";
    var SHORT_A = "&#x0d05;";
    var MAX_TOKEN_LENGTH = 3;

    var V = T_VOWEL;
    var C = T_CONSONANT;
    var CH = T_CHILLUCONSONANT;

    // input token -> [independent form, dependent form ("" if none), type]
    var tokens = {
        "chh": ["&#x0d1b;", "", C],
        "thh": ["&#x0d25;", "", C],

        "aa":  ["&#x0d06;", "&#x0d3e;", V],
        "ee":  ["&#x0d08;", "&#x0d40;", V],
        "oo":  ["&#x0d0a;", "&#x0d42;", V],
        "r^":  ["&#x0d0b;", "&#x0d43;", V],
        "ai":  ["&#x0d10;", "&#x0d48;", V],
        "au":  ["&#x0d14;", "&#x0d4c;", V],
        "kh":  ["&#x0d16;", "", C],
        "gh":  ["&#x0d18;", "", C],
        "ng":  ["&#x0d19;", "", C],
        "ch":  ["&#x0d1a;", "", C],
        "jh":  ["&#x0d1d;", "", C],
        "nj":  ["&#x0d1e;", "", C],
        "Th":  ["&#x0d20;", "", C],
        "Dh":  ["&#x0d22;", "", C],
        "th":  ["&#x0d24;", "", C],
        "dh":  ["&#x0d27;", "", C],
        "bh":  ["&#x0d2d;", "", C],
        "zh":  ["&#x0d34;", "", C],
        "sh":  ["&#x0d37;", "", C],

        "a":   [SHORT_A, "", V],
        "u":   ["&#x0d09;", "&#x0d41;", V],
        "e":   ["&#x0d0e;", "&#x0d46;", V],
        "E":   ["&#x0d0f;", "&#x0d47;", V],
        "o":   ["&#x0d12;", "&#x0d4a;", V],
        "O":   ["&#x0d13;", "&#x0d4b;", V],
        "H":   ["&#x0d03;", "", V],
        "k":   ["&#x0d15;", "", C],
        "c":   ["&#x0d15;", "", C],          // same letter as k
        "g":   ["&#x0d17;", "", C],
        "j":   ["&#x0d1c;", "", C],
        "T":   ["&#x0d1f;", "", C],
        "D":   ["&#x0d21;", "", C],
        "N":   ["&#x0d23;", "", CH],
        "d":   ["&#x0d26;", "", C],
        "n":   ["&#x0d28;", "", CH],
        "p":   ["&#x0d2a;", "", C],
        "f":   ["&#x0d2b;", "", C],
        "b":   ["&#x0d2c;", "", C],
        "m":   ["&#x0d2e;", "", CH],
        "y":   ["&#x0d2f;", "", C],
        "r":   ["&#x0d30;", "", CH],
        "R":   ["&#x0d31;", "", C],
        "t":   ["&#x0d31;&#x0d4d;&#x0d31;", "", C],
        "l":   ["&#x0d32;", "", CH],
        "L":   ["&#x0d33;", "", CH],
        "v":   ["&#x0d35;", "", C],
        "w":   ["&#x0d35;", "", C],          // same letter as v
        "S":   ["&#x0d36;", "", C],
        "s":   ["&#x0d38;", "", C],
        "h":   ["&#x0d39;", "", C],
        "~":   [CHANDRAKKALA, "", T_SPECIALS],
        "i":   ["&#x0d07;", "&#x0d3f;", V],
        "M":   ["&#x0d02;", "", T_NONE],     // anusvara
        "_":   [ZWJ, "", T_NONE],            // Zero Width Join
        "#":   ["&#x200c;", "", T_NONE]      // Zero Width No Join
    };
    var hasOwn = Object.prototype.hasOwnProperty;
    var startsWithLatin = /^[A-Za-z]/;

    var sl = s.length;          // length of the input
    var i = 0;                  // index of the next unread character
    var pieces = [];            // output fragments, joined at the end
    var atWordStart = true;     // true while the current token starts a word
    var prevType = T_NONE;      // type of the previous token
    var prevForm = "";          // emitted form of the previous token

    while (i < sl)
    {
        // longest-match lookup of the token starting at i
        var entry = null;
        var tl = 0;
        for (var len = MAX_TOKEN_LENGTH; len >= 1 && entry === null; len--)
        {
            if (i + len > sl) continue;
            var key = s.substring(i, i + len);
            if (hasOwn.call(tokens, key))
            {
                entry = tokens[key];
                tl = len;
            }
        }

        var form;        // what this token contributes to the output
        var dependent;   // dependent vowel sign, "" if there is none
        var type;        // one of the T_* constants
        if (entry !== null)
        {
            form = entry[0];
            dependent = entry[1];
            type = entry[2];
        }
        else
        {
            // no token match: pass the single character through
            form = s.charAt(i);
            dependent = "";
            type = T_NONE;
            tl = 1;
        }

        var atWordEnd = (i + tl >= sl) || isspace(s.charAt(i + tl));
        var isConsonant = (type == T_CONSONANT || type == T_CHILLUCONSONANT);
        var prevIsConsonant = (prevType == T_CONSONANT || prevType == T_CHILLUCONSONANT);

        if (!atWordStart && dependent !== "") form = dependent; // RULE #1

        if (!atWordStart && form === SHORT_A)
        {
            // RULE #2: nothing to emit
        }
        else if (prevIsConsonant && isConsonant) // RULE #3
        {
            pieces.push(CHANDRAKKALA);
            if ((prevForm != form) && ischillu(prevForm) && conf_midchillu) // RULE #3B
            {
                pieces.push(ZWJ);
            }
            pieces.push(form);
        }
        else if (!startsWithLatin.test(form)) // RULE #4
        {
            pieces.push(form);
        }

        if (atWordEnd && isConsonant) // RULE #5
        {
            pieces.push(CHANDRAKKALA);
            if (ischillu(form)) pieces.push(ZWJ); // RULE #5A
        }

        // a delimiter token means the next token starts a new word
        atWordStart = isspace(form) ? true : false;

        i += tl;
        prevType = type;
        prevForm = form;
    }

    // m + chandrakkala + ZWJ is written as anusvara; the i flag makes the
    // match independent of the case of the hex digits
    return pieces.join("").replace(/&#x0d2E;&#x0d4d;&#x200d;/ig, "&#x0d02;");
}




