de/content/nwSearchFnt.js

/*----------------------------------------------------------------------------
 * JavaScript for webhelp search
 *----------------------------------------------------------------------------
 This file is part of the webhelpsearch plugin for DocBook WebHelp
 Copyright (c) 2007-2008 NexWave Solutions All Rights Reserved.
 www.nexwave.biz Nadege Quaine
 http://kasunbg.blogspot.com/ Kasun Gajasinghe
 */

//string initialization
// Name of the script listing the html files; loadTheIndexScripts appends it
// to the "../search/" path.
var htmlfileList = "htmlFileList.js";
// Name of the script holding the html file info list.
var htmlfileinfoList = "htmlFileInfoList.js";
// Recomputed by Effectuer_recherche on every search: true when
// indexerLanguage is "zh", "ja" or "ko", false otherwise.
var useCJKTokenizing = false;

/**
 * Checks the search expression typed by the user and, when it is not empty,
 * runs the search.
 *
 * The expression is read from document.ditaSearch_Form.textToSearch; the
 * ditaSearch_Form argument itself is not used.
 */
function Verifie(ditaSearch_Form) {

    // Browser compatibility check.
    // NOTE(review): the user agent is matched against "Konquerer" - confirm
    // that this spelling is the intended one.
    var isUnsupportedBrowser = navigator.userAgent.indexOf("Konquerer") > -1;
    if (isUnsupportedBrowser) {
        alert(txt_browser_not_supported);
        return;
    }

    var expressionInput = document.ditaSearch_Form.textToSearch.value;

    // Keep the searched keywords in a cookie (stored even when empty).
    $.cookie('textToSearch', expressionInput);

    if (expressionInput.length >= 1) {
        // Valid expression: run the search.
        Effectuer_recherche(expressionInput);
    } else {
        // Nothing was typed.
        alert(txt_enter_at_least_1_char);
    }

    // Give the focus back to the search field (useful because of frames).
    document.ditaSearch_Form.textToSearch.focus();
}

// Hashtable which maps stems to query words. A plain object is used instead
// of an Array so that a key such as "length" is stored as an ordinary
// property (assigning a word to an Array's "length" throws a RangeError).
var stemQueryMap = {};

/**
 * Parses the search expression, looks every resulting token up in the word
 * index and writes the result list into the element with id "searchResults".
 *
 * Relies on globals defined outside this function: the word index `w`
 * (word -> comma separated file indices), the file info table `fil`
 * ("path@@@title@@@shortdesc" strings), the txt_* messages and, optionally,
 * `indexerLanguage`.
 *
 * @param expressionInput the raw expression typed by the user
 */
function Effectuer_recherche(expressionInput) {

    // Query words come from the user and end up in innerHTML, so they are
    // escaped before being inserted into the markup.
    function escapeHtml(text) {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&#39;");
    }

    // Collects the first letter of each word looked for. It is filled by
    // tokenize(), so it deliberately stays a global (no var).
    scriptLetterTab = new Scriptfirstchar();
    var linkTab = [];          // html fragments of the result list
    var wordsNotFound = "";    // query words missing from the index

    // The expression is lower-cased and special characters become spaces.
    // "</" and "$_" are protected by placeholders during the replacement.
    // The encoded characters are matched case-insensitively because the
    // expression has already been lower-cased ("%2C" is "%2c" at this point).
    var searchFor = expressionInput.toLowerCase()
        .replace(/<\//g, "_st_")
        .replace(/\$_/g, "_di_")
        .replace(/\.|%2C|%3B|%21|%3A|@|\/|\*/gi, " ")
        .replace(/(%20)+/g, " ")
        .replace(/_st_/g, "</")
        .replace(/_di_/g, "%24_");

    // Collapse runs of spaces and drop one leading / trailing space.
    searchFor = searchFor.replace(/  +/g, " ");
    searchFor = searchFor.replace(/ $/, "").replace(/^ /, "");

    var wordsList = searchFor.split(" ");
    wordsList.sort();

    // Set the tokenizing method: 2-gram tokenizing is used when the index
    // was built for Chinese, Japanese or Korean, normal tokenizing otherwise.
    useCJKTokenizing = (typeof indexerLanguage != "undefined"
        && (indexerLanguage == "zh" || indexerLanguage == "ja" || indexerLanguage == "ko"));
    var candidateWords = useCJKTokenizing ? cjkTokenize(wordsList) : tokenize(wordsList);

    // Keep only the words that are real entries of the index. The own-property
    // test stops inherited names such as "constructor" from counting as hits.
    var finalWordsList = [];
    for (var k = 0; k < candidateWords.length; k++) {
        var key = candidateWords[k].toString();
        if (Object.prototype.hasOwnProperty.call(w, key) && w[key] != undefined) {
            finalWordsList.push(candidateWords[k]);
        } else {
            wordsNotFound += candidateWords[k] + " ";
        }
    }

    if (finalWordsList.length) {

        // search 'and' and 'or' one time
        var fileAndWordList = SortResults(finalWordsList);

        // Walk the groups from last to first.
        for (var i = fileAndWordList.length - 1; i >= 0; i--) {
            var group = fileAndWordList[i];
            if (group == undefined) {
                continue;
            }
            linkTab.push("<p>" + txt_results_for + " " + "<span class=\"searchExpression\">" + escapeHtml(group[0].motslisteDisplay) + "</span>" + "</p>");

            linkTab.push("<ul class='searchresult'>");
            for (var f = 0; f < group.length; f++) {
                var tempInfo = fil[group[f].filenb];
                if (tempInfo == undefined) {
                    // The index refers to a file without an info entry: skip it.
                    continue;
                }
                var pos1 = tempInfo.indexOf("@@@");
                var pos2 = tempInfo.lastIndexOf("@@@");
                var tempPath = tempInfo.substring(0, pos1);
                var tempTitle = tempInfo.substring(pos1 + 3, pos2);
                var tempShortdesc = tempInfo.substring(pos2 + 3, tempInfo.length);

                // NOTE(review): the href value is emitted without quotes, exactly
                // as before; confirm whether the paths in fil can contain spaces.
                var linkString = "<li><a href=" + tempPath + ">" + tempTitle + "</a>";
                if (tempShortdesc != "null") {
                    linkString += "\n<div class=\"shortdesclink\">" + tempShortdesc + "</div>";
                }
                linkString += "</li>";
                linkTab.push(linkString);
            }
            linkTab.push("</ul>");
        }
    }

    var results;
    if (linkTab.length > 0) {
        results = "<p>" + linkTab.join("") + "</p>";
    } else {
        results = "<p>" + txt_no_results_for + "<span class=\"searchExpression\">" + escapeHtml(wordsNotFound) + "</span>" + "</p>";
    }
    document.getElementById('searchResults').innerHTML = results;
}

/**
 * Cleans the query words, records their first letters in the global
 * scriptLetterTab and returns the words to look up in the index.
 *
 * When a global `stemmer` function is available every cleaned word is
 * replaced by its stem; otherwise the cleaned words are returned as they are.
 * For each returned entry, stemQueryMap[entry] is set to the cleaned query
 * word, so the word can be displayed instead of its stem later on.
 *
 * The wordsList array passed in is not modified.
 *
 * @param wordsList array of query words
 * @return array of stemmed (or cleaned) words, in the same order
 */
function tokenize(wordsList){
    var hasStemmer = (typeof stemmer != "undefined");
    var stemmedWordsList = [];

    // Indexed loop: for...in would also visit enumerable properties that other
    // scripts may have added to Array.prototype.
    for (var i = 0; i < wordsList.length; i++) {
        // Remove encoded double quotes (%22) and a leading minus sign.
        var cleanWord = wordsList[i].replace(/(%22)|^-/g, "");
        if (cleanWord == "%20") {
            continue;
        }
        scriptLetterTab.add(cleanWord.charAt(0));

        // With a stemmer, the lookup uses the stemmed word.
        var lookupWord = hasStemmer ? stemmer(cleanWord) : cleanWord;

        // Key the map by the entry that is actually searched, so that a later
        // lookup by that entry finds the query word.
        stemQueryMap[lookupWord] = cleanWord;
        stemmedWordsList.push(lookupWord);
    }
    return stemmedWordsList;
}

/**
 * Tokenizes the query words for an index built with CJK tokenizing.
 *
 * Words whose getAvgAsciiValue() is below 127 are collected and handed to
 * tokenize(); every other word is split by a CJKTokenizer. The CJK tokens
 * come first in the returned array, followed by the result of tokenize().
 *
 * @param wordsList array of query words
 * @return array with all the tokens
 */
function cjkTokenize(wordsList){
    var cjkTokens = [];
    var plainWords = [];

    for (var idx = 0; idx < wordsList.length; idx++) {
        var current = wordsList[idx];
        var isPlain = getAvgAsciiValue(current) < 127;
        if (!isPlain) {
            var splitter = new CJKTokenizer(current);
            cjkTokens = cjkTokens.concat(splitter.getAllTokens());
            continue;
        }
        plainWords.push(current);
    }

    return cjkTokens.concat(tokenize(plainWords));
}

/**
 * A simple way to determine whether a query word is in english or not:
 * returns the average character code of the first characters of the word
 * (at most five of them). The result is NaN for an empty word.
 */
function getAvgAsciiValue(word){
    var sampleSize = 5;
    if (word.length < 5) {
        sampleSize = word.length;
    }

    var total = 0;
    var pos = 0;
    while (pos < sampleSize) {
        total += word.charCodeAt(pos);
        pos++;
    }
    return total / sampleSize;
}

/**
 * CJKTokenizer: splits its input into overlapping tokens of two characters.
 *
 * getAllTokens() returns the distinct tokens in order of first appearance;
 * an input shorter than two characters gives no token.
 *
 * @param input the string to tokenize
 */
function CJKTokenizer(input){
    this.input = input;
    this.offset = -1;        // start position of the current token
    this.tokens = [];        // every token produced so far, duplicates included

    // Moves to the next token; returns false once the input is exhausted.
    this.incrementToken = function incrementToken() {
        if (this.offset < this.input.length - 2) {
            this.offset += 1;
            return true;
        }
        return false;
    };

    // Returns the two characters found at the current offset.
    this.tokenize = function tokenize() {
        var start = this.offset;
        return this.input.substring(start, start + 2);
    };

    // Produces all the remaining tokens and returns them without duplicates.
    this.getAllTokens = function getAllTokens() {
        while (this.incrementToken()) {
            this.tokens.push(this.tokenize());
        }
        return this.unique(this.tokens);
    };

    // Returns a copy of a in which each value (compared with ==) is kept once.
    this.unique = function unique(a) {
        var distinct = [];
        for (var i = 0, n = a.length; i < n; i++) {
            var alreadySeen = false;
            for (var k = 0; k < distinct.length && !alreadySeen; k++) {
                alreadySeen = (distinct[k] == a[i]);
            }
            if (!alreadySeen) {
                distinct.push(a[i]);
            }
        }
        return distinct;
    };
}


/* Scriptfirstchar: gathers the first letters of the words looked for.
 * loadTheIndexScripts turns each letter into the name of an index js file. */
function Scriptfirstchar() {
    this.strLetters = "";    // the distinct letters, in order of insertion
    this.add = addLettre;
}

/**
 * Adds caract to this.strLetters unless it is already in there.
 * Meant to be used as the `add` method of a Scriptfirstchar.
 *
 * @param caract the letter to record
 * @return always 0
 */
function addLettre(caract) {

    // typeof is needed here: comparing the value with the string 'undefined'
    // never detects a missing strLetters.
    if (typeof this.strLetters == 'undefined') {
        this.strLetters = caract;
    } else if (this.strLetters.indexOf(caract) < 0) {
        this.strLetters += caract;
    }

    return 0;
}
/* end of scriptfirstchar */

/**
 * Main loader function.
 *
 * Builds the list of scripts to load - one "../search/<letter>.js" per letter
 * of tab.strLetters, followed by the list of html files - and loads them with
 * a ScriptLoader.
 *
 * @param tab object whose strLetters holds the first letters of the words
 *            looked for
 * @return the array of script paths, without gaps
 */
function loadTheIndexScripts(tab) {

    var scriptsarray = [];

    for (var i = 0; i < tab.strLetters.length; i++) {
        scriptsarray.push("..\/search" + "\/" + tab.strLetters.charAt(i) + ".js");
    }
    // Add the list of html files right after the index scripts (push avoids
    // leaving an empty slot between them).
    scriptsarray.push("..\/search" + "\/" + htmlfileList);

    var loader = new ScriptLoader();
    for (var k = 0; k < scriptsarray.length; k++) {
        loader.add(scriptsarray[k]);
    }
    loader.load();

    return scriptsarray;
}

/* ScriptLoader: collects script paths and adds a <script> element for each
 * of them to the document head. */
function ScriptLoader() {
    this.cpt = 0;                        // number of scripts reported as loaded
    this.scriptTab = new Array();        // paths of the scripts to load
    this.add = addAScriptInTheList;
    this.load = loadTheScripts;
    this.onScriptLoaded = onScriptLoadedFunc;
}

/* Appends scriptPath to the list of scripts to load. */
function addAScriptInTheList(scriptPath) {
    this.scriptTab.push(scriptPath);
}

/* Creates one <script> element per registered path and appends it to the
 * first <head> element. No load handler is attached to the elements here. */
function loadTheScripts() {
    var head = document.getElementsByTagName('head').item(0);

    // Indexed loop: for...in would also visit enumerable properties added to
    // Array.prototype by other scripts.
    for (var i = 0; i < this.scriptTab.length; i++) {
        var script = document.createElement('script');
        script.src = this.scriptTab[i];
        script.type = 'text/javascript';
        script.defer = false;

        head.appendChild(script);
    }
}

/**
 * Load handler, to be invoked as a method of a ScriptLoader: counts the
 * script as loaded and calls this.onLoadComplete(), when such a function
 * exists, once every registered script has been counted.
 *
 * @param e the load event (window.event is used when it is missing)
 */
function onScriptLoadedFunc(e) {
    e = e || window.event;
    var target = e.target || e.srcElement;
    var isComplete = true;
    // typeof yields a string, so it has to be compared with "undefined".
    if (typeof target.readyState != "undefined") {

        isComplete = (target.readyState == "complete" || target.readyState == "loaded");
    }
    // The counter and the script list live on the loader instance; nothing
    // happens when the function is not called on a loader.
    if (isComplete && this && this.scriptTab) {
        this.cpt++;
        if (this.cpt == this.scriptTab.length && typeof this.onLoadComplete == "function") {
            this.onLoadComplete();
        }
    }
}

/*
function onLoadComplete() {
    alert("loaded !!");
} */

/* End of scriptloader functions */
 
/**
 * Removes duplicate values: returns a new array holding each value of tab
 * once, in order of first appearance. Values are compared with ==.
 * Returns -1 instead of an array when tab[0] is undefined or null (which
 * includes an empty tab).
 */
function unique(tab) {
    if (tab[0] == undefined) {
        return -1;
    }

    var distinct = [tab[0]];
    for (var pos = 1, total = tab.length; pos < total; pos++) {
        var candidate = tab[pos];
        if (indexof(distinct, candidate, 0) < 0) {
            distinct.push(candidate);
        }
    }
    return distinct;
}

/**
 * Returns the index of the first entry of tab, from position begin on, that
 * is == to element, or -1 when there is none.
 */
function indexof(tab, element, begin) {
    var pos = begin;
    while (pos < tab.length) {
        if (tab[pos] == element) {
            return pos;
        }
        pos++;
    }
    return -1;
}
/* end of Array functions */


/**
 * Groups the files of the index by the set of query words they contain.
 *
 * A table is first built whose key is the index of an html file and whose
 * value is the comma separated list of the words of mots found in that file
 * (according to the global word index w). The files are then grouped by that
 * list of words, and the groups are sorted with compare_nbMots.
 *
 * @param mots list of the words to look for; each one must be a key of w
 * @return null when mots is empty; otherwise an array of groups, each group
 *         being an array of resultPerFile objects sharing the same motsliste
 */
function SortResults(mots) {

    if (mots.length == 0) {
        return null;
    }

    var hasOwn = Object.prototype.hasOwnProperty;

    // file index -> "word1,word2,..."
    var fileAndWordList = {};
    // Indexed loops are used on the arrays: for...in would also visit
    // enumerable properties added to Array.prototype by other scripts.
    for (var m = 0; m < mots.length; m++) {
        // get the list of the indices of the files.
        var fileIndices = w[mots[m].toString()].split(",");

        for (var f = 0; f < fileIndices.length; f++) {
            var fileKey = fileIndices[f].toString();
            if (!hasOwn.call(fileAndWordList, fileKey)) {
                fileAndWordList[fileKey] = "" + mots[m];
            } else {
                fileAndWordList[fileKey] += "," + mots[m];
            }
        }
    }

    var fileAndWordListValuesOnly = [];
    var temptab = [];
    for (var fileNb in fileAndWordList) {
        if (!hasOwn.call(fileAndWordList, fileNb)) {
            continue;
        }
        var words = fileAndWordList[fileNb].split(',');

        var tempDisplay = [];
        for (var x = 0; x < words.length; x++) {
            if (hasOwn.call(stemQueryMap, words[x]) && stemQueryMap[words[x]] != undefined) {
                tempDisplay.push(stemQueryMap[words[x]]); // the original word of the stem
            } else {
                tempDisplay.push(words[x]); // no stem is available (probably a CJK language)
            }
        }

        temptab.push(new resultPerFile(fileNb, fileAndWordList[fileNb], words.length, tempDisplay.join(", ")));
        fileAndWordListValuesOnly.push(fileAndWordList[fileNb]);
    }

    // One entry per distinct list of words, sorted according to compare_nbMots.
    fileAndWordListValuesOnly = unique(fileAndWordListValuesOnly);
    fileAndWordListValuesOnly = fileAndWordListValuesOnly.sort(compare_nbMots);

    // Group j holds every file whose list of words is the j-th sorted value.
    var listToOutput = [];
    for (var j = 0; j < fileAndWordListValuesOnly.length; j++) {
        for (var r = 0; r < temptab.length; r++) {
            if (temptab[r].motsliste == fileAndWordListValuesOnly[j]) {
                if (listToOutput[j] == undefined) {
                    listToOutput[j] = [temptab[r]];
                } else {
                    listToOutput[j].push(temptab[r]);
                }
            }
        }
    }
    return listToOutput;
}

/**
 * One search result.
 *
 * @param filenb           index of the html file
 * @param motsliste        comma separated list of the words found in the file
 * @param motsnb           number of words in motsliste
 * @param motslisteDisplay the list of words as shown to the user
 */
function resultPerFile(filenb, motsliste, motsnb, motslisteDisplay) {
    this.filenb = filenb;
    this.motsliste = motsliste;
    this.motsnb = motsnb;
    this.motslisteDisplay = motslisteDisplay;
}

/**
 * Sort comparator for comma separated lists of words: orders them by their
 * number of words, ascending.
 *
 * @return 1 when s1 has more words than s2, -1 when it has fewer, 0 otherwise
 */
function compare_nbMots(s1, s2) {
    var difference = s1.split(',').length - s2.split(',').length;
    if (difference > 0) {
        return 1;
    }
    return difference < 0 ? -1 : 0;
}
1	/*----------------------------------------------------------------------------
2	* JavaScript for webhelp search
3	*----------------------------------------------------------------------------
4	This file is part of the webhelpsearch plugin for DocBook WebHelp
5	Copyright (c) 2007-2008 NexWave Solutions All Rights Reserved.
6	www.nexwave.biz Nadege Quaine
7	http://kasunbg.blogspot.com/ Kasun Gajasinghe
8	*/
9
10	//string initialization
11	var htmlfileList = "htmlFileList.js";
12	var htmlfileinfoList = "htmlFileInfoList.js";
13	var useCJKTokenizing = false;
14
15	/* Cette fonction verifie la validite de la recherche entrre par l utilisateur */
16	function Verifie(ditaSearch_Form) {
17
18	// Check browser compatibitily
19	if (navigator.userAgent.indexOf("Konquerer") > -1) {
20
21	alert(txt_browser_not_supported);
22	return;
23	}
24
25
26	var expressionInput = document.ditaSearch_Form.textToSearch.value;
27	//Set a cookie to store the searched keywords
28	$.cookie('textToSearch', expressionInput);
29
30
31	if (expressionInput.length < 1) {
32
33	// expression is invalid
34	alert(txt_enter_at_least_1_char);
35	// reactive la fenetre de search (utile car cadres)
36	document.ditaSearch_Form.textToSearch.focus();
37	}
38	else {
39
40	// Effectuer la recherche
41	Effectuer_recherche(expressionInput);
42
43	// reactive la fenetre de search (utile car cadres)
44	document.ditaSearch_Form.textToSearch.focus();
45	}
46	}
47
48	var stemQueryMap = new Array(); // A hashtable which maps stems to query words
49
50	/* This function parses the search expression, loads the indices and displays the results*/
51	function Effectuer_recherche(expressionInput) {
52
53	/* Display a waiting message */
54	//DisplayWaitingMessage();
55
56	/data initialisation/
57	var searchFor = ""; // expression en lowercase et sans les caracte res speciaux
58	//w = new Object(); // hashtable, key=word, value = list of the index of the html files
59	scriptLetterTab = new Scriptfirstchar(); // Array containing the first letter of each word to look for
60	var wordsList = new Array(); // Array with the words to look for
61	var finalWordsList = new Array(); // Array with the words to look for after removing spaces
62	var linkTab = new Array();
63	var fileAndWordList = new Array();
64	var txt_wordsnotfound = "";
65
66
67	/nqu: expressionInput, la recherche est lower cased, plus remplacement des char speciaux/
68	searchFor = expressionInput.toLowerCase().replace(/<\//g, "_st_").replace(/\$_/g, "_di_").replace(/\.\|%2C\|%3B\|%21\|%3A\|@\|\/\|\*/g, " ").replace(/(%20)+/g, " ").replace(/_st_/g, "</").replace(/_di_/g, "%24_");
69
70	searchFor = searchFor.replace(/ +/g, " ");
71	searchFor = searchFor.replace(/ $/, "").replace(/^ /, "");
72
73	wordsList = searchFor.split(" ");
74	wordsList.sort();
75
76	//set the tokenizing method
77	if(typeof indexerLanguage != "undefined" && (indexerLanguage=="zh" \|\| indexerLanguage=="ja" \|\|indexerLanguage=="ko")){
78	useCJKTokenizing=true;
79	} else {
80	useCJKTokenizing=false;
81	}
82	//If Lucene CJKTokenizer was used as the indexer, then useCJKTokenizing will be true. Else, do normal tokenizing.
83	// 2-gram tokenizinghappens in CJKTokenizing,
84	if(useCJKTokenizing){
85	finalWordsList = cjkTokenize(wordsList);
86	} else {
87	finalWordsList = tokenize(wordsList);
88	}
89
90	//load the scripts with the indices: the following lines do not work on the server. To be corrected
91	/*if (IEBrowser) {
92	scriptsarray = loadTheIndexScripts (scriptLetterTab);
93	} */
94
95	/**
96	* Compare with the indexed words (in the w[] array), and push words that are in it to tempTab.
97	*/
98	var tempTab = new Array();
99	for (var t in finalWordsList) {
100	if (w[finalWordsList[t].toString()] == undefined) {
101	txt_wordsnotfound += finalWordsList[t] + " ";
102	} else {
103	tempTab.push(finalWordsList[t]);
104	}
105	}
106	finalWordsList = tempTab;
107
108	if (finalWordsList.length) {
109
110	//search 'and' and 'or' one time
111	fileAndWordList = SortResults(finalWordsList);
112
113	var cpt = fileAndWordList.length;
114	for (var i = cpt - 1; i >= 0; i--) {
115	if (fileAndWordList[i] != undefined) {
116	linkTab.push("<p>" + txt_results_for + " " + "<span class=\"searchExpression\">" + fileAndWordList[i][0].motslisteDisplay + "</span>" + "</p>");
117
118	linkTab.push("<ul class='searchresult'>");
119	for (t in fileAndWordList[i]) {
120	//DEBUG: alert(": "+ fileAndWordList[i][t].filenb+" " +fileAndWordList[i][t].motsliste);
121	//linkTab.push("<li><a href=\"../"+fl[fileAndWordList[i][t].filenb]+"\">"+fl[fileAndWordList[i][t].filenb]+"</a></li>");
122	var tempInfo = fil[fileAndWordList[i][t].filenb];
123	var pos1 = tempInfo.indexOf("@@@");
124	var pos2 = tempInfo.lastIndexOf("@@@");
125	var tempPath = tempInfo.substring(0, pos1);
126	var tempTitle = tempInfo.substring(pos1 + 3, pos2);
127	var tempShortdesc = tempInfo.substring(pos2 + 3, tempInfo.length);
128
129	//file:///home/kasun/docbook/WEBHELP/webhelp-draft-output-format-idea/src/main/resources/web/webhelp/installation.html
130	var linkString = "<li><a href=" + tempPath + ">" + tempTitle + "</a>";
131	// var linkString = "<li><a href=\"installation.html\">" + tempTitle + "</a>";
132	if ((tempShortdesc != "null")) {
133	linkString += "\n<div class=\"shortdesclink\">" + tempShortdesc + "</div>";
134	}
135	linkString += "</li>";
136	linkTab.push(linkString);
137	}
138	linkTab.push("</ul>");
139	}
140	}
141	}
142
143	var results = "";
144	if (linkTab.length > 0) {
145	/writeln ("<p>" + txt_results_for + " " + "<span class=\"searchExpression\">" + cleanwordsList + "</span>" + "<br/>"+"</p>");/
146	results = "<p>";
147	//write("<ul class='searchresult'>");
148	for (t in linkTab) {
149	results += linkTab[t].toString();
150	}
151	results += "</p>";
152	} else {
153	results = "<p>" + txt_no_results_for + "<span class=\"searchExpression\">" + txt_wordsnotfound + "</span>" + "</p>";
154	}
155	//alert(results);
156	document.getElementById('searchResults').innerHTML = results;
157	}
158
159	function tokenize(wordsList){
160	var stemmedWordsList = new Array(); // Array with the words to look for after removing spaces
161	var cleanwordsList = new Array(); // Array with the words to look for
162	for(var j in wordsList){
163	var word = wordsList[j];
164	if(typeof stemmer != "undefined" ){
165	stemQueryMap[stemmer(word)] = word;
166	} else {
167	stemQueryMap[word] = word;
168	}
169	}
170	//stemmedWordsList is the stemmed list of words separated by spaces.
171	for (var t in wordsList) {
172	wordsList[t] = wordsList[t].replace(/(%22)\|^-/g, "");
173	if (wordsList[t] != "%20") {
174	scriptLetterTab.add(wordsList[t].charAt(0));
175	cleanwordsList.push(wordsList[t]);
176	}
177	}
178
179	if(typeof stemmer != "undefined" ){
180	//Do the stemming using Porter's stemming algorithm
181	for (var i = 0; i < cleanwordsList.length; i++) {
182	var stemWord = stemmer(cleanwordsList[i]);
183	stemmedWordsList.push(stemWord);
184	}
185	} else {
186	stemmedWordsList = cleanwordsList;
187	}
188	return stemmedWordsList;
189	}
190
191	//Invoker of CJKTokenizer class methods.
192	function cjkTokenize(wordsList){
193	var allTokens= new Array();
194	var notCJKTokens= new Array();
195	var j=0;
196	for(j=0;j<wordsList.length;j++){
197	var word = wordsList[j];
198	if(getAvgAsciiValue(word) < 127){
199	notCJKTokens.push(word);
200	} else {
201	var tokenizer = new CJKTokenizer(word);
202	var tokensTmp = tokenizer.getAllTokens();
203	allTokens = allTokens.concat(tokensTmp);
204	}
205	}
206	allTokens = allTokens.concat(tokenize(notCJKTokens));
207	return allTokens;
208	}
209
210	//A simple way to determine whether the query is in english or not.
211	function getAvgAsciiValue(word){
212	var tmp = 0;
213	var num = word.length < 5 ? word.length:5;
214	for(var i=0;i<num;i++){
215	if(i==5) break;
216	tmp += word.charCodeAt(i);
217	}
218	return tmp/num;
219	}
220
221	//CJKTokenizer
222	function CJKTokenizer(input){
223	this.input = input;
224	this.offset=-1;
225	this.tokens = new Array();
226	this.incrementToken = incrementToken;
227	this.tokenize = tokenize;
228	this.getAllTokens = getAllTokens;
229	this.unique = unique;
230
231	function incrementToken(){
232	if(this.input.length - 2 <= this.offset){
233	// console.log("false "+offset);
234	return false;
235	}
236	else {
237	this.offset+=1;
238	return true;
239	}
240	}
241
242	function tokenize(){
243	//document.getElementById("content").innerHTML += x.substring(offset,offset+2)+"<br>";
244	return this.input.substring(this.offset,this.offset+2);
245	}
246
247	function getAllTokens(){
248	while(this.incrementToken()){
249	var tmp = this.tokenize();
250	this.tokens.push(tmp);
251	}
252	return this.unique(this.tokens);
253	// document.getElementById("content").innerHTML += tokens+" ";
254	// document.getElementById("content").innerHTML += "<br>dada"+sortedTokens+" ";
255	// console.log(tokens.length+"dsdsds");
256	/*for(i=0;i<tokens.length;i++){
257	console.log(tokens[i]);
258	var ss = tokens[i] == sortedTokens[i];
259
260	// document.getElementById("content").innerHTML += "<br>dada"+un[i]+"- "+stems[i]+"   "+ ss;
261	document.getElementById("content").innerHTML += "<br>"+sortedTokens[i];
262	}*/
263	}
264
265	function unique(a)
266	{
267	var r = new Array();
268	o:for(var i = 0, n = a.length; i < n; i++)
269	{
270	for(var x = 0, y = r.length; x < y; x++)
271	{
272	if(r[x]==a[i]) continue o;
273	}
274	r[r.length] = a[i];
275	}
276	return r;
277	}
278	}
279
280
281	/* Scriptfirstchar: to gather the first letter of index js files to upload */
282	function Scriptfirstchar() {
283	this.strLetters = "";
284	this.add = addLettre;
285	}
286
287	function addLettre(caract) {
288
289	if (this.strLetters == 'undefined') {
290	this.strLetters = caract;
291	} else if (this.strLetters.indexOf(caract) < 0) {
292	this.strLetters += caract;
293	}
294
295	return 0;
296	}
297	/* end of scriptfirstchar */
298
299	/main loader function/
300	/tab contains the first letters of each word looked for/
301	function loadTheIndexScripts(tab) {
302
303	//alert (tab.strLetters);
304	var scriptsarray = new Array();
305
306	for (var i = 0; i < tab.strLetters.length; i++) {
307
308	scriptsarray[i] = "..\/search" + "\/" + tab.strLetters.charAt(i) + ".js";
309	}
310	// add the list of html files
311	i++;
312	scriptsarray[i] = "..\/search" + "\/" + htmlfileList;
313
314	//debug
315	for (var t in scriptsarray) {
316	//alert (scriptsarray[t]);
317	}
318
319	tab = new ScriptLoader();
320	for (t in scriptsarray) {
321	tab.add(scriptsarray[t]);
322	}
323	tab.load();
324	//alert ("scripts loaded");
325	return (scriptsarray);
326	}
327
328	/* ScriptLoader: to load the scripts and wait that it's finished */
329	function ScriptLoader() {
330	this.cpt = 0;
331	this.scriptTab = new Array();
332	this.add = addAScriptInTheList;
333	this.load = loadTheScripts;
334	this.onScriptLoaded = onScriptLoadedFunc;
335	}
336
337	function addAScriptInTheList(scriptPath) {
338	this.scriptTab.push(scriptPath);
339	}
340
341	function loadTheScripts() {
342	var script;
343	var head;
344
345	head = document.getElementsByTagName('head').item(0);
346
347	//script = document.createElement('script');
348
349	for (var el in this.scriptTab) {
350	//alert (el+this.scriptTab[el]);
351	script = document.createElement('script');
352	script.src = this.scriptTab[el];
353	script.type = 'text/javascript';
354	script.defer = false;
355
356	head.appendChild(script);
357	}
358
359	}
360
361	function onScriptLoadedFunc(e) {
362	e = e \|\| window.event;
363	var target = e.target \|\| e.srcElement;
364	var isComplete = true;
365	if (typeof target.readyState != undefined) {
366
367	isComplete = (target.readyState == "complete" \|\| target.readyState == "loaded");
368	}
369	if (isComplete) {
370	ScriptLoader.cpt++;
371	if (ScriptLoader.cpt == ScriptLoader.scripts.length) {
372	ScriptLoader.onLoadComplete();
373	}
374	}
375	}
376
377	/*
378	function onLoadComplete() {
379	alert("loaded !!");
380	} */
381
382	/* End of scriptloader functions */
383
384	// Array.unique( strict ) - Remove duplicate values
385	function unique(tab) {
386	var a = new Array();
387	var i;
388	var l = tab.length;
389
390	if (tab[0] != undefined) {
391	a[0] = tab[0];
392	}
393	else {
394	return -1
395	}
396
397	for (i = 1; i < l; i++) {
398	if (indexof(a, tab[i], 0) < 0) {
399	a.push(tab[i]);
400	}
401	}
402	return a;
403	}
404	function indexof(tab, element, begin) {
405	for (var i = begin; i < tab.length; i++) {
406	if (tab[i] == element) {
407	return i;
408	}
409	}
410	return -1;
411
412	}
413	/* end of Array functions */
414
415
416	/*
417	Param: mots= list of words to look for.
418	This function creates an hashtable:
419	- The key is the index of a html file which contains a word to look for.
420	- The value is the list of all words contained in the html file.
421
422	Return value: the hashtable fileAndWordList
423	*/
424	function SortResults(mots) {
425
426	var fileAndWordList = new Object();
427	if (mots.length == 0) {
428	return null;
429	}
430
431	for (var t in mots) {
432	// get the list of the indices of the files.
433	var listNumerosDesFicStr = w[mots[t].toString()];
434	//alert ("listNumerosDesFicStr "+listNumerosDesFicStr);
435	var tab = listNumerosDesFicStr.split(",");
436
437	//for each file (file's index):
438	for (var t2 in tab) {
439	var temp = tab[t2].toString();
440	if (fileAndWordList[temp] == undefined) {
441
442	fileAndWordList[temp] = "" + mots[t];
443	} else {
444
445	fileAndWordList[temp] += "," + mots[t];
446	}
447	}
448	}
449
450	var fileAndWordListValuesOnly = new Array();
451
452	// sort results according to values
453	var temptab = new Array();
454	for (t in fileAndWordList) {
455	tab = fileAndWordList[t].split(',');
456
457	var tempDisplay = new Array();
458	for (var x in tab) {
459	if(stemQueryMap[tab[x]] != undefined){
460	tempDisplay.push(stemQueryMap[tab[x]]); //get the original word from the stem word.
461	} else {
462	tempDisplay.push(tab[x]); //no stem is available. (probably a CJK language)
463	}
464	}
465	var tempDispString = tempDisplay.join(", ");
466
467	temptab.push(new resultPerFile(t, fileAndWordList[t], tab.length, tempDispString));
468	fileAndWordListValuesOnly.push(fileAndWordList[t]);
469	}
470
471
472	//alert("t"+fileAndWordListValuesOnly.toString());
473
474	fileAndWordListValuesOnly = unique(fileAndWordListValuesOnly);
475	fileAndWordListValuesOnly = fileAndWordListValuesOnly.sort(compare_nbMots);
476	//alert("t: "+fileAndWordListValuesOnly.join(';'));
477
478	var listToOutput = new Array();
479
480	for (var j in fileAndWordListValuesOnly) {
481	for (t in temptab) {
482	if (temptab[t].motsliste == fileAndWordListValuesOnly[j]) {
483	if (listToOutput[j] == undefined) {
484	listToOutput[j] = new Array(temptab[t]);
485	} else {
486	listToOutput[j].push(temptab[t]);
487	}
488	}
489	}
490	}
491	return listToOutput;
492	}
493
494	function resultPerFile(filenb, motsliste, motsnb, motslisteDisplay) {
495	this.filenb = filenb;
496	this.motsliste = motsliste;
497	this.motsnb = motsnb;
498	this.motslisteDisplay= motslisteDisplay;
499	}
500
501	function compare_nbMots(s1, s2) {
502	var t1 = s1.split(',');
503	var t2 = s2.split(',');
504	//alert ("s1:"+t1.length + " " +t2.length)
505	if (t1.length == t2.length) {
506	return 0;
507	} else if (t1.length > t2.length) {
508	return 1;
509	} else {
510	return -1;
511	}
512	//return t1.length - t2.length);
513	}