1 |
/*---------------------------------------------------------------------------- |
2 |
* JavaScript for webhelp search |
3 |
*---------------------------------------------------------------------------- |
4 |
This file is part of the webhelpsearch plugin for DocBook WebHelp |
5 |
Copyright (c) 2007-2008 NexWave Solutions All Rights Reserved. |
6 |
www.nexwave.biz Nadege Quaine |
7 |
http://kasunbg.blogspot.com/ Kasun Gajasinghe |
8 |
*/ |
9 |
|
10 |
// Shared settings of the search scripts.
var htmlfileList = "htmlFileList.js",          // appended to "../search/" by loadTheIndexScripts()
    htmlfileinfoList = "htmlFileInfoList.js",  // presumably the script holding the per-file info; not used in this part of the file
    useCJKTokenizing = false;                  // set by Effectuer_recherche() when the index language is zh, ja or ko
14 |
|
15 |
/**
 * Checks the text typed by the user and, when it is not empty, runs the search.
 *
 * The text is read from document.ditaSearch_Form.textToSearch and stored in the
 * 'textToSearch' cookie before it is checked. An empty text only raises an
 * alert. In both cases the focus is handed back to the search field.
 *
 * @param ditaSearch_Form not used: the form is always reached through
 *        document.ditaSearch_Form.
 */
function Verifie(ditaSearch_Form) {

    // Browser compatibility check.
    // NOTE(review): the token is spelt "Konquerer" here; it is kept exactly as
    // found. Confirm whether "Konqueror" was meant before changing it.
    var browserIsRejected = navigator.userAgent.indexOf("Konquerer") > -1;
    if (browserIsRejected) {
        alert(txt_browser_not_supported);
        return;
    }

    var expressionInput = document.ditaSearch_Form.textToSearch.value;

    // Remember the searched keywords in a cookie.
    $.cookie('textToSearch', expressionInput);

    if (expressionInput.length < 1) {
        // Nothing was typed: the expression is invalid.
        alert(txt_enter_at_least_1_char);
    } else {
        // Run the search.
        Effectuer_recherche(expressionInput);
    }

    // Re-activate the search field (useful because the page uses frames).
    document.ditaSearch_Form.textToSearch.focus();
}
47 |
|
48 |
// Hashtable mapping each stem to the query word it was derived from.
// A plain object is used instead of an Array: on an Array the key "length" is
// special, so storing the query word "length" threw a RangeError.
var stemQueryMap = {};
49 |
|
50 |
// First letters of the words of the current query. The object is created by
// Effectuer_recherche() and filled in by tokenize(). It is declared here so
// that the assignment below no longer creates an implicit global.
var scriptLetterTab;

/**
 * Escapes the characters that are significant in HTML so that the text can be
 * inserted into markup and shown literally.
 *
 * @param text value to escape; it is converted to a string first.
 * @return the escaped string.
 */
function escapeSearchText(text) {
    return String(text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
}

/**
 * Parses the search expression, looks its words up in the index and displays
 * the results.
 *
 * Reads the globals w (word -> file indices), fil (file index ->
 * "path@@@title@@@shortdesc"), indexerLanguage, txt_results_for and
 * txt_no_results_for. Writes the globals scriptLetterTab and useCJKTokenizing,
 * and replaces the content of the element whose id is 'searchResults'.
 *
 * @param expressionInput the search text as typed by the user.
 */
function Effectuer_recherche(expressionInput) {

    var linkTab = [];             // HTML fragments of the result list
    var txt_wordsnotfound = "";   // query words that are not in the index
    var i, k;

    // Collects the first letter of each word to look for (filled by tokenize()).
    scriptLetterTab = new Scriptfirstchar();

    // The expression is lower-cased and separator characters are turned into
    // spaces. "</" and "$_" are shielded by placeholders while that happens.
    var searchFor = expressionInput.toLowerCase()
        .replace(/<\//g, "_st_")
        .replace(/\$_/g, "_di_")
        .replace(/\.|%2C|%3B|%21|%3A|@|\/|\*/g, " ")
        .replace(/(%20)+/g, " ")
        .replace(/_st_/g, "</")
        .replace(/_di_/g, "%24_");

    // Collapse runs of spaces, then drop one trailing and one leading space.
    searchFor = searchFor.replace(/ +/g, " ");
    searchFor = searchFor.replace(/ $/, "").replace(/^ /, "");

    var wordsList = searchFor.split(" ");
    wordsList.sort();

    // Choose the tokenizing method: 2-gram tokenizing when the index was built
    // for Chinese, Japanese or Korean, normal tokenizing otherwise.
    useCJKTokenizing = typeof indexerLanguage != "undefined" &&
        (indexerLanguage == "zh" || indexerLanguage == "ja" || indexerLanguage == "ko");

    var candidateWords = useCJKTokenizing ? cjkTokenize(wordsList) : tokenize(wordsList);

    // Keep the words that are in the index and remember the others. Only own
    // properties of w count: an inherited name such as "constructor" is not an
    // indexed word and used to crash the sorting step.
    var finalWordsList = [];
    for (i = 0; i < candidateWords.length; i++) {
        var lookupKey = candidateWords[i].toString();
        var isIndexed = Object.prototype.hasOwnProperty.call(w, lookupKey) &&
            w[lookupKey] != undefined;
        if (isIndexed) {
            finalWordsList.push(candidateWords[i]);
        } else {
            txt_wordsnotfound += candidateWords[i] + " ";
        }
    }

    if (finalWordsList.length) {

        // Search 'and' and 'or' in one pass.
        var fileAndWordList = SortResults(finalWordsList);

        // Walk the groups from the last one to the first one.
        for (i = fileAndWordList.length - 1; i >= 0; i--) {
            var group = fileAndWordList[i];
            if (group == undefined) {
                continue;
            }

            // The displayed words come from the user's query, so they are escaped.
            linkTab.push("<p>" + txt_results_for + " " + "<span class=\"searchExpression\">" + escapeSearchText(group[0].motslisteDisplay) + "</span>" + "</p>");

            linkTab.push("<ul class='searchresult'>");
            for (k = 0; k < group.length; k++) {
                // Each fil entry is "path@@@title@@@short description".
                var tempInfo = fil[group[k].filenb];
                var pos1 = tempInfo.indexOf("@@@");
                var pos2 = tempInfo.lastIndexOf("@@@");
                var tempPath = tempInfo.substring(0, pos1);
                var tempTitle = tempInfo.substring(pos1 + 3, pos2);
                var tempShortdesc = tempInfo.substring(pos2 + 3, tempInfo.length);

                // NOTE(review): path, title and description come from the index
                // and are inserted as they are; the href value is not quoted,
                // exactly as before. Confirm what the index generator writes
                // before quoting or escaping them.
                var linkString = "<li><a href=" + tempPath + ">" + tempTitle + "</a>";
                if (tempShortdesc != "null") {
                    linkString += "\n<div class=\"shortdesclink\">" + tempShortdesc + "</div>";
                }
                linkString += "</li>";
                linkTab.push(linkString);
            }
            linkTab.push("</ul>");
        }
    }

    var results;
    if (linkTab.length > 0) {
        results = "<p>" + linkTab.join("") + "</p>";
    } else {
        // The words that were not found are user input: escape them.
        results = "<p>" + txt_no_results_for + "<span class=\"searchExpression\">" + escapeSearchText(txt_wordsnotfound) + "</span>" + "</p>";
    }
    document.getElementById('searchResults').innerHTML = results;
}
158 |
|
159 |
/**
 * Turns the words of the query into the terms that are looked up in the index.
 *
 * Every word is cleaned (each "%22" and a leading "-" are removed) and the
 * cleaned word is written back into wordsList, as before. Words equal to
 * "%20" are dropped. For each remaining word its first letter is added to the
 * global scriptLetterTab, and the global stemQueryMap records which word each
 * returned term came from.
 *
 * The map is filled from the cleaned words. It used to be filled from the
 * uncleaned ones, so looking up the stem of "-word" never found its entry.
 *
 * @param wordsList array of query words; modified in place.
 * @return array of terms: the stems when a global stemmer() function exists,
 *         the cleaned words otherwise.
 */
function tokenize(wordsList) {
    var hasStemmer = typeof stemmer != "undefined";
    var searchTerms = [];
    var i, cleaned, term;

    // Index loop: for...in on an array would also visit inherited members.
    for (i = 0; i < wordsList.length; i++) {
        cleaned = wordsList[i].replace(/(%22)|^-/g, "");
        wordsList[i] = cleaned;

        if (cleaned == "%20") {
            continue;
        }

        scriptLetterTab.add(cleaned.charAt(0));

        // Stem with the provided stemmer (Porter's algorithm) when available.
        term = hasStemmer ? stemmer(cleaned) : cleaned;
        stemQueryMap[term] = cleaned;
        searchTerms.push(term);
    }
    return searchTerms;
}
190 |
|
191 |
/**
 * Tokenizes a query that may mix CJK and non-CJK words.
 *
 * A word whose getAvgAsciiValue() is below 127 is handed to tokenize(); every
 * other word is split by a CJKTokenizer.
 *
 * @param wordsList array of query words.
 * @return the CJKTokenizer tokens of all words, followed by the terms that
 *         tokenize() returns for the remaining words.
 */
function cjkTokenize(wordsList) {
    var cjkTokens = [];
    var plainWords = [];
    var idx = 0;
    var current;

    while (idx < wordsList.length) {
        current = wordsList[idx];
        if (!(getAvgAsciiValue(current) < 127)) {
            cjkTokens = cjkTokens.concat(new CJKTokenizer(current).getAllTokens());
        } else {
            plainWords.push(current);
        }
        idx++;
    }

    return cjkTokens.concat(tokenize(plainWords));
}
209 |
|
210 |
/**
 * A simple way to tell whether a query word is English or not: the average
 * character code of its first characters.
 *
 * @param word the word to examine.
 * @return the mean of the char codes of at most the first five characters;
 *         NaN for an empty word (0 divided by 0).
 */
function getAvgAsciiValue(word) {
    var sampleSize = 5;
    if (word.length < 5) {
        sampleSize = word.length;
    }

    var total = 0;
    var pos = sampleSize;
    while (pos-- > 0) {
        total += word.charCodeAt(pos);
    }
    return total / sampleSize;
}
220 |
|
221 |
/**
 * CJKTokenizer: splits a word into overlapping two-character tokens
 * ("abcd" gives "ab", "bc", "cd").
 *
 * Instance members:
 *   input           the word given to the constructor
 *   offset          start index of the current token, -1 before the first one
 *   tokens          every token produced so far
 *   incrementToken  moves to the next token; false when there is none left
 *   tokenize        returns the token at the current offset
 *   getAllTokens    produces all remaining tokens and returns them without
 *                   duplicates
 *   unique          returns a copy of an array without duplicate values
 *
 * A one-character input used to produce no token at all; getAllTokens() now
 * returns that single character.
 * NOTE(review): this assumes the indexer also emits a lone character as a
 * token - confirm against the tokenizer used to build the index.
 *
 * @param input the word to split.
 */
function CJKTokenizer(input) {
    this.input = input;
    this.offset = -1;
    this.tokens = [];
    this.incrementToken = incrementToken;
    this.tokenize = currentToken;
    this.getAllTokens = getAllTokens;
    this.unique = uniqueValues;

    // Advances to the next two-character window, if there is one.
    function incrementToken() {
        if (this.input.length - 2 <= this.offset) {
            return false;
        }
        this.offset += 1;
        return true;
    }

    // Returns the two characters starting at the current offset.
    function currentToken() {
        return this.input.substring(this.offset, this.offset + 2);
    }

    // Consumes the rest of the input and returns the distinct tokens.
    function getAllTokens() {
        while (this.incrementToken()) {
            this.tokens.push(this.tokenize());
        }
        // A single character has no two-character window: keep it as a token.
        if (this.tokens.length == 0 && this.input.length == 1) {
            this.tokens.push(this.input);
        }
        return this.unique(this.tokens);
    }

    // Returns the values of a, keeping the first occurrence of each
    // (compared with ==, as before).
    function uniqueValues(a) {
        var result = [];
        for (var i = 0; i < a.length; i++) {
            var known = false;
            for (var k = 0; k < result.length; k++) {
                if (result[k] == a[i]) {
                    known = true;
                    break;
                }
            }
            if (!known) {
                result.push(a[i]);
            }
        }
        return result;
    }
}
279 |
|
280 |
|
281 |
/**
 * Scriptfirstchar: gathers the first letters of the words of a query, i.e. the
 * letters naming the index js files to load.
 *
 * Members:
 *   strLetters  the letters collected so far, as one string (starts empty)
 *   add         addLettre, which records one more letter
 */
function Scriptfirstchar() {
    var collector = this;
    collector.strLetters = "";
    collector.add = addLettre;
}
286 |
|
287 |
/**
 * Records a letter in this.strLetters unless it is already there. Meant to be
 * called as a method (it is installed as Scriptfirstchar's add).
 *
 * The "not set yet" test used to compare strLetters with the string
 * 'undefined', which never matched, so an unset strLetters made the indexOf
 * call below throw. The type is tested now.
 *
 * @param caract the letter to record.
 * @return always 0.
 */
function addLettre(caract) {

    if (typeof this.strLetters == "undefined") {
        this.strLetters = caract;
    } else if (this.strLetters.indexOf(caract) < 0) {
        this.strLetters += caract;
    }

    return 0;
}
297 |
/* end of scriptfirstchar */ |
298 |
|
299 |
/**
 * Main loader function: builds the list of index scripts needed for a query
 * and loads them through a ScriptLoader.
 *
 * The list holds one "../search/<letter>.js" entry per letter of
 * tab.strLetters, followed by the list of html files
 * ("../search/" + htmlfileList). The last entry used to be stored one slot too
 * far, which left a hole in the returned array and made its length one too
 * large.
 *
 * @param tab object whose strLetters string holds the first letter of each
 *        word looked for.
 * @return the array of script paths that were handed to the loader.
 */
function loadTheIndexScripts(tab) {

    var scriptsarray = [];
    var letters = tab.strLetters;
    var i;

    for (i = 0; i < letters.length; i++) {
        scriptsarray.push("../search/" + letters.charAt(i) + ".js");
    }
    // add the list of html files
    scriptsarray.push("../search/" + htmlfileList);

    var loader = new ScriptLoader();
    for (i = 0; i < scriptsarray.length; i++) {
        loader.add(scriptsarray[i]);
    }
    loader.load();

    return scriptsarray;
}
327 |
|
328 |
/**
 * ScriptLoader: keeps a list of script paths and loads them.
 *
 * Members:
 *   cpt             counter, starts at 0 (presumably the number of scripts
 *                   loaded so far - nothing in this part of the file updates it)
 *   scriptTab       the script paths queued with add()
 *   add             addAScriptInTheList
 *   load            loadTheScripts
 *   onScriptLoaded  onScriptLoadedFunc
 */
function ScriptLoader() {
    var loader = this;
    loader.cpt = 0;
    loader.scriptTab = [];
    loader.add = addAScriptInTheList;
    loader.load = loadTheScripts;
    loader.onScriptLoaded = onScriptLoadedFunc;
}
336 |
|
337 |
/**
 * Appends a script path to this.scriptTab. Meant to be called as a method (it
 * is installed as ScriptLoader's add).
 *
 * @param scriptPath the path to queue.
 */
function addAScriptInTheList(scriptPath) {
    var queue = this.scriptTab;
    queue[queue.length] = scriptPath;
}
340 |
|
341 |
/**
 * Creates one <script> element per path of this.scriptTab and appends it to
 * the first <head> element of the document. Meant to be called as a method (it
 * is installed as ScriptLoader's load).
 *
 * The paths are walked by index: for...in on an array also visits enumerable
 * members added to Array.prototype, which produced bogus script elements.
 */
function loadTheScripts() {
    var head = document.getElementsByTagName('head').item(0);
    var paths = this.scriptTab;
    var script;
    var i;

    for (i = 0; i < paths.length; i++) {
        // Empty slots are skipped, as the former for...in loop did.
        if (!(i in paths)) {
            continue;
        }
        script = document.createElement('script');
        script.src = paths[i];
        script.type = 'text/javascript';
        script.defer = false;

        head.appendChild(script);
    }
}
360 |
|
361 |
/**
 * Load handler for one script: counts the script as loaded and calls
 * ScriptLoader.onLoadComplete() once the count reaches
 * ScriptLoader.scripts.length.
 *
 * A script counts as loaded when its element has no readyState, or when
 * readyState is "complete" or "loaded". The readyState test used to compare
 * typeof with the value undefined, which is always different, so elements
 * without readyState were never counted.
 *
 * NOTE(review): cpt, scripts and onLoadComplete are read from the ScriptLoader
 * function itself, but nothing in the visible part of this file sets them (the
 * constructor sets cpt and scriptTab on instances, and onLoadComplete is
 * commented out). Left as found - confirm where they are defined before
 * wiring this handler up.
 *
 * @param e the event; window.event is used when it is missing.
 */
function onScriptLoadedFunc(e) {
    var evt = e || window.event;
    var target = evt.target || evt.srcElement;
    var isComplete = true;

    if (typeof target.readyState != "undefined") {
        isComplete = (target.readyState == "complete" || target.readyState == "loaded");
    }

    if (isComplete) {
        ScriptLoader.cpt++;
        if (ScriptLoader.cpt == ScriptLoader.scripts.length) {
            ScriptLoader.onLoadComplete();
        }
    }
}
376 |
|
377 |
/* |
378 |
function onLoadComplete() { |
379 |
alert("loaded !!"); |
380 |
} */ |
381 |
|
382 |
/* End of scriptloader functions */ |
383 |
|
384 |
/**
 * Removes duplicate values from an array, keeping the first occurrence of
 * each. Values are compared with ==.
 *
 * @param tab the array to filter; it is not modified.
 * @return a new array without duplicates, or -1 when the first element of tab
 *         is undefined or null (which includes an empty tab).
 */
function unique(tab) {
    var total = tab.length;

    if (!(tab[0] != undefined)) {
        return -1;
    }

    var distinct = [tab[0]];
    for (var pos = 1; pos < total; pos++) {
        var candidate = tab[pos];
        var alreadyKept = false;
        // Same scan as indexof(distinct, candidate, 0).
        for (var k = 0; k < distinct.length; k++) {
            if (distinct[k] == candidate) {
                alreadyKept = true;
                break;
            }
        }
        if (!alreadyKept) {
            distinct.push(candidate);
        }
    }
    return distinct;
}
404 |
/**
 * Finds the first position, at or after begin, where tab holds a value equal
 * (==) to element.
 *
 * @param tab     the array to search.
 * @param element the value to look for.
 * @param begin   the index at which the search starts.
 * @return the index of the first match, or -1 when there is none.
 */
function indexof(tab, element, begin) {
    var pos = begin;
    while (pos < tab.length) {
        if (tab[pos] == element) {
            return pos;
        }
        pos++;
    }
    return -1;
}
413 |
/* end of Array functions */ |
414 |
|
415 |
|
416 |
/**
 * Groups the files matching a query by the set of query words they contain.
 *
 * For every word, the global index w gives the comma-separated list of the
 * indices of the files containing it. From that, each file gets the list of
 * query words it contains; files sharing the same list form one group.
 *
 * Compared with the previous version:
 *  - a word given more than once in mots is handled once (it used to be
 *    listed and counted twice for every file),
 *  - a word without an entry in w is ignored instead of raising an error,
 *  - arrays are walked by index rather than with for...in,
 *  - groups with the same number of words keep the order in which they were
 *    first met, whatever the engine's sort does.
 *
 * @param mots list of the words to look for (the keys used in w).
 * @return null when mots is empty; otherwise an array of groups ordered by
 *         increasing number of matched words. Each group is an array of
 *         resultPerFile objects (filenb, motsliste, motsnb, motslisteDisplay);
 *         motslisteDisplay shows the original query words taken from
 *         stemQueryMap when available.
 */
function SortResults(mots) {

    if (mots.length == 0) {
        return null;
    }

    var hasOwn = Object.prototype.hasOwnProperty;
    var wordsPerFile = {};   // file index -> "word1,word2,..."
    var seenWords = {};      // "w:" + word -> true, for words already handled
    var i, j;

    for (i = 0; i < mots.length; i++) {
        var word = mots[i].toString();

        // The key is prefixed so that no word can collide with a built-in
        // property name.
        if (hasOwn.call(seenWords, "w:" + word)) {
            continue;
        }
        seenWords["w:" + word] = true;

        // Get the list of the indices of the files containing the word.
        var fileList = hasOwn.call(w, word) ? w[word] : undefined;
        if (typeof fileList != "string") {
            continue;
        }

        // For each file (file's index): add the word to the file's list.
        var fileIds = fileList.split(",");
        for (j = 0; j < fileIds.length; j++) {
            var fileId = fileIds[j].toString();
            if (hasOwn.call(wordsPerFile, fileId)) {
                wordsPerFile[fileId] += "," + word;
            } else {
                wordsPerFile[fileId] = "" + word;
            }
        }
    }

    var groups = [];         // one record per distinct word list, in first-seen order
    var groupByList = {};    // "l:" + word list -> record of groups

    for (var fileKey in wordsPerFile) {
        if (!hasOwn.call(wordsPerFile, fileKey)) {
            continue;
        }

        var motsliste = wordsPerFile[fileKey];
        var matched = motsliste.split(",");

        var tempDisplay = [];
        for (j = 0; j < matched.length; j++) {
            if (hasOwn.call(stemQueryMap, matched[j]) && stemQueryMap[matched[j]] != undefined) {
                tempDisplay.push(stemQueryMap[matched[j]]); // get the original word from the stem word.
            } else {
                tempDisplay.push(matched[j]); // no stem is available. (probably a CJK language)
            }
        }

        var listKey = "l:" + motsliste;
        if (!hasOwn.call(groupByList, listKey)) {
            groupByList[listKey] = { count: matched.length, order: groups.length, files: [] };
            groups.push(groupByList[listKey]);
        }
        groupByList[listKey].files.push(
            new resultPerFile(fileKey, motsliste, matched.length, tempDisplay.join(", ")));
    }

    // Fewest words first; ties keep their first-seen order.
    groups.sort(function (a, b) {
        return (a.count - b.count) || (a.order - b.order);
    });

    var listToOutput = [];
    for (i = 0; i < groups.length; i++) {
        listToOutput.push(groups[i].files);
    }
    return listToOutput;
}
493 |
|
494 |
/**
 * One search hit: a file together with the query words found in it.
 *
 * @param filenb           index of the html file.
 * @param motsliste        the matched words, comma separated.
 * @param motsnb           number of matched words.
 * @param motslisteDisplay the matched words as shown to the user.
 */
function resultPerFile(filenb, motsliste, motsnb, motslisteDisplay) {
    var hit = this;
    hit.filenb = filenb;
    hit.motsliste = motsliste;
    hit.motsnb = motsnb;
    hit.motslisteDisplay = motslisteDisplay;
}
500 |
|
501 |
/**
 * Sort comparator ordering comma-separated word lists by the number of words
 * they hold.
 *
 * @param s1 first list, e.g. "word1,word2".
 * @param s2 second list.
 * @return 1 when s1 holds more words than s2, -1 when it holds fewer, 0 when
 *         both hold the same number.
 */
function compare_nbMots(s1, s2) {
    var count1 = s1.split(',').length;
    var count2 = s2.split(',').length;

    if (count1 > count2) {
        return 1;
    }
    if (count1 < count2) {
        return -1;
    }
    return 0;
}