Go to the source code of this file.
Functions | |
| search_results () | |
| matches_text ($num) | |
| report_matches () | |
| readInt ($file) | |
| readString ($file) | |
| readHeader ($file) | |
| computeIndex ($word) | |
| search ($file, $word, &$statsList) | |
| combine_results ($results, &$docs) | |
| normalize_ranking (&$docs) | |
| filter_results ($docs, &$requiredWords, &$forbiddenWords) | |
| compare_rank ($a, $b) | |
| sort_results ($docs, &$sorted) | |
| report_results (&$docs) | |
| main () | |
|
||||||||||||
|
Definition at line 140 of file search.php. Referenced by main().
// Merges the per-word search results into a single entry per document.
//
// $results - list of word records; each record supplies "word", "match" and
//            a "docs" list whose items carry "url", "name", "rank" and "freq".
// $docs    - (in/out) map keyed by document URL; each value is
//            array("url", "name", "rank", "words"). Updated in place.
// Returns the updated $docs map.
function combine_results($results, &$docs)
{
  foreach ($results as $wordInfo)
  {
    foreach ($wordInfo["docs"] as $di)
    {
      $key  = $di["url"];
      $rank = $di["rank"];
      // Direct key lookup: constant time, and unlike a loose in_array() over
      // array_keys() it cannot confuse numeric-looking URLs ("10" vs "1e1").
      if (array_key_exists($key, $docs))
      {
        $docs[$key]["rank"] += $rank;
        $docs[$key]["rank"] *= 2; // multiple matches increases rank
      }
      else
      {
        $docs[$key] = array("url"=>$key,
                            "name"=>$di["name"],
                            "rank"=>$rank
                           );
      }
      // remember which query word hit this document and how often
      $docs[$key]["words"][] = array(
                 "word"=>$wordInfo["word"],
                 "match"=>$wordInfo["match"],
                 "freq"=>$di["freq"]
                 );
    }
  }
  return $docs;
}
|
|
||||||||||||
|
Definition at line 230 of file search.php.
// Comparison callback that orders documents by descending "rank".
//
// Returns -1 when $a ranks higher than $b, 0 when both ranks are equal,
// and 1 otherwise.
function compare_rank($a, $b)
{
  $left  = $a["rank"];
  $right = $b["rank"];
  if ($left == $right)
  {
    return 0;
  }
  if ($left > $right)
  {
    return -1; // higher rank sorts first
  }
  return 1;
}
|
|
|
Definition at line 58 of file search.php. Referenced by search().
// Computes the index-table slot for a word from its first two bytes.
//
// $word - the search word.
// Returns (first byte * 256 + second byte), or -1 when the word is shorter
// than two bytes or either of the two bytes is NUL.
function computeIndex($word)
{
  if (strlen($word)<2) return -1;
  // high char of the index
  // (square-bracket offsets: the curly-brace form is a parse error on PHP 8)
  $hi = ord($word[0]);
  if ($hi==0) return -1;
  // low char of the index
  $lo = ord($word[1]);
  if ($lo==0) return -1;
  // return index
  return $hi*256+$lo;
}
|
|
||||||||||||||||
|
Definition at line 190 of file search.php. Referenced by main().
// Drops documents that miss a required word or contain a forbidden word.
//
// $docs           - map of document entries; each entry has a "words" list
//                   whose items carry the matched query word under "word".
// $requiredWords  - query words that every kept document must have matched.
// $forbiddenWords - query words that no kept document may have matched.
// Returns a new map holding the surviving entries under their original keys.
function filter_results($docs, &$requiredWords, &$forbiddenWords)
{
  $filteredDocs = array();
  // foreach instead of each(): each() was removed in PHP 8 and also depended
  // on the array's internal pointer being at the start.
  foreach ($docs as $key => $doc)
  {
    $words = $doc["words"];
    $copy = 1; // copy entry by default
    foreach ($requiredWords as $reqWord)
    {
      $found = 0;
      foreach ($words as $wordInfo)
      {
        // strict comparison so numeric-looking words ("10" / "1e1") stay distinct
        if ($wordInfo["word"] === $reqWord)
        {
          $found = 1;
          break;
        }
      }
      if (!$found)
      {
        $copy = 0; // document lacks at least one of the required words
        break;
      }
    }
    if ($copy && sizeof($forbiddenWords)>0)
    {
      foreach ($words as $wordInfo)
      {
        if (in_array($wordInfo["word"], $forbiddenWords, true))
        {
          $copy = 0; // document contains a forbidden word
          break;
        }
      }
    }
    if ($copy) $filteredDocs[$key] = $doc;
  }
  return $filteredDocs;
}
|
|
|
Definition at line 288 of file search.php. References combine_results(), filter_results(), normalize_ranking(), readHeader(), report_results(), search(), and sort_results().
// Entry point: opens the search index, echoes the query input field, runs the
// query words against the index and reports the ranked results.
//
// Reads the query from $_GET["query"]. A word prefixed with '+' is required,
// a word prefixed with '-' is forbidden. Terminates via die() when the PHP
// version is too old or the index file is missing or has a bad header.
function main()
{
  // version_compare() orders versions numerically; a plain strcmp() is
  // lexicographic and would rank "10.0.0" below "4.1.0".
  if (version_compare(phpversion(), '4.1.0', '<'))
  {
    die("Error: PHP version 4.1.0 or above required!");
  }
  if (!($file=fopen("search.idx","rb")))
  {
    die("Error: Search index file could NOT be opened!");
  }
  if (readHeader($file)!="DOXS")
  {
    die("Error: Header of index file is invalid!");
  }
  $query="";
  // ignore non-string values such as ?query[]=x
  if (array_key_exists("query", $_GET) && is_string($_GET["query"]))
  {
    $query=$_GET["query"];
  }
  // The query is user input echoed into an attribute value: escape it so it
  // cannot break out of the attribute and inject markup.
  $safeQuery = htmlspecialchars($query, ENT_QUOTES);
  echo "<input class=\"search\" type=\"text\" name=\"query\" value=\"$safeQuery\" size=\"20\" accesskey=\"s\"/>\n";
  echo "</span>\n";
  echo "</form>\n";
  echo "</div>\n";
  $results = array();
  $requiredWords = array();
  $forbiddenWords = array();
  $foundWords = array();
  $word=strtok($query," ");
  // strtok() returns false when no tokens are left; testing for false (not
  // truthiness) keeps a literal "0" token from ending the loop early.
  while ($word !== false) // for each word in the search query
  {
    $prefix = $word[0];
    if ($prefix=='+' || $prefix=='-')
    {
      $word = (string)substr($word,1);
      // a bare "+" or "-" leaves nothing to search for
      if ($word !== "")
      {
        if ($prefix=='+') { $requiredWords[]=$word; }
        else              { $forbiddenWords[]=$word; }
      }
    }
    if ($word !== "" && !in_array($word,$foundWords,true))
    {
      $foundWords[]=$word;
      search($file,$word,$results);
    }
    $word=strtok(" ");
  }
  $docs = array();
  combine_results($results,$docs);
  // filter out documents with forbidden word or that do not contain
  // required words
  $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
  // normalize rankings so they are in the range [0-100]
  normalize_ranking($filteredDocs);
  // sort the results based on rank
  $sorted = array();
  sort_results($filteredDocs,$sorted);
  // report results to the user
  report_results($sorted);
  fclose($file);
}
|
Here is the call graph for this function:

|
|
Definition at line 16 of file search.php.
// Builds the summary sentence for the number of matching documents.
//
// $num - number of documents found.
// Returns an HTML fragment: one message for zero hits, one for a single hit
// and one for several hits.
function matches_text($num)
{
  if ($num==0)
  {
    return "Sorry, no documents matching your query.";
  }
  if ($num==1)
  {
    return "Found <b>1</b> document matching your query.";
  }
  // more than one document
  return "Found <b>$num</b> documents matching your query. Showing best matches first.";
}
|
|
|
Definition at line 171 of file search.php. Referenced by main().
// Rescales the "rank" of every document so the best one becomes 100.
//
// $docs - (in/out) map of document entries, each with a numeric "rank".
// The maximum starts at a tiny positive value so the division is always
// defined, even for an empty list or when every rank is zero.
function normalize_ranking(&$docs)
{
  $maxRank = 0.0000001;
  // compute maximal rank
  foreach ($docs as $doc)
  {
    if ($doc["rank"]>$maxRank)
    {
      $maxRank=$doc["rank"];
    }
  }
  // normalize rankings
  // (iterating over the keys replaces each(), which was removed in PHP 8)
  $scale = 100/$maxRank;
  foreach (array_keys($docs) as $key)
  {
    $docs[$key]["rank"] *= $scale;
  }
}
|
|
|
Definition at line 51 of file search.php. Referenced by main().
// Reads the next four characters from the file.
//
// $file - open file handle.
// Returns the characters concatenated into one string; main() compares the
// result against the expected index signature.
function readHeader($file)
{
  $header = fgetc($file);
  for ($i = 0; $i < 3; $i++)
  {
    $header .= fgetc($file);
  }
  return $header;
}
|
|
|
Definition at line 37 of file search.php. Referenced by search().
// Reads four bytes from the file and combines them into one integer,
// most significant byte first.
//
// $file - open file handle.
// Returns the assembled integer.
function readInt($file)
{
  $value = 0;
  for ($i = 0; $i < 4; $i++)
  {
    // shift what was read so far and append the next byte
    $value = ($value << 8) | ord(fgetc($file));
  }
  return $value;
}
|
|
|
Definition at line 44 of file search.php. Referenced by search().
// Reads characters from the file up to (and consuming) the next byte whose
// ordinal value is zero.
//
// $file - open file handle.
// Returns the characters read before that byte (possibly an empty string).
function readString($file)
{
  $text = "";
  for (;;)
  {
    $ch = fgetc($file);
    if (!ord($ch)) break; // terminator reached
    $text .= $ch;
  }
  return $text;
}
|
|
|
Definition at line 32 of file search.php.
// Returns the label printed in front of the list of matched words.
function report_matches()
{
  $label = "Matches: ";
  return $label;
}
|
|
|
Definition at line 246 of file search.php. Referenced by main().
// Prints the result table: a heading, a summary line and, for every
// document, one row with its link and one row listing the matched words.
//
// $docs - list of document entries, each with "url", "name" and a "words"
//         list whose items carry "word", "match" and "freq".
function report_results(&$docs)
{
  echo "<table cellspacing=\"2\">\n";
  echo " <tr>\n";
  echo " <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
  echo " </tr>\n";
  $numDocs = sizeof($docs);
  if ($numDocs==0)
  {
    echo " <tr>\n";
    echo " <td colspan=\"2\">".matches_text(0)."</td>\n";
    echo " </tr>\n";
  }
  else
  {
    echo " <tr>\n";
    echo " <td colspan=\"2\">".matches_text($numDocs);
    echo "\n";
    echo " </td>\n";
    echo " </tr>\n";
    $num=1;
    foreach ($docs as $doc)
    {
      // NOTE(review): url and name are emitted as stored in the index;
      // confirm the index never holds unescaped markup.
      echo " <tr>\n";
      echo " <td align=\"right\">$num.</td>";
      echo "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
      // close the link row before opening the row with the matched words
      echo " </tr>\n";
      echo " <tr>\n";
      echo " <td></td><td class=\"tiny\">".report_matches()." ";
      foreach ($doc["words"] as $wordInfo)
      {
        $word = $wordInfo["word"];
        // the part of the indexed word that follows the typed prefix
        $matchRight = substr($wordInfo["match"],strlen($word));
        // the word originates from the query string, so escape it for HTML
        $wordHtml  = htmlspecialchars($word, ENT_QUOTES);
        $rightHtml = htmlspecialchars((string)$matchRight, ENT_QUOTES);
        echo "<b>$wordHtml</b>$rightHtml(".$wordInfo["freq"].") ";
      }
      echo " </td>\n";
      echo " </tr>\n";
      $num++;
    }
  }
  echo "</table>\n";
}
|
|
||||||||||||||||
|
Definition at line 71 of file search.php. References computeIndex(), readInt(), and readString(). Referenced by main().
// Looks up one query word in the index file and appends a record for every
// indexed word that starts with it.
//
// $file      - open handle of the index file.
// $word      - the query word.
// $statsList - (in/out) list of records; each appended record holds "word"
//              (the query word), "match" (the indexed word), "index" (file
//              offset of its statistics), "full" (true when the indexed word
//              equals the query word) and "docs" (list of arrays with "idx",
//              "freq", "rank", "name" and "url").
// Returns the updated $statsList.
function search($file, $word, &$statsList)
{
  $index = computeIndex($word);
  if ($index==-1) return $statsList; // no valid index for this word

  fseek($file,$index*4+4); // 4 bytes per entry, skip header
  $listOffset = readInt($file);
  if (!$listOffset) return $statsList; // no words matching first two characters

  $start = sizeof($statsList);
  $count = $start;
  $wordLen = strlen($word);
  fseek($file,$listOffset);
  $w = readString($file);
  while ($w)
  {
    $statIdx = readInt($file);
    // Strict comparison: a loose == would treat numeric-looking strings such
    // as "1e2" and "100" as equal.
    if (substr($w,0,$wordLen) === $word)
    { // found word that matches (as substring)
      $statsList[$count++]=array(
          "word"=>$word,
          "match"=>$w,
          "index"=>$statIdx,
          "full"=>strlen($w)==$wordLen,
          "docs"=>array()
          );
    }
    $w = readString($file);
  }
  $end = sizeof($statsList);

  $totalFreq=0;
  for ($n=$start;$n<$end;$n++)
  {
    fseek($file,$statsList[$n]["index"]);
    $numDocs = readInt($file);
    $docInfo = array();
    // read docs info + occurrence frequency of the word
    for ($i=0;$i<$numDocs;$i++)
    {
      $idx=readInt($file);
      $freq=readInt($file);
      $docInfo[$i]=array("idx"=>$idx,"freq"=>$freq,"rank"=>0.0);
      $totalFreq+=$freq;
      // a full-word match adds its frequency to the total a second time
      if ($statsList[$n]["full"]) $totalFreq+=$freq;
    }
    // read name and url info for the doc
    for ($i=0;$i<$numDocs;$i++)
    {
      fseek($file,$docInfo[$i]["idx"]);
      $docInfo[$i]["name"]=readString($file);
      $docInfo[$i]["url"]=readString($file);
    }
    $statsList[$n]["docs"]=$docInfo;
  }

  // Guard against division by zero: with a zero total every rank keeps its
  // initial value of 0.0.
  if ($totalFreq!=0)
  {
    for ($n=$start;$n<$end;$n++)
    {
      $numDocs = sizeof($statsList[$n]["docs"]);
      for ($i=0;$i<$numDocs;$i++)
      {
        // compute frequency rank of the word in each doc
        $statsList[$n]["docs"][$i]["rank"]=
          (float)$statsList[$n]["docs"][$i]["freq"]/$totalFreq;
      }
    }
  }
  return $statsList;
}
|
Here is the call graph for this function:

|
|
Definition at line 11 of file search.php.
// Returns the heading text shown above the result table.
function search_results()
{
  $title = "Search Results";
  return $title;
}
|
|
||||||||||||
|
Definition at line 239 of file search.php. Referenced by main().
// Orders the documents with usort() and the compare_rank() callback.
//
// $docs   - document entries to sort (not modified; keys are not preserved).
// $sorted - (out) receives the re-indexed, ordered list.
// Returns the ordered list as well.
function sort_results($docs, &$sorted)
{
  $ordered = $docs;
  usort($ordered, "compare_rank");
  $sorted = $ordered;
  return $sorted;
}
|
1.3.5