search.php File Reference

Go to the source code of this file.

Functions

 search_results ()
 matches_text ($num)
 report_matches ()
 readInt ($file)
 readString ($file)
 readHeader ($file)
 computeIndex ($word)
 search ($file, $word, &$statsList)
 combine_results ($results, &$docs)
 normalize_ranking (&$docs)
 filter_results ($docs, &$requiredWords, &$forbiddenWords)
 compare_rank ($a, $b)
 sort_results ($docs, &$sorted)
 report_results (&$docs)
 main ()


Function Documentation

combine_results ($results, &$docs)
 

Definition at line 140 of file search.php.

Referenced by main().

/**
 * Merges per-word hit lists into a single table of documents keyed by URL.
 *
 * The first hit for a URL stores that hit's rank. Each further hit for the
 * same URL adds its rank and then doubles the sum, so documents matching
 * several words rank higher. Every hit also appends a record with the keys
 * "word", "match" and "freq" to the document's "words" list.
 *
 * @param array $results list of word entries; each entry is read through its
 *                       "word", "match" and "docs" keys, and every element of
 *                       "docs" through "url", "name", "rank" and "freq"
 * @param array $docs    document table, updated in place (by reference)
 * @return array the updated document table
 */
function combine_results($results, &$docs)
{
  foreach ($results as $wordInfo)
  {
    foreach ($wordInfo["docs"] as $di)
    {
      $key  = $di["url"];
      $rank = $di["rank"];
      // isset() is a constant-time, exact key lookup; the former
      // in_array(array_keys()) scan was linear and compared loosely, so
      // numeric-looking URLs such as "1e3" and "1000" were merged.
      if (isset($docs[$key]))
      {
        $docs[$key]["rank"] += $rank;
        $docs[$key]["rank"] *= 2; // multiple matches increases rank
      }
      else
      {
        $docs[$key] = array(
            "url"  => $key,
            "name" => $di["name"],
            "rank" => $rank
            );
      }
      $docs[$key]["words"][] = array(
          "word"  => $wordInfo["word"],
          "match" => $wordInfo["match"],
          "freq"  => $di["freq"]
          );
    }
  }
  return $docs;
}

compare_rank ($a, $b)
 

Definition at line 230 of file search.php.

/**
 * Comparison callback that orders entries by descending "rank".
 *
 * @param array $a first entry, read through its "rank" key
 * @param array $b second entry, read through its "rank" key
 * @return int -1 when $a ranks higher, 0 when the ranks are equal, 1 otherwise
 */
function compare_rank($a, $b)
{
  $left  = $a["rank"];
  $right = $b["rank"];
  if ($left > $right)
  {
    return -1; // higher rank sorts first
  }
  if ($left == $right)
  {
    return 0;
  }
  return 1;
}

computeIndex ($word)
 

Definition at line 58 of file search.php.

Referenced by search().

/**
 * Computes the 16-bit lookup index of a word from its first two bytes.
 *
 * @param string $word word to look up
 * @return int first byte * 256 + second byte, or -1 when the word is shorter
 *             than two bytes or either of those bytes is NUL
 */
function computeIndex($word)
{
  if (strlen($word)<2) return -1;
  // high char of the index
  // ([] offsets: the {} form is deprecated in PHP 7.4 and a parse error in 8)
  $hi = ord($word[0]);
  if ($hi==0) return -1;
  // low char of the index
  $lo = ord($word[1]);
  if ($lo==0) return -1;
  // return index
  return $hi*256+$lo;
}

filter_results ($docs, &$requiredWords, &$forbiddenWords)
 

Definition at line 190 of file search.php.

Referenced by main().

/**
 * Returns the documents that contain every required word and no forbidden one.
 *
 * A document's words are taken from the "word" key of each element of its
 * "words" list. Keys of $docs are preserved in the result.
 *
 * @param array $docs           document table; each entry needs a "words" list
 * @param array $requiredWords  words that must all occur in a document
 * @param array $forbiddenWords words of which none may occur in a document
 * @return array the entries of $docs that pass both checks
 */
function filter_results($docs, &$requiredWords, &$forbiddenWords)
{
  $filteredDocs = array();
  // foreach instead of each(): each() was removed in PHP 8
  foreach ($docs as $key => $doc)
  {
    $words = $doc["words"];
    $copy = 1; // copy entry by default
    foreach ($requiredWords as $reqWord)
    {
      $found = 0;
      foreach ($words as $wordInfo)
      {
        // strict comparison: "10" and "1e1" are different words
        if ($wordInfo["word"] === $reqWord)
        {
          $found = 1;
          break;
        }
      }
      if (!$found)
      {
        $copy = 0; // document is missing at least one required word
        break;
      }
    }
    if ($copy)
    {
      foreach ($words as $wordInfo)
      {
        if (in_array($wordInfo["word"], $forbiddenWords, true))
        {
          $copy = 0; // document contains a forbidden word
          break;
        }
      }
    }
    if ($copy) $filteredDocs[$key] = $doc;
  }
  return $filteredDocs;
}

main ()
 

Definition at line 288 of file search.php.

References combine_results(), filter_results(), normalize_ranking(), readHeader(), report_results(), search(), and sort_results().

/**
 * Entry point of the search page.
 *
 * Opens "search.idx", checks its "DOXS" header, echoes the query input field
 * together with the closing tags of the surrounding form, looks up every word
 * of the "query" GET parameter and prints the ranked result table.
 *
 * Query syntax: words are separated by spaces; a leading '+' marks a word as
 * required, a leading '-' marks it as forbidden.
 */
function main()
{
  // version_compare() orders version numbers numerically; strcmp() would
  // sort "10.0.0" before "4.1.0".
  if (version_compare(phpversion(), '4.1.0') < 0)
  {
    die("Error: PHP version 4.1.0 or above required!");
  }
  if (!($file=fopen("search.idx","rb")))
  {
    die("Error: Search index file could NOT be opened!");
  }
  if (readHeader($file)!="DOXS")
  {
    die("Error: Header of index file is invalid!");
  }
  $query="";
  // only accept a scalar string; "?query[]=x" would hand us an array
  if (array_key_exists("query", $_GET) && is_string($_GET["query"]))
  {
    $query=$_GET["query"];
  }
  // the query is untrusted input: escape it before putting it in an attribute
  $safeQuery = htmlspecialchars($query, ENT_QUOTES);
  echo "<input class=\"search\" type=\"text\" name=\"query\" value=\"$safeQuery\" size=\"20\" accesskey=\"s\"/>\n";
  echo "</span>\n";
  echo "</form>\n";
  echo "</div>\n";
  $results = array();
  $requiredWords = array();
  $forbiddenWords = array();
  $foundWords = array();
  $word=strtok($query," ");
  // compare against false explicitly so that a "0" token does not end the loop
  while ($word !== false) // for each word in the search query
  {
    if ($word !== "" && $word[0]=='+')
    {
      $word=(string)substr($word,1);
      if ($word !== "") $requiredWords[]=$word; // ignore a bare "+"
    }
    if ($word !== "" && $word[0]=='-')
    {
      $word=(string)substr($word,1);
      if ($word !== "") $forbiddenWords[]=$word; // ignore a bare "-"
    }
    if ($word !== "" && !in_array($word,$foundWords,true))
    {
      $foundWords[]=$word;
      search($file,$word,$results);
    }
    $word=strtok(" ");
  }
  $docs = array();
  combine_results($results,$docs);
  // filter out documents with forbidden word or that do not contain
  // required words
  $filteredDocs = filter_results($docs,$requiredWords,$forbiddenWords);
  // normalize rankings so they are in the range [0-100]
  normalize_ranking($filteredDocs);
  // sort the results based on rank
  $sorted = array();
  sort_results($filteredDocs,$sorted);
  // report results to the user
  report_results($sorted);
  fclose($file);
}

Here is the call graph for this function:

matches_text ($num)
 

Definition at line 16 of file search.php.

/**
 * Builds the summary sentence shown above the result list.
 *
 * @param int $num number of matching documents
 * @return string HTML fragment for zero, exactly one, or several matches
 */
function matches_text($num)
{
  switch ($num)
  {
    case 0:
      return "Sorry, no documents matching your query.";
    case 1:
      return "Found <b>1</b> document matching your query.";
    default: // more than one match
      return "Found <b>$num</b> documents matching your query. Showing best matches first.";
  }
}

normalize_ranking (&$docs)
 

Definition at line 171 of file search.php.

Referenced by main().

/**
 * Rescales the "rank" of every document so that the best one scores 100.
 *
 * @param array $docs document table, modified in place; each entry needs a
 *                    numeric "rank"
 */
function normalize_ranking(&$docs)
{
  // small positive floor so the division below is safe when all ranks are 0
  $maxRank = 0.0000001;
  // compute maximal rank
  foreach ($docs as $doc)
  {
    if ($doc["rank"]>$maxRank)
    {
      $maxRank=$doc["rank"];
    }
  }
  // normalize rankings
  // (iterate over the keys instead of each(), which was removed in PHP 8)
  $scale = 100/$maxRank;
  foreach (array_keys($docs) as $key)
  {
    $docs[$key]["rank"]*=$scale;
  }
}

readHeader ($file)
 

Definition at line 51 of file search.php.

Referenced by main().

/**
 * Reads the next four bytes of the stream and returns them as a string.
 *
 * @param resource $file open file handle
 * @return string the bytes read; shorter than four when the stream ends early
 */
function readHeader($file)
{
    $magic = "";
    for ($n = 0; $n < 4; $n++)
    {
        // fgetc() yields false at end of file, which appends nothing
        $magic .= fgetc($file);
    }
    return $magic;
}

readInt ($file)
 

Definition at line 37 of file search.php.

Referenced by search().

/**
 * Reads four bytes from the stream and combines them into one integer,
 * most significant byte first.
 *
 * @param resource $file open file handle
 * @return int the decoded value
 */
function readInt($file)
{
  $value = 0;
  for ($n = 0; $n < 4; $n++)
  {
    // shift the bytes read so far up and append the next one
    $value = ($value << 8) | ord(fgetc($file));
  }
  return $value;
}

readString ($file)
 

Definition at line 44 of file search.php.

Referenced by search().

/**
 * Reads bytes from the stream up to (and consuming) the next NUL byte.
 *
 * @param resource $file open file handle
 * @return string the bytes before the NUL; reading also stops at end of file
 */
function readString($file)
{
  $text = "";
  for (;;)
  {
    $ch = fgetc($file);
    // a NUL byte or end of file (false) terminates the string
    if (ord($ch) == 0)
    {
      break;
    }
    $text .= $ch;
  }
  return $text;
}

report_matches ()
 

Definition at line 32 of file search.php.

/**
 * Returns the label printed in front of a document's matched words.
 *
 * @return string the label text
 */
function report_matches()
{
  $label = "Matches: ";
  return $label;
}

report_results (&$docs)
 

Definition at line 246 of file search.php.

Referenced by main().

/**
 * Prints the search results as an HTML table.
 *
 * The table starts with a heading row and a summary row. Each document then
 * gets two rows: its position and a link to it, followed by the list of
 * matched words with their frequencies.
 *
 * @param array $docs documents in display order; each entry is read through
 *                    its "url", "name" and "words" keys
 */
function report_results(&$docs)
{
  echo "<table cellspacing=\"2\">\n";
  echo "  <tr>\n";
  echo "    <td colspan=\"2\"><h2>".search_results()."</h2></td>\n";
  echo "  </tr>\n";
  $numDocs = sizeof($docs);
  if ($numDocs==0)
  {
    echo "  <tr>\n";
    echo "    <td colspan=\"2\">".matches_text(0)."</td>\n";
    echo "  </tr>\n";
  }
  else
  {
    echo "  <tr>\n";
    echo "    <td colspan=\"2\">".matches_text($numDocs);
    echo "\n";
    echo "    </td>\n";
    echo "  </tr>\n";
    $num=1;
    foreach ($docs as $doc)
    {
      echo "  <tr>\n";
      echo "    <td align=\"right\">$num.</td>";
      // NOTE(review): url and name are emitted verbatim, as before; confirm
      // that the index stores them already HTML-safe.
      echo     "<td><a class=\"el\" href=\"".$doc["url"]."\">".$doc["name"]."</a></td>\n";
      // close the title row before opening the row with the matched words
      // (the original emitted a second "<tr>" here and never closed the first)
      echo "  </tr>\n";
      echo "  <tr>\n";
      echo "    <td></td><td class=\"tiny\">".report_matches()." ";
      foreach ($doc["words"] as $wordInfo)
      {
        $word = $wordInfo["word"];
        // the part of the indexed word that follows the searched prefix
        $matchRight = substr($wordInfo["match"],strlen($word));
        // the word comes from the query string, so escape it for HTML
        $safeWord = htmlspecialchars($word, ENT_QUOTES);
        $safeRight = htmlspecialchars((string)$matchRight, ENT_QUOTES);
        echo "<b>$safeWord</b>$safeRight(".$wordInfo["freq"].") ";
      }
      echo "    </td>\n";
      echo "  </tr>\n";
      $num++;
    }
  }
  echo "</table>\n";
}

search ($file, $word, &$statsList)
 

Definition at line 71 of file search.php.

References computeIndex(), readInt(), and readString().

Referenced by main().

/**
 * Looks up all indexed words that start with $word and appends one entry per
 * matching word to $statsList.
 *
 * Each appended entry has the keys "word" (the searched word), "match" (the
 * indexed word), "index" (file offset of its statistics), "full" (true when
 * the indexed word equals the searched word) and "docs". Every element of
 * "docs" carries "idx", "freq", "name", "url" and "rank", where rank is the
 * document's frequency divided by the total frequency over all matches of
 * this search; frequencies of full matches count twice in that total.
 *
 * @param resource $file      open index file
 * @param string   $word      word (prefix) to search for
 * @param array    $statsList result list, appended to in place
 * @return array the updated $statsList
 */
function search($file, $word, &$statsList)
{
  $index = computeIndex($word);
  if ($index==-1) return $statsList; // no valid index for this word

  fseek($file,$index*4+4); // 4 bytes per entry, skip header
  $listOffset = readInt($file);
  if (!$listOffset) return $statsList; // no words with these first two chars

  $start=sizeof($statsList);
  $count=$start;
  $wordLen=strlen($word);
  fseek($file,$listOffset);
  $w = readString($file);
  while ($w)
  {
    $statIdx = readInt($file);
    // byte-wise prefix test; a loose == on the substring could treat
    // numeric-looking strings such as "10e0" and "10.0" as equal
    if (strncmp($w,$word,$wordLen)==0)
    { // found word that matches (as substring)
      $statsList[$count++]=array(
          "word"=>$word,
          "match"=>$w,
          "index"=>$statIdx,
          "full"=>strlen($w)==$wordLen,
          "docs"=>array()
          );
    }
    $w = readString($file);
  }
  $end=sizeof($statsList);
  $totalFreq=0;
  for ($count=$start;$count<$end;$count++)
  {
    fseek($file,$statsList[$count]["index"]);
    $numDocs = readInt($file);
    $docInfo = array();
    // read docs info + occurrence frequency of the word
    for ($i=0;$i<$numDocs;$i++)
    {
      $idx=readInt($file);
      $freq=readInt($file);
      $docInfo[$i]=array("idx"=>$idx,"freq"=>$freq,"rank"=>0.0);
      $totalFreq+=$freq;
      // a full match counts twice
      if ($statsList[$count]["full"]) $totalFreq+=$freq;
    }
    // read name and url info for the doc
    for ($i=0;$i<$numDocs;$i++)
    {
      fseek($file,$docInfo[$i]["idx"]);
      $docInfo[$i]["name"]=readString($file);
      $docInfo[$i]["url"]=readString($file);
    }
    $statsList[$count]["docs"]=$docInfo;
  }
  // With a zero total every rank stays 0.0; dividing would raise a
  // DivisionByZeroError on PHP 8.
  if ($totalFreq>0)
  {
    for ($count=$start;$count<$end;$count++)
    {
      $numDocs=sizeof($statsList[$count]["docs"]);
      for ($i=0;$i<$numDocs;$i++)
      {
        // compute frequency rank of the word in each doc
        $statsList[$count]["docs"][$i]["rank"]=
          (float)$statsList[$count]["docs"][$i]["freq"]/$totalFreq;
      }
    }
  }
  return $statsList;
}

Here is the call graph for this function:

search_results ()
 

Definition at line 11 of file search.php.

/**
 * Returns the heading text of the results page.
 *
 * @return string the heading text
 */
function search_results()
{
  $title = "Search Results";
  return $title;
}

sort_results ($docs, &$sorted)
 

Definition at line 239 of file search.php.

Referenced by main().

/**
 * Sorts the documents with compare_rank() and stores the ordered list in
 * $sorted.
 *
 * @param array $docs   documents to sort (left untouched)
 * @param array $sorted receives the sorted, re-indexed list (by reference)
 * @return array the same sorted list
 */
function sort_results($docs, &$sorted)
{
  // work on a copy so the caller's $docs keeps its keys and order
  $sorted = $docs;
  usort($sorted, "compare_rank");

  return $sorted;
}


Generated on Tue Feb 17 09:06:18 2004 for eCos EDOSK-2674 HAL by doxygen 1.3.5