<?

// -----------------------------------------------
// ParseDocText($text)
// -----------------------------------------------
// Wraps every occurrence of the search words in $text with the
// highlight markers $hlbeg / $hlend and returns the resulting text.
//
// A word is highlighted when it starts the text or directly follows a
// run of whitespace/punctuation; matching is case-insensitive.  The
// words come from the global $all_words and, when
// $highlight_stopwords is 'yes', additionally from the KEYS of the
// global $stopword_arr.
function ParseDocText($text){
    global $all_words;
    global $stopword_arr;
    global $highlight_stopwords;
    global $hlbeg, $hlend;

    // Collect the words in the original processing order: query words
    // first, then (optionally) the stopwords.
    $words = array();
    if (is_array($all_words)) {
        foreach ($all_words as $word) {
            $words[] = (string)$word;
        }
    }
    if (($highlight_stopwords == 'yes') && is_array($stopword_arr)) {
        foreach ($stopword_arr as $word => $temp) {
            $words[] = (string)$word;
        }
    }

    // Characters that may precede a highlighted word.
    $sep = '[\s~!@#$%^&*()\-_=+\\\\|{}\[\];:\'"<>?\/,.]+';

    $str = $text;
    foreach ($words as $word) {
        // An empty word would match after every separator run.
        if ($word === '') continue;

        // The word is data, not a pattern: quote regex metacharacters
        // (and the delimiter) so e.g. "c++" or "a/b" match literally.
        $quoted = preg_quote($word, '/');

        $res = preg_replace('/(' . $sep . ')(' . $quoted . ')/i',
                            '${1}' . $hlbeg . '${2}' . $hlend, $str);
        // preg_replace() returns null on failure; keep the text then.
        if ($res !== null) $str = $res;

        $res = preg_replace('/^(' . $quoted . ')/i',
                            $hlbeg . '${1}' . $hlend, $str);
        if ($res !== null) $str = $res;
    }

    return $str;
}

// -----------------------------------------------
// ParseStr($qwe)
// -----------------------------------------------
// Normalizes a user supplied search string into a compact boolean
// expression that only uses the operators "&" (and), "|" (or),
// "~" (not) and parentheses, e.g. "foo not bar" -> "foo&~bar".
//
// Globals read:
//   $DEFAULT_QUERY_TYPE  'or' (case-insensitive) makes "|" the operator
//                        put between adjacent terms, anything else "&"
//   $rus_bool_lang       'yes' selects the extended keyword patterns
//   $DEBUG               when true the input and result are echoed
//
// Returns the normalized query string.
function ParseStr($qwe) {
  global $DEFAULT_QUERY_TYPE;
  global $DEBUG;
  global $rus_bool_lang;

  if ($DEBUG) echo "Begin ParseStr(): qwe=$qwe<BR><HR>";

  // operator inserted wherever the user gave none
  $op = (strtolower((string)$DEFAULT_QUERY_TYPE) == 'or') ? '|' : '&';

  // every run of punctuation (backslash included) acts as "&";
  // & | ~ ( ) are left alone because they are query operators
  $qwe=preg_replace('/[!@#$%^*\-_+=\\\\{}\[\];\':"<>?\/,.]+/',"&",$qwe);
  $qwe=trim($qwe);

  // query language normalizer: keywords and padded operators -> symbol

  if ($rus_bool_lang == 'yes') {
  	// NOTE(review): the " {1,} {1,}" alternatives look like slots whose
  	// (presumably Russian) keyword was lost to an encoding problem; as
  	// written they match two or more spaces.  Kept as found - confirm
  	// against the original sources before restoring the keywords.
  	$qwe=preg_replace('/ *\| *| +or +| {1,} {1,}/i',"|",$qwe);
  	$qwe=preg_replace('/ *& *| +and +| {1,} {1,}/i',"&",$qwe);
  	$qwe=preg_replace('/ *~ *| +not +| +without +| {1,} {1,}| {1,} {1,}/i',"~",$qwe);
  } else {
  	$qwe=preg_replace('/ *\| *| +or +/i',"|",$qwe);
  	$qwe=preg_replace('/ *& *| +and +/i',"&",$qwe);
  	$qwe=preg_replace('/ *~ *| +not +| +without +/i',"~",$qwe);
  }

  $qwe=preg_replace('/ *\( */',"(",$qwe);
  $qwe=preg_replace('/ *\) */',")",$qwe);

  // remaining spaces separate terms: join them with the default
  // operator, which also wins over mixed "&|" / "|&" sequences
  $qwe=preg_replace('/ +/',$op,$qwe);
  $qwe=preg_replace('/&\|+/',$op,$qwe);
  $qwe=preg_replace('/\|&+/',$op,$qwe);

  // remove unnecessary boolean operators
  $qwe=preg_replace('/\|+/',"|",$qwe);
  $qwe=preg_replace('/&+/',"&",$qwe);
  $qwe=preg_replace('/~+/',"~",$qwe);
  $qwe=str_replace("|&|","&",$qwe);
  $qwe=preg_replace('/[|&~]+\z/',"",$qwe);
  $qwe=preg_replace('/^[|&]+/',"",$qwe);

  // transform "w1 ~w2" -> "w1 default_op ~ w2"
  // ") ~w" -> ") default_op ~w"
  // ") w" -> ") default_op w"
  // "w (" -> "w default_op ("
  // ")(" -> ") default_op ("
  $qwe=preg_replace('/([^&|()]+)~([^&|()]+)/','\\1'.$op.'~\\2',$qwe);
  $qwe=preg_replace('/\)~+/',')'.$op.'~',$qwe);
  $qwe=preg_replace('/\)([^&|()]+)/',')'.$op.'\\1',$qwe);
  $qwe=preg_replace('/([^&|()]+)\(/','\\1'.$op.'(',$qwe);
  $qwe=preg_replace('/\) *\(/',')'.$op.'(',$qwe);

  // remove erroneous format of query - ie: '(&', '&)', '(|', '|)', '~&', '~|', '~)'
  $qwe=preg_replace('/\(&+/',"(",$qwe);
  $qwe=preg_replace('/&+\)/',")",$qwe);
  $qwe=preg_replace('/~+\)/',")",$qwe);
  $qwe=preg_replace('/\(\|+/',"(",$qwe);
  $qwe=preg_replace('/\|+\)/',")",$qwe);
  $qwe=preg_replace('/~+&+/',"&",$qwe);
  $qwe=preg_replace('/~+\|+/',"|",$qwe);

  if ($DEBUG) echo "End ParseStr(): qwe=$qwe<BR><HR>";

  return($qwe);
}

// -----------------------------------------------
// ParseQ($q)
// -----------------------------------------------
// Converts a user query into the space separated token string used
// by the rest of the search code.
//
// The query is first normalized by ParseStr(); its operators are then
// rewritten ("&" -> "&&", "|" -> "||", "~" -> "!"), every operator and
// parenthesis is surrounded by single spaces and the whole expression
// is wrapped in "( ... )".
//
// Returns '' when $q is empty or consists of spaces only.
// When the global $DEBUG is true the input and result are echoed.
function ParseQ($q){
   global $DEBUG;

   if ($DEBUG) echo "Begin ParseQ(): q=$q<BR>";

   // nothing but spaces: no query
   if (str_replace(' ', '', (string)$q) == '') {
      return '';
   }

   $q=ParseStr($q);

   // All searches are literal single characters and no replacement
   // contains a character that is searched for, so one strtr() pass
   // gives the same result as replacing them one after another.
   $q=strtr((string)$q, array(
      '&' => ' && ',
      '|' => ' || ',
      '~' => ' ! ',
      '(' => ' ( ',
      ')' => ' ) '
   ));
   $q="( $q )";

   // collapse the doubled spaces produced above
   $q=preg_replace('/ +/', ' ', $q);

   if ($DEBUG) echo "End ParseQ(): q=$q<BR>";

   return $q;
}

// -----------------------------------------------
// last_parse($q)
// -----------------------------------------------
// Builds the SQL statements for the tokenized boolean query $q.
//
// $q is a space separated token string made of "(", ")", "&&", "||",
// "!" and words (the format produced by ParseQ()).  The tokens are
// checked by a two-state machine (state 0: operand expected, state 1:
// operator or ")" expected) that at the same time builds the HAVING
// expression $qu; any syntax error is reported through
// print_error_local().
//
// Side effects visible in this function:
//   - appends to the globals $all_words, $wordsinfo and, when $soundex
//     is 'yes', $soundex_suggestions
//   - sets the global $temp_table
//   - in the 'multi' / 'crc-multi' modes with a mysql or pgsql backend
//     it already executes the statements that create and fill the
//     temporary table
//
// Returns array($query_url_id, $query_url, $query_count_url_id):
//   $query_url_id        selects matching url ids ordered by weight
//   $query_url           selects one url row; contains the literal
//                        placeholder %URL_IN% for the caller to replace
//   $query_count_url_id  counts the matches ('' where none is built)
function last_parse($q){
   global $all_words;
   global $dbtype;
   global $dbmode;
   global $db_format;
   global $ul, $ue, $uestr, $ulstr;
   global $tagstr, $timestr, $catstr,$langstr;
   global $DEBUG;
   global $temp_table;
   global $final_word;
   global $ispellmode;
   global $wordsinfo;
   global $soundex_suggestions,$soundex;

   global $last_mod_field;
   global $crc_field;
 
   $state=0;
   $qu="";
   $w=""; 
   $n=0;

   // Locals that the original code read before any assignment.
   $error="";
   $sql_small="";
   $count=0;
   $norm_word_crc=array();
   $query_url_id="";
   $query_count_url_id="";

   $t=strtok($q," ");
   $words=array();
   
   if (($dbmode == 'crc') ||
       ($dbmode == 'crc-multi')) {
       // words are stored as CRC32 integers: no quoting in SQL
       $word_field = 'word_id';
       $word_table = 'ndict';
       $word_type = 'INT';
       $str_sep = "";
   } else {
       $word_field = 'word';
       $word_table = 'dict';
       $word_type = 'VARCHAR(32)';
       $str_sep = "'";
   }   
   
   while(($t!="")&&($error=="")){

     switch($state){
       case 0:
       // operand expected: "!", "(" or a word
       if(($t=="||") || 
          ($t=="&&") || 
          ($t==")")) {
         $error="at '$t'";
       } else if ($t=="!") { 
         $state=0;
         $qu="$qu NOT ";
       } else if($t=="("){
         $n++;
         $state=0;
         $qu="$qu(";
       } else {
         $state=1;
         $t=strtolower($t);
         if (!is_stopword($t)) {
            normalize_word($t);
            $words[]=$t;
            // placeholder, replaced by the real condition further down
            $qu="$qu %$t% ";
         } else {
            // a stopword never restricts the result
            $qu="$qu 1=1 ";	
            $wordsinfo .= "<b>$t</b>: stopword; ";
         }
       }
       break;
       
       case 1:
       // operator or closing bracket expected
       if(($t=="||")||($t=="&&")){
         $state=0;
         if ($t=='||') $qu="$qu OR ";
         else $qu="$qu AND ";
       }
       else if($t==")") {
         $n--;
         $state=1;
         $qu="$qu)";
       }
       else
         $error=" at '$t'";
       break;
     }
     $t=strtok(" ");
   }
   
   if($error=="" && $n != 0) $error='Unmatched brackets';
   if($error!="") print_error_local($error);

   // eregi() no longer exists in PHP 7+; same case-insensitive test
   if (preg_match('/db|text/i', (string)$ispellmode)) check_words();

   if (count($words)) $wordsinfo .= "looking for: ";

   for ($i=0; $i<count($words); $i++) {
      $word=$words[$i];
      $qu_norm='';

      // word forms to search for; a missing entry means "no forms"
      $forms = (isset($final_word[$word]) && is_array($final_word[$word]))
             ? $final_word[$word] : array();
      
      foreach ($forms as $norm_word) {
         $all_words[]=$norm_word;
         
         $wordsinfo .="<b>$norm_word</b>; ";
	 
         if (($dbmode == 'crc') ||
             ($dbmode == 'crc-multi')) { 
             $norm_word_crc[$norm_word]=crc32($norm_word);
             $norm_word=$norm_word_crc[$norm_word];
         }
	 
         switch ($dbtype) {
            case 'oracle7':
            case 'oracle8':
            case 'oracle': $qu_norm .= " sum(decode($word_field,$str_sep$norm_word$str_sep,1,0))>0 OR "; break;
            case 'pgsql' : $qu_norm .= " sum(case $word_field when $str_sep$norm_word$str_sep then 1 else 0 end)>0 OR "; break;
            case 'mysql' :
            default      : $qu_norm .= " sum($word_field=$str_sep$norm_word$str_sep)>0 OR "; break;
         } 
         if($w=="") $w="$str_sep$norm_word$str_sep"; else $w .=",$str_sep$norm_word$str_sep";
      }
     
      $qu_norm="($qu_norm)";
      // drop the dangling OR left by the last form
      $qu_norm=str_replace("OR )",")",$qu_norm);
      $qu=str_replace("%$word%",$qu_norm,$qu);
   }

   if ($soundex == 'yes') {
        // Ask for suggestions for each collected word.  (The original
        // loop passed the stale $norm_word on every iteration.)
        if (is_array($all_words)) {
           foreach ($all_words as $sx_word) {
              $soundex_suggestions .= suggest_soundex($sx_word);
           }
        }
        if ($soundex_suggestions == '') $soundex_suggestions = '-';
   }   

   if (($dbtype == 'mysql') || ($dbtype == '')) $sql_small = 'SQL_SMALL_RESULT';

   if (($dbmode == 'multi') ||
       ($dbmode == 'crc-multi')) {
// ------------------
// DBMode = multi OR crc-multi
      $query="";

      // mysql and pgsql collect the rows of all dictionary tables in a
      // temporary table; the other backends use one UNION ALL subquery
      if ((($dbtype == 'pgsql') || ($dbtype == 'mysql')) &&
           (count($words))){
      	 $make_temp_table=1;
         $temp_table = 't'.time().rand(1000,9999).rand(1000,9999);
      } else {
         $make_temp_table=0;
         $temp_table="";
      }

      for ($i=0; $i<count($words); $i++) {
         $word=$words[$i];
         $qu_norm='';

         $forms = (isset($final_word[$word]) && is_array($final_word[$word]))
                ? $final_word[$word] : array();

         foreach ($forms as $norm_word) {
	    
            $dict=get_dict_tab($norm_word);
	    
	    $old_norm_word=$norm_word;
	    if ($dbmode == 'crc-multi') $norm_word=$norm_word_crc[$norm_word];

            if ($query=="") {
               // first word form
               if ($make_temp_table == 0) {
                  $query="SELECT url_id,$word_field,intag
                            FROM $dict
                           WHERE $word_field = $str_sep$norm_word$str_sep";
               } else {
	          if ($dbtype == 'mysql') {	
                      $query="CREATE /*!32302 TEMPORARY */ TABLE $temp_table (
		              url_id INT DEFAULT '0' NOT NULL,
			      $word_field $word_type DEFAULT '0' NOT NULL,
			      intag  TINYINT DEFAULT '0' NOT NULL,
			      KEY i1$temp_table(url_id),
			      KEY i2$temp_table($word_field))";
			      
		      if($DEBUG) echo "last_parse(): ",$query,"<BR><HR>";
                      if (!db_query($query)) print_error_local('Query error: '.$query."\n<BR>".db_error());

                      $query="INSERT INTO $temp_table
		               SELECT url_id,$word_field,intag
                        	FROM $dict
                               WHERE $word_field = $str_sep$norm_word$str_sep";
		      
		      if($DEBUG) echo "last_parse(): ",$query,"<BR><HR>";
                      if (!db_query($query)) print_error_local('Query error: '.$query."\n<BR>".db_error());

		      $query="SELECT count(*)
		                FROM $temp_table";
		      
		      if($DEBUG) echo "last_parse(): ",$query,"<BR><HR>";
                      if (!$res=db_query($query)) print_error_local('Query error: '.$query."\n<BR>".db_error());

                      if ($row=db_fetchrow($res)) {
                      	$count=$row[0];
                      	$wordsinfo .= "<b>$old_norm_word</b>: $count; ";
                      }

                      db_freeresult($res);
		  } else {
		  // not mysql
                      $query="SELECT url_id,$word_field,intag
                        	INTO TEMP $temp_table
                        	FROM $dict
                               WHERE $word_field = $str_sep$norm_word$str_sep";
		  }
               }
            } else {
               // every further word form
	       if ($dbtype == 'mysql') {	
	           $query ="INSERT INTO $temp_table
                	    SELECT url_id,$word_field,intag
                    	      FROM $dict
                             WHERE $word_field = $str_sep$norm_word$str_sep";	       
	           if($DEBUG) echo "last_parse(): ",$query,"<BR><HR>";
                   if (!db_query($query)) print_error_local('Query error: '.$query."\n<BR>".db_error());

                   $query="SELECT count(*)
		             FROM $temp_table";
		      
		   if($DEBUG) echo "last_parse(): ",$query,"<BR><HR>";
                   if (!$res=db_query($query)) print_error_local('Query error: '.$query."\n<BR>".db_error());

                   if ($row=db_fetchrow($res)) {
                        // rows added by this form = new total - old total
                      	$count=$row[0]-$count;
                      	$wordsinfo .= "<b>$old_norm_word</b>: $count; ";
                        $count=$row[0];
                   }

                   db_freeresult($res);
	       } else {
	           // The leading space keeps UNION apart from the value
	           // that ends the previous SELECT (an unquoted number in
	           // crc mode).
	           $query.=" UNION ALL
                	    SELECT url_id,$word_field,intag
                    	      FROM $dict
                             WHERE $word_field = $str_sep$norm_word$str_sep";
	       }
            }
        }
      }

      if (($make_temp_table == 1) && ($dbtype != 'mysql')) {
         if($DEBUG) echo "last_parse(): ",$query,"<BR><HR>";
         if (!db_query($query)) print_error_local('Query error: '.$query."\n<BR>".db_error());
      }

      if($ulstr||$uestr||$tagstr||$timestr||$catstr||$langstr) {
         // extra url limits present: join with the url table
         if (($dbtype == 'oracle') || 
	     ($dbtype == 'oracle7') ||
	     ($dbtype == 'oracle8')) {
                 $query_url_id="SELECT url_id,
                                       sum(intag) as r 
                                  FROM url, ($query) 
             	                 WHERE url.rec_id=url_id 
             	                   $catstr
                                   $ulstr
                                   $uestr
                                   $langstr
             	                   $tagstr
             	                   $timestr 
             	                 GROUP BY url_id 
             	             HAVING ($qu)
             	              ORDER BY r DESC";
           
                 $query_count_url_id="SELECT count(*)
                                        FROM (
                                           SELECT url_id
                                             FROM url, ($query) 
                      	                    WHERE url.rec_id=url_id 
                      	                      $catstr
                      	                      $ulstr
                                              $uestr
                                              $langstr
                      	                      $tagstr 
                                              $timestr
                      	                    GROUP BY url_id
                      	                   HAVING ($qu)
                                        )";

         } elseif (($dbtype == 'pgsql') ||
	           ($dbtype == 'mysql')) {
                 $query_url_id="SELECT $sql_small
		                       url_id,
                                       sum(intag) as r 
                                  FROM url, $temp_table 
             	                 WHERE url.rec_id=url_id 
             	                   $catstr
             	                   $ulstr
                                   $uestr
                                   $langstr
             	                   $tagstr 
                                   $timestr
             	                 GROUP BY url_id 
             	             HAVING ($qu)
             	              ORDER BY r DESC";
           
                 $query_count_url_id="";
         }
      } else {
         // not ul,ue nor tagstr timestr,catstr,langstr
         if (($dbtype == 'oracle')||
	     ($dbtype == 'oracle7') ||
	     ($dbtype == 'oracle8')) {
                 $query_url_id="SELECT url_id,
                                       sum(intag) as r 
             	                  FROM ($query) 
             	                 GROUP BY url_id 
             	                HAVING ($qu)
             	                 ORDER BY r DESC";
           
                 $query_count_url_id="SELECT count(*)
                                        FROM (
             	                           SELECT url_id
             	                             FROM ($query) 
             	                            GROUP BY url_id 
             	                           HAVING ($qu)
             	                        )";
         } elseif (($dbtype == 'pgsql') ||
	           ($dbtype == 'mysql')) {
                 $query_url_id="SELECT $sql_small
		                       url_id,
                                       sum(intag) as r 
             	                  FROM $temp_table 
             	                 GROUP BY url_id 
             	                HAVING ($qu)
             	                 ORDER BY r DESC";
           
                 $query_count_url_id="";
         }
      }

   } else {
// ------------------
// DBMode = single or crc

      if($ulstr||$uestr||$tagstr||$timestr||$catstr||$langstr) {
         $query_url_id="SELECT $sql_small
                               $word_table.url_id,
                               sum($word_table.intag) as r 
                          FROM url,$word_table
     	                 WHERE url.rec_id=$word_table.url_id 
     	                   $catstr
     	                   $ulstr
                           $uestr
                           $langstr
     	                   $tagstr 
                           $timestr
     	                   AND $word_table.$word_field in ($w) 
     	                 GROUP BY url_id 
     	             HAVING ($qu)
     	              ORDER BY r DESC";
   
         $query_count_url_id="SELECT count(*)
                                FROM (
                                   SELECT $word_table.url_id
                                     FROM url,$word_table
              	                    WHERE url.rec_id=$word_table.url_id 
              	                      $catstr
              	                      $ulstr
                                      $uestr
                                      $langstr
              	                      $tagstr 
                                      $timestr
              	                      AND $word_table.$word_field in ($w) 
              	                    GROUP BY $word_table.url_id
              	                   HAVING ($qu)
                                )";
      } else {
         $query_url_id="SELECT $sql_small
                               $word_table.url_id,
                               sum($word_table.intag) as r 
     	                  FROM $word_table 
     	                 WHERE $word_table.$word_field in ($w) 
     	                 GROUP BY url_id 
     	                HAVING ($qu)
     	                 ORDER BY r DESC";
   
         $query_count_url_id="SELECT count(*)
                                FROM (
     	                           SELECT $word_table.url_id
     	                             FROM $word_table 
     	                            WHERE $word_table.$word_field in ($w)
     	                            GROUP BY $word_table.url_id 
     	                           HAVING ($qu)
     	                        )";
      }
   }

   // the category column is only selected for the 3.1 database format
   if ($db_format == '3.1') {
   	$cat_field = ',category';
   } else {
   	$cat_field = '';
   }


   $query_url="SELECT $sql_small
                      url.url,
                      url.title,
      	              url.txt,
      	              url.content_type,
      	              url.docsize,
      	              $last_mod_field,
      	              url.keywords,
      	              url.description,
      	              $crc_field,
      	              url.rec_id
      	              $cat_field
                 FROM url
                WHERE url.rec_id = %URL_IN%";

   return array($query_url_id,$query_url,$query_count_url_id);
}

?>

