t:Array
(
[0] => 15502
)
vs:Array
(
[0] => 262517
)
vts:Array
(
[0] => 0
)
c:Array
(
[0] => 8078
)
p:Array
(
[0] => 6248
)
(UPDATE STATS SET total=15502, views=262517, votes=0,comment_no=8078,pic=6248)/*********************************************************************/
/*
 * Run-time switches.
 *
 * Each switch is read from the query string when the value there is truthy,
 * otherwise from a positional command-line argument, otherwise from the
 * hard-coded value on its `else` line:
 *
 *   query key   argv slot   variable    else value
 *   prune       2           $prune      1
 *   mail        3           $ml         0
 *   verbose     4           $verbose    0
 *   test        5           $test       0
 *   skip        6           $skip       0
 *
 * NOTE(review): `$argv[N]==1 || $argv[N]==0` is a loose comparison. An
 * omitted argument is null, and null == 0 is true, so the `elseif` also fires
 * when the argument is missing and copies that null into the switch. The
 * `else` value is only reached for an argument that compares equal to neither
 * 0 nor 1. In particular a missing argv[2] leaves $prune falsy, not 1.
 * Confirm which default is intended before tightening these tests.
 *
 * NOTE(review): $HTTP_GET_VARS is the legacy long-array name for the query
 * string; it is not available on PHP 5.4 and later -- confirm the target
 * PHP version.
 */
if ($HTTP_GET_VARS['prune']) $prune=$HTTP_GET_VARS['prune'];
elseif ($argv[2]==1 || $argv[2]==0) $prune=$argv[2];
else $prune=1; // value used when argv[2] is neither 0 nor 1
if ($HTTP_GET_VARS['mail']) $ml=$HTTP_GET_VARS['mail'];
elseif ($argv[3]==1 || $argv[3]==0) $ml=$argv[3];
else $ml=0; // value used when argv[3] is neither 0 nor 1
if ($HTTP_GET_VARS['verbose']) $verbose=$HTTP_GET_VARS['verbose'];
elseif ($argv[4]==1 || $argv[4]==0) $verbose=$argv[4];
else $verbose=0; // value used when argv[4] is neither 0 nor 1
if ($HTTP_GET_VARS['test']) $test=$HTTP_GET_VARS['test'];
elseif ($argv[5]==1 || $argv[5]==0) $test=$argv[5];
else $test=0; // value used when argv[5] is neither 0 nor 1
if ($HTTP_GET_VARS['skip']) $skip=$HTTP_GET_VARS['skip'];
elseif ($argv[6]==1 || $argv[6]==0) $skip=$argv[6];
else $skip=0; // value used when argv[6] is neither 0 nor 1
// Announce test mode. The string literal below deliberately spans two lines,
// so the emitted text ends with a line break.
if ($test) echo "test mode active
";
$first_time=1; // set to 1 before crawling starts; per the original note it identifies the first crawl pass
$img_width=80; // thumbnail image width
$total_comments=0; // counter, starts at zero
$total_pics=0; // counter, starts at zero
/********************************************************************************************************/
/* START ACTUAL WORK HERE
/********************************************************************************************************/
// The news source key comes from the query string (`url`) when that value is
// truthy, otherwise from the first command-line argument. With neither, the
// script prints an error and stops.
if ($HTTP_GET_VARS['url']) $url=$HTTP_GET_VARS['url']; /// SOURCE KEY FROM THE QUERY STRING
elseif ($argv[1]) $url=$argv[1]; /// SOURCE KEY PASSED AS A COMMAND LINE ARGUMENT
else { echo "no url passed.. please indicate one news source to crawl.\n"; exit; } //ERROR
// Only hyphens are removed here; any other non-alphanumeric character in the
// key is kept as-is.
$bareurl=str_replace("-","",$url); /// SOURCE KEY WITH HYPHENS STRIPPED
// From this point $url no longer holds the key: it is overwritten with the
// start address registered for that key in $news_index.
// NOTE(review): this lookup runs before the existence check below, and that
// check consults $url_regex rather than $news_index -- a key missing from
// $news_index leaves $url empty. Confirm both tables always share keys.
$url=$news_index[$bareurl]; /// STARTING ADDRESS TO CRAWL
// Everything before the last '/' of the start address. When the address
// contains no '/', strrchr() finds nothing and the result is an empty string.
$baseurl=substr($url,0,-strlen(strrchr($url,'/'))); /// BASE URL USED FOR REFERENCE
// Stop when the key has no entry in $url_regex.
if (!array_key_exists($bareurl, $url_regex))
{ echo "News source (".$bareurl.") not in DB."; exit; } // SOURCE NOT IN DB
/*********** if a google news source *********/
// NOTE(review): the body of this branch reads like two unrelated fragments
// joined together. The first part fetches result pages; the part starting at
// the preg_split() call uses $arabic_month, $contents and $temp, none of which
// is assigned in the visible code above, and it contains `return` statements.
// It may be the tail of a month-lookup function whose header is missing.
// Verify against a clean copy of the file before relying on it.
if ($bareurl=="googlenews")
{
// Fetch ten result pages. The page offset (0, 10, ... 90) is appended to the
// start address and the page is downloaded with the external `links -source`
// command.
for ($k=0; $k<10; $k++) /// FETCH THE FIRST TEN PAGES OF GOOGLE RESULTS WE NEED TO PARSE
{
$source_files[$k]=shell_exec('links -source \''.$news_index[$bareurl].strval($k*10).'\'');
// Replace the first match of this source's $news_block pattern with its
// first capture group (limit 1).
$source_files[$k]=preg_replace($news_block[$bareurl],'$1',$source_files[$k],1);
}
// Append the pages, in order, onto $source_file ($k is 10 after the loop).
for ($i=0; $i<$k; $i++) $source_file=$source_file.$source_files[$i];
// NOTE(review): preg_split()'s third parameter is the split limit, yet $temp
// is passed there and then read as if it held match groups; the later call
// with the same pattern uses preg_match(). The subject is $contents, not the
// $source_file built above. The pattern literal spans three lines, so it
// contains the line breaks.
$google_file=preg_split('/
'.$arabic_month.'<\/p>(\d\d)
/is',$contents,$temp);
// NOTE(review): if this really runs at file scope, `return` ends execution of
// the current file -- confirm that is intended.
if ($temp[1]) return ($temp[1]);
// NOTE(review): the file opened is "months2.html" but the read length comes
// from filesize("months.html") -- a different file name. The fopen() result
// is not checked before fread().
$handle = fopen("months2.html", "r");
$contents = fread($handle, filesize("months.html")); fclose($handle);
// Look for the month followed by a two-digit group in the file contents and
// return that group; the match count in $s is not used in the visible code.
$s=preg_match('/
'.$arabic_month.'<\/p>(\d\d)
/is',$contents,$temp);
return ($temp[1]);
}
/********************************************************************************************************/
/* FUNCTION TO CONVERT ENGLISH FORMATTED DATE TO SQL FORMAT
/********************************************************************************************************/
/**
 * Convert an English date, or a relative age, into a "YYYY/MM/DD" string.
 *
 * Formats are tried in this order:
 *   1. "<month> <day>, <year>"  e.g. "March 5, 2007"   -> "2007/03/05"
 *   2. "<n> hour..."            e.g. "3 hours ago"     -> date n hours before now
 *   3. "<n> minute..."          e.g. "20 minutes ago"  -> date n minutes before now
 *
 * The month text is translated by month_num2(). Relative ages are resolved
 * against the current time and formatted with date(), i.e. in the configured
 * default timezone, for both hours and minutes. A relative age without a
 * leading number (e.g. "an hour ago") is treated as zero units and therefore
 * yields today's date.
 *
 * @param string $original_t Date text as found on the source page.
 * @return string|null "YYYY/MM/DD", or null when no format is recognised.
 */
function convert_time_eng($original_t)
{
    $original_t = (string) $original_t;

    // Absolute date. The groups are only read after a successful match, so a
    // non-matching input no longer touches undefined offsets.
    if (preg_match('/\s*(.+?) (\d+?)\, (\d\d\d\d)/is', $original_t, $dt) === 1
        && $dt[1] && $dt[2] && $dt[3]) {
        // Zero-pad the day so this branch has the same layout as the
        // date('Y/m/d') results below.
        $day = str_pad($dt[2], 2, "0", STR_PAD_LEFT);
        return $dt[3] . "/" . month_num2($dt[1]) . "/" . $day;
    }

    // Relative ages. "hour" is checked before "minute", so a text holding
    // both is resolved by its hour count.
    $seconds_per_unit = array("hour" => 3600, "minute" => 60);
    $lowered = strtolower($original_t);
    foreach ($seconds_per_unit as $unit => $seconds) {
        // Compare against false explicitly: a match at offset 0 is still a
        // match. Lower-casing keeps this test in step with the /i regex.
        if (strpos($lowered, $unit) === false) {
            continue;
        }
        $count = 0;
        if (preg_match('/\s*(\d+) ' . $unit . '/is', $original_t, $dt) === 1) {
            $count = intval($dt[1]);
        }
        return date('Y/m/d', time() - $count * $seconds);
    }

    return null;
}
/********************************************************************************************************/
/* FUNCTION TO CONVERT ENGLISH MONTHS TO NUMBERS
/********************************************************************************************************/
/**
 * Translate an English month name into its two-digit number.
 *
 * Only the first three letters are significant and letter case is ignored,
 * so "January", "JAN" and "jan." all give "01". Surrounding whitespace is
 * ignored.
 *
 * @param string $english_month Month name, full or abbreviated.
 * @return string|null "01" .. "12", or null when the name is not recognised.
 */
function month_num2($english_month)
{
    static $numbers = array(
        "jan" => "01", "feb" => "02", "mar" => "03", "apr" => "04",
        "may" => "05", "jun" => "06", "jul" => "07", "aug" => "08",
        "sep" => "09", "oct" => "10", "nov" => "11", "dec" => "12",
    );

    $key = substr(strtolower(trim((string) $english_month)), 0, 3);

    return isset($numbers[$key]) ? $numbers[$key] : null;
}
/********************************************************************************************************/
/* FUNCTION TO CREATE AND STORE THUMBNAILS AND RETURN REFERENCE FOR DB
/********************************************************************************************************/
function store_img($b,$img,$w)
{
if (!remote_file_exists($img)) return ("");
$bin_img=shell_exec('links -source \''.addslashes($img).'\'');
//echo "[".'links -source \''.$img.'\''."]\n";
$ps=strpos($bin_img,"