// @author
// @contact
// @describe 关键字采集
include_once('./config.php');
/**
 * Fetch a URL over a raw TCP socket and return the raw HTTP response.
 *
 * The URL is split into host, optional port (default 80) and path. A GET
 * request is sent and everything the server sends back is returned, so the
 * result includes the status line and response headers as well as the body.
 *
 * @param string $url Address to fetch; a leading "http://" is optional.
 * @return string Raw response text, or an empty string when the connection
 *                cannot be opened.
 */
function getFile($url)
{
    // eregi_replace() was removed in PHP 7; preg_replace() with the "i"
    // modifier is the case-insensitive equivalent.
    $url = preg_replace('#^http://#i', '', $url);
    $temp = explode('/', $url);
    $host = array_shift($temp);
    $path = '/' . implode('/', $temp);
    $temp = explode(':', $host);
    $host = $temp[0];
    $port = (isset($temp[1]) && $temp[1] !== '') ? (int) $temp[1] : 80;

    // fsockopen() declares $errno/$errstr by reference itself; writing "&"
    // at the call site is a fatal error since PHP 5.4.
    $errno = 0;
    $errstr = '';
    $fp = @fsockopen($host, $port, $errno, $errstr, 30);
    if (!$fp) {
        // Nothing to read from: bail out instead of calling fread() on false.
        return '';
    }

    $request = "GET $path HTTP/1.1\r\n"
        . "Host: $host\r\n"
        . "Accept: */*\r\n"
        . "Referer: http://$host/\r\n"
        . "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)\r\n"
        . "Connection: Close\r\n\r\n";
    @fputs($fp, $request);

    $Content = '';
    while (true) {
        $chunk = @fread($fp, 4096);
        // Compare strictly so a chunk consisting of "0" does not end the loop early.
        if ($chunk === false || $chunk === '') {
            break;
        }
        $Content .= $chunk;
    }
    @fclose($fp);

    return $Content;
}
// Rebuild the keyword file from the Baidu top-10 page when the file is older
// than $flush minutes. $topfile and $flush are expected to be set by config.php.
// A missing file has no modification time; treat it as 0 so the first run refreshes.
$lastflesh = file_exists($topfile) ? (int) filemtime($topfile) : 0;
$timestamp = time();
if ($lastflesh + ((int) $flush * 60) < $timestamp) {
    $html = getFile("http://top.baidu.com/buzz/top10.html");
    // NOTE(review): this pattern has to track the page's current markup;
    // confirm it against the live HTML if no keywords are found.
    preg_match_all('/([^<]+)\<\/a>\<\/td>/', $html, $tmp);

    // Start from an empty string so the ".=" below never reads an undefined variable.
    $str = '';
    if (!empty($tmp[1])) {
        foreach ($tmp[1] as $value) {
            // The source page is served as GB2312; the keyword file is stored as UTF-8.
            $value = mb_convert_encoding($value, 'UTF-8', 'GB2312');
            $str .= "<" . $value . ">";
        }
    }

    if ($str === '') {
        // Fetch failed or nothing matched: keep the previous keyword file
        // instead of overwriting it with nothing.
        echo "关键字采集失败,未更新文本";
    } elseif (file_put_contents($topfile, $str) === false) {
        echo "关键字写入失败";
    } else {
        // NOTE(review): 0777 makes the file world-writable; tighten if the
        // deployment does not require it.
        chmod($topfile, 0777);
        echo "关键字更新成功并写入文本";
    }
} else {
    echo "关键字在更新间隔时间内,晚些再试";
}
?>
采集地址:http://top.baidu.com/buzz/top10.html
匹配代码: 关键词
我以前找人写的,现在网页有点变动,改不好了。
谁帮我改好了我以后见一次加一次分。
我改动的是这行,但老不对,莫非其它地方也要改?
preg_match_all ('/([^<]+)\<\/a>\<\/td>/',$html,$tmp);
还有个config.php 的文件:
// File in which the collected keywords are stored.
$topfile = 'hw.txt';
// Refresh interval, in minutes.
$flush = '1';
?>
// @contact
// @describe 关键字采集
include_once('./config.php');
/**
 * Fetch a URL over a raw TCP socket and return the raw HTTP response.
 *
 * The URL is split into host, optional port (default 80) and path. A GET
 * request is sent and everything the server sends back is returned, so the
 * result includes the status line and response headers as well as the body.
 *
 * @param string $url Address to fetch; a leading "http://" is optional.
 * @return string Raw response text, or an empty string when the connection
 *                cannot be opened.
 */
function getFile($url)
{
    // eregi_replace() was removed in PHP 7; preg_replace() with the "i"
    // modifier is the case-insensitive equivalent.
    $url = preg_replace('#^http://#i', '', $url);
    $temp = explode('/', $url);
    $host = array_shift($temp);
    $path = '/' . implode('/', $temp);
    $temp = explode(':', $host);
    $host = $temp[0];
    $port = (isset($temp[1]) && $temp[1] !== '') ? (int) $temp[1] : 80;

    // fsockopen() declares $errno/$errstr by reference itself; writing "&"
    // at the call site is a fatal error since PHP 5.4.
    $errno = 0;
    $errstr = '';
    $fp = @fsockopen($host, $port, $errno, $errstr, 30);
    if (!$fp) {
        // Nothing to read from: bail out instead of calling fread() on false.
        return '';
    }

    $request = "GET $path HTTP/1.1\r\n"
        . "Host: $host\r\n"
        . "Accept: */*\r\n"
        . "Referer: http://$host/\r\n"
        . "User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)\r\n"
        . "Connection: Close\r\n\r\n";
    @fputs($fp, $request);

    $Content = '';
    while (true) {
        $chunk = @fread($fp, 4096);
        // Compare strictly so a chunk consisting of "0" does not end the loop early.
        if ($chunk === false || $chunk === '') {
            break;
        }
        $Content .= $chunk;
    }
    @fclose($fp);

    return $Content;
}
// Rebuild the keyword file from the Baidu top-10 page when the file is older
// than $flush minutes. $topfile and $flush are expected to be set by config.php.
// A missing file has no modification time; treat it as 0 so the first run refreshes.
$lastflesh = file_exists($topfile) ? (int) filemtime($topfile) : 0;
$timestamp = time();
if ($lastflesh + ((int) $flush * 60) < $timestamp) {
    $html = getFile("http://top.baidu.com/buzz/top10.html");
    // The original statement on this line was truncated to "preg_match_all ('/";
    // it is restored here from the intact copy of the same script.
    // NOTE(review): this pattern has to track the page's current markup;
    // confirm it against the live HTML if no keywords are found.
    preg_match_all('/([^<]+)\<\/a>\<\/td>/', $html, $tmp);

    // Start from an empty string so the ".=" below never reads an undefined variable.
    $str = '';
    if (!empty($tmp[1])) {
        foreach ($tmp[1] as $value) {
            // The source page is served as GB2312; the keyword file is stored as UTF-8.
            $value = mb_convert_encoding($value, 'UTF-8', 'GB2312');
            $str .= "<" . $value . ">";
        }
    }

    if ($str === '') {
        // Fetch failed or nothing matched: keep the previous keyword file
        // instead of overwriting it with nothing.
        echo "关键字采集失败,未更新文本";
    } elseif (file_put_contents($topfile, $str) === false) {
        echo "关键字写入失败";
    } else {
        // NOTE(review): 0777 makes the file world-writable; tighten if the
        // deployment does not require it.
        chmod($topfile, 0777);
        echo "关键字更新成功并写入文本";
    }
} else {
    echo "关键字在更新间隔时间内,晚些再试";
}
?>
采集地址:http://top.baidu.com/buzz/top10.html
匹配代码: 关键词
我以前找人写的,现在网页有点变动,改不好了。
谁帮我改好了我以后见一次加一次分。
我改动的是这行,但老不对,莫非其它地方也要改?
preg_match_all ('/([^<]+)\<\/a>\<\/td>/',$html,$tmp);
还有个config.php 的文件:
// File in which the collected keywords are stored.
$topfile = 'hw.txt';
// Refresh interval, in minutes.
$flush = '1';
?>
网站 SEO
centos几个软件源以


2010/02/01 09:59 | by 
