curl 采集网站内容

收藏
0 1732
SuperXu
SuperXu 2017-06-17 16:55:19
付费话题:0 积分
// Fetch a page with cURL and print the text of its <title> element.
$curl = curl_init();

curl_setopt($curl, CURLOPT_URL, "http://www.itkee.cn");
// Return the response as a string instead of writing it straight to output.
curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
$data = curl_exec($curl);
curl_close($curl);

// curl_exec() returns false when the transfer fails; nothing to parse then.
if ($data === false) {
    exit;
}

// A page that never mentions utf-8 (in any letter case) is treated as GBK.
if (stripos($data, 'utf-8') === false) {
    $converted = iconv("gbk", "utf-8", $data);
    // iconv() returns false on bytes it cannot convert; keep the raw text then.
    if ($converted !== false) {
        $data = $converted;
    }
}

// Lazy + dot-all: stop at the first </title> and allow a title that spans lines.
if (preg_match("/<title>(.*?)<\/title>/is", $data, $title) === 1) {
    echo $title[1];
}
exit;

/**
 * Fetch a page with cURL and extract its title and article body.
 *
 * Response headers are included in the fetched text (CURLOPT_HEADER), so the
 * utf-8 check below also sees any charset named in the headers.
 *
 * @param string $url Address of the page to fetch.
 * @return array Keys 'title' and 'contents', each holding the preg_match()
 *               match array (index 0 = full match, index 1 = captured text).
 *               An entry is an empty array when its pattern did not match or
 *               when the request itself failed.
 */
public function getContent($url){
    $results = array('title' => array(), 'contents' => array());

    $curl = curl_init();
    // Target URL.
    curl_setopt($curl, CURLOPT_URL, $url);
    // Include the response headers in the returned text.
    curl_setopt($curl, CURLOPT_HEADER, 1);
    // Return the response as a string instead of writing it straight to output.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    $data = curl_exec($curl);
    // curl_close() requires the handle; release it as soon as the transfer ends.
    curl_close($curl);

    // curl_exec() returns false when the transfer fails; nothing to parse then.
    if ($data === false) {
        return $results;
    }

    // A response that never mentions utf-8 (in any letter case) is treated as GBK.
    if (stripos($data, 'utf-8') === false) {
        $converted = iconv("gbk", "utf-8", $data);
        // iconv() returns false on bytes it cannot convert; keep the raw text then.
        if ($converted !== false) {
            $data = $converted;
        }
    }

    // Title: lazy + dot-all, so it stops at the first </title> and may span lines.
    preg_match("/<title>(.*?)<\/title>/is", $data, $results['title']);

    // Article body. Without the dot-all flag "." does not cross newlines, so this
    // only matches when the opening tag and a closing </div> share one line.
    preg_match("/<div id=\"article_content\">(.*)<\/div>/i", $data, $results['contents']);

    return $results;
}

评论话题
提交