记录备忘。
获取网页,支持https。
//-->get html
// Fetch the page at $link over HTTP(S) and leave the response body in $html.
// $html holds the body as a string on success, or false when the transfer fails.
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $link); // URL to fetch
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // return the body from curl_exec() instead of printing it
curl_setopt($ch, CURLOPT_HEADER, false); // keep response headers out of the returned body
// Leave TLS verification enabled: with it switched off, anyone on the network path
// can tamper with an https response. If verification fails because no CA bundle
// is configured, set curl.cainfo in php.ini (or CURLOPT_CAINFO) rather than
// disabling these checks.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
// Browser-like user agent; the product token must be "Mozilla/5.0" (with a slash).
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0');
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10); // seconds allowed for establishing the connection
curl_setopt($ch, CURLOPT_TIMEOUT, 30); // seconds allowed for the whole transfer, so a stalled server cannot hang the script
$html = curl_exec($ch);
if ($html === false) {
    // Report the failure instead of silently passing false on to later code.
    trigger_error('Failed to fetch ' . $link . ': ' . curl_error($ch), E_USER_WARNING);
}
curl_close($ch);
//-->parse html
// Parse the markup in $html and extract the page title and <meta> values.
// Results:
//   $title       - text of the first <title> element, or null when there is none
//   $meta_array  - content attribute of each <meta>, keyed by its lower-cased
//                  name / http-equiv / scheme attribute value
//   $keywords    - $meta_array['keywords'], or null when absent
//   $description - $meta_array['description'], or null when absent
$doc = new DOMDocument();
if (isset($html) && is_string($html) && $html !== '') {
    // Real-world HTML is rarely well formed; collect libxml's parse errors
    // internally rather than hiding every error with the @ operator, then
    // restore the previous libxml setting.
    $previous_libxml_setting = libxml_use_internal_errors(true);
    $doc->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml_setting);
}
// When $html is missing, false or empty, $doc stays empty and the lookups
// below simply find nothing.

$nodes = $doc->getElementsByTagName('title');
$title = $nodes->length > 0 ? $nodes->item(0)->nodeValue : null; // page title

$meta_array = array();
$metas = $doc->getElementsByTagName('meta');
foreach ($metas as $meta) {
    $content = $meta->getAttribute('content');
    // Checked in this order so that, as before, a later attribute overwrites
    // an earlier one when both yield the same key.
    foreach (array('name', 'http-equiv', 'scheme') as $attribute) {
        $key = $meta->getAttribute($attribute); // '' when the attribute is absent
        if ($key !== '') {
            $meta_array[strtolower($key)] = $content;
        }
    }
}

$keywords = isset($meta_array['keywords']) ? $meta_array['keywords'] : null; // page keywords
$description = isset($meta_array['description']) ? $meta_array['description'] : null; // page description