| 注册
请输入搜索内容

热门搜索

Java Linux MySQL PHP JavaScript Hibernate jQuery Nginx
phpw34
10年前发布

php判断来访者是否是搜索引擎的爬虫

我们可以通过 HTTP_USER_AGENT 来判断来访者是否是蜘蛛。搜索引擎的蜘蛛都有自己独特的标志,下面列举了其中一部分。

/**
 * Report whether the current request's User-Agent header contains a known
 * search-engine crawler keyword.
 *
 * The check is a case-insensitive substring match against a short keyword
 * list. A request that carries no User-Agent header is treated as a
 * non-crawler.
 *
 * @return bool True when a crawler keyword occurs in the User-Agent, false otherwise.
 */
function is_crawler() {
    // The User-Agent header is optional; reading it unguarded raises an
    // undefined-index notice/warning when the client omits it.
    if (!isset($_SERVER['HTTP_USER_AGENT']) || !is_string($_SERVER['HTTP_USER_AGENT'])) {
        return false;
    }

    $userAgent = strtolower($_SERVER['HTTP_USER_AGENT']);

    $spiders = [
        'Googlebot',    // Google crawler
        'Baiduspider',  // Baidu crawler
        'Yahoo! Slurp', // Yahoo crawler
        'YodaoBot',     // Youdao crawler
        'msnbot',       // Bing crawler
        // add more crawler keywords here
    ];

    foreach ($spiders as $spider) {
        // Both sides are lower-cased so the comparison ignores case.
        if (strpos($userAgent, strtolower($spider)) !== false) {
            return true;
        }
    }

    return false;
}

下面的 PHP 代码附带了更多的蜘蛛标识:
/**
 * Report whether the current request's User-Agent header contains one of a
 * longer list of crawler / bot identifiers.
 *
 * The check is a case-insensitive substring match. The function always
 * returns a boolean and produces no output. A request with a missing or empty
 * User-Agent header is treated as a non-crawler.
 *
 * @return bool True when any identifier occurs in the User-Agent, false otherwise.
 */
function isCrawler() {
    // The header is optional; guard the read so an absent header does not
    // raise an undefined-index notice/warning.
    if (!isset($_SERVER['HTTP_USER_AGENT']) || !is_string($_SERVER['HTTP_USER_AGENT'])) {
        return false;
    }

    // Deliberately not echoed: the User-Agent is client-controlled input and
    // must not be written into the response.
    $agent = strtolower($_SERVER['HTTP_USER_AGENT']);
    if ($agent === '') {
        return false;
    }

    // NOTE(review): a few entries look like descriptive labels rather than
    // real User-Agent fragments (e.g. "Java (Often spam bot)", "Perl tool"),
    // and short ones such as "Ask" or "legs" may match unrelated agents.
    // The list is kept unchanged here; confirm before pruning.
    $spiderSite = [
        "TencentTraveler",
        "Baiduspider+",
        "BaiduGame",
        "Googlebot",
        "msnbot",
        "Sosospider+",
        "Sogou web spider",
        "ia_archiver",
        "Yahoo! Slurp",
        "YoudaoBot",
        "Yahoo Slurp",
        "MSNBot",
        "Java (Often spam bot)",
        "BaiDuSpider",
        "Voila",
        "Yandex bot",
        "BSpider",
        "twiceler",
        "Sogou Spider",
        "Speedy Spider",
        "Google AdSense",
        "Heritrix",
        "Python-urllib",
        "Alexa (IA Archiver)",
        "Ask",
        "Exabot",
        "Custo",
        "OutfoxBot/YodaoBot",
        "yacy",
        "SurveyBot",
        "legs",
        "lwp-trivial",
        "Nutch",
        "StackRambler",
        "The web archive (IA Archiver)",
        "Perl tool",
        "MJ12bot",
        "Netcraft",
        "MSIECrawler",
        "WGet tools",
        "larbin",
        "Fish search",
    ];

    foreach ($spiderSite as $val) {
        // Both sides are lower-cased so the comparison ignores case.
        if (strpos($agent, strtolower($val)) !== false) {
            return true;
        }
    }

    // Explicit negative result: without this the function would return null
    // whenever a non-empty User-Agent matched nothing.
    return false;
}

// Demo: greet the visitor according to the detection result.
if (isCrawler()) {
    echo "你好蜘蛛精!";
} else {
    echo "你不是蜘蛛精啊!";
}