七月 1st, 2009 php检测远程文件
此文章来源于后羿之弓,转载请注明出处
通常我们会用php去检测远程文件是否存在,或者判断远程文件的类型,以及通过文件大小来判断远程文件是否有效(比如一首正常的流行歌曲的mp3文件不会少于300k吧)。
方法众多,本人觉得通过socket抓出HTTP报头来判断无疑是性价比最优的。下面就是通过修改HttpClient得到了一个getFileHeader的类(含有示例及结果)
- <?php
// Silence PHP's own notices/warnings so only the output below is printed.
error_reporting(0);
header('content-type:text/html; charset=utf-8');

// Example: fetch the HTTP response headers of a remote MP3 and dump them.
$mp3url = 'http://learning.sohu.com/zt/freshenglish/sep17/songs.mp3';
$request = new getFileHeader($mp3url);

if ($request->doRequest() === false) {
    // doRequest() returns false on failure and leaves the reason in
    // $errormsg. Parts of that message may already be HTML-escaped, so
    // double_encode is switched off (4th argument) to avoid escaping twice.
    echo 'Request failed: ', htmlspecialchars($request->errormsg, ENT_QUOTES, 'UTF-8', false);
} else {
    var_dump($request->getHeaders());
}
/**
 * Fetches the HTTP response headers of a remote file over a raw socket.
 *
 * The class opens a plain TCP connection (no TLS transport is used), sends
 * an HTTP/1.0 GET request and stops reading as soon as the header section
 * of the response ends, so the body is not downloaded in full.
 *
 * Typical use:
 *
 *     $request = new getFileHeader('http://example.com/song.mp3');
 *     if ($request->doRequest()) {
 *         $headers = $request->getHeaders(); // e.g. $headers['content-length']
 *     } else {
 *         echo $request->errormsg;
 *     }
 */
class getFileHeader
{
    /** @var string Host name extracted from the URL ('' if the URL has none). */
    public $host;

    /** @var string The remote file URL exactly as passed to the constructor. */
    public $file;

    /** @var int TCP port; 80 when the URL does not specify one. */
    public $port;

    /** @var string Request target: URL path (default "/") plus "?query" if present. */
    public $path;

    /** @var string|null User name from the URL, used for Basic authentication. */
    public $username;

    /** @var string|null Password from the URL, used for Basic authentication. */
    public $password;

    /** @var int Connect and read timeout in seconds. */
    public $timeout = 8;

    /** @var bool When true, debug() prints diagnostic HTML. */
    public $debug = false;

    /**
     * @var bool When true, an Accept-encoding header is sent.
     *
     * NOTE(review): a server that honours it may report the compressed size
     * in Content-Length (or omit it); set to false if the raw file size is
     * what you need -- confirm against the target server.
     */
    public $use_gzip = true;

    public $accept = 'image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/xaml+xml, application/vnd.ms-xpsdocument, application/x-ms-xbap, application/x-ms-application, */*';
    public $accept_encoding = 'gzip, deflate';
    public $accept_language = 'zh-cn';
    public $user_agent = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; (R1 1.3); .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET CLR 3.0.04506.30)';

    /**
     * @var array Cookie name => value pairs sent with every request.
     *
     * NOTE(review): these defaults look like values captured from a browser
     * session and are sent to every host unless overridden -- consider
     * replacing them with an empty array.
     */
    public $cookies = array(
        '__utma' => '130375891.1873577237.1204525960.1205473824.1205476776.10',
        '__utmz' => '130375891.1205397510.8.2.utmccn=(referral)|utmcsr=music.tyfo.com|utmcct=/music_dht.html|utmcmd=referral',
        // Was a bare 'name=value' string, which was sent as "0=__utmb=...".
        '__utmb' => '130375891',
        'ASP.NET_SessionId' => 'bzr0w2455juwyly53k0gh4fr',
        '__utmc' => '130375891',
    );

    /** @var string|null Optional Referer header value; not sent when empty. */
    public $referer;

    /**
     * @var array Response headers of the last request, keyed by lower-cased
     *            header name. A header that occurs more than once is stored
     *            as a list of its values.
     */
    public $headers = array();

    /** @var string Description of the last failure, '' when there was none. */
    public $errormsg = '';

    /** @var string|null Status code of the last response (e.g. "200"). */
    public $status;

    /**
     * Splits the URL into the pieces needed to build the request.
     *
     * @param string $file Absolute http:// URL of the remote file.
     */
    public function __construct($file)
    {
        $parts = parse_url($file);
        if (!is_array($parts)) {
            // parse_url() returns false for seriously malformed URLs.
            $parts = array();
        }

        $this->file = $file;

        // An empty path is not a valid request target; fall back to "/".
        $path = (isset($parts['path']) && $parts['path'] !== '') ? $parts['path'] : '/';
        if (isset($parts['query'])) {
            // parse_url() strips the "?" separator, so it has to be restored.
            $path .= '?' . $parts['query'];
        }
        $this->path = $path;

        $this->host = isset($parts['host']) ? $parts['host'] : '';
        $this->username = isset($parts['user']) ? $parts['user'] : null;
        $this->password = isset($parts['pass']) ? $parts['pass'] : null;
        $this->port = !empty($parts['port']) ? intval($parts['port']) : 80;
    }

    /**
     * Builds the raw HTTP request (request line, headers, terminating blank line).
     *
     * @return string The request text, ready to be written to the socket.
     */
    public function buildRequest()
    {
        $headers = array();
        // HTTP/1.0 on purpose: 1.1 brings complications such as "chunked" encoding.
        $headers[] = "GET {$this->path} HTTP/1.0";

        // The Host header must carry the port when it is not the default one.
        $host = $this->host;
        if ((int) $this->port !== 80) {
            $host .= ':' . $this->port;
        }
        $headers[] = "Host: {$host}";
        $headers[] = "User-Agent: {$this->user_agent}";
        $headers[] = "Accept: {$this->accept}";
        if ($this->use_gzip) {
            $headers[] = "Accept-encoding: {$this->accept_encoding}";
        }
        $headers[] = "Accept-language: {$this->accept_language}";
        if ($this->referer) {
            $headers[] = "Referer: {$this->referer}";
        }

        // Cookies
        if ($this->cookies) {
            $pairs = array();
            foreach ($this->cookies as $name => $value) {
                $pairs[] = "$name=$value";
            }
            $headers[] = 'Cookie: ' . implode('; ', $pairs);
        }

        // Basic authentication
        if ($this->username && $this->password) {
            $headers[] = 'Authorization: BASIC ' . base64_encode($this->username . ':' . $this->password);
        }

        // A GET request has no body: the blank line ends the request.
        return implode("\r\n", $headers) . "\r\n\r\n";
    }

    /**
     * Sends the request and reads the status line and headers of the response.
     *
     * On success the headers are available through getHeaders() and the
     * status code through $status. On failure $errormsg describes the problem.
     *
     * @return bool True when a valid status line was received, false otherwise.
     */
    public function doRequest()
    {
        // Reset everything that must not persist between requests.
        $this->headers = array();
        $this->errormsg = '';
        $this->status = null;

        $errno = 0;
        $errstr = '';
        $fp = @fsockopen($this->host, $this->port, $errno, $errstr, $this->timeout);
        if (!$fp) {
            switch ($errno) {
                case -3:
                    $this->errormsg = 'Socket creation failed (-3)';
                    break;
                case -4:
                    $this->errormsg = 'DNS lookup failure (-4)';
                    break;
                case -5:
                    $this->errormsg = 'Connection refused or timed out (-5)';
                    break;
                default:
                    $this->errormsg = 'Connection failed (' . $errno . ')';
                    break;
            }
            $this->errormsg .= ' ' . $errstr;
            $this->debug($this->errormsg);
            return false;
        }

        stream_set_timeout($fp, $this->timeout);

        $request = $this->buildRequest();
        $this->debug('Request', $request);

        if (fwrite($fp, $request) === false) {
            $this->errormsg = 'Failed to send request';
            $this->debug($this->errormsg);
            $ok = false;
        } else {
            $ok = $this->readResponseHead($fp);
        }

        // Single exit point so the socket is closed on every path.
        fclose($fp);

        return $ok;
    }

    /**
     * Reads the status line and the header section from an open connection.
     *
     * @param resource $fp Socket returned by fsockopen(), request already sent.
     * @return bool False when the first line is missing or not an HTTP status line.
     */
    private function readResponseHead($fp)
    {
        // First line: "HTTP/<version> <status code> <reason phrase>".
        $line = fgets($fp, 4096);
        if ($line === false || !preg_match('/HTTP\/(\d\.\d)\s*(\d+)\s*(.*)/', $line, $m)) {
            $this->errormsg = "Status code line invalid: " . htmlentities((string) $line);
            $this->debug($this->errormsg);
            return false;
        }
        $this->status = $m[2];
        $this->debug(htmlentities(trim($line)));

        while (!feof($fp)) {
            $line = fgets($fp, 4096);
            if ($line === false || trim($line) === '') {
                // Read failure/timeout, or the blank line that ends the headers.
                break;
            }
            if (!preg_match('/([^:]+):\s*(.*)/', $line, $m)) {
                // Not a "name: value" line; skip to the next header.
                continue;
            }

            $key = strtolower(trim($m[1]));
            $val = trim($m[2]);

            // Deal with the possibility of multiple headers of the same name.
            if (!isset($this->headers[$key])) {
                $this->headers[$key] = $val;
            } elseif (is_array($this->headers[$key])) {
                $this->headers[$key][] = $val;
            } else {
                $this->headers[$key] = array($this->headers[$key], $val);
            }
        }

        $this->debug('Received Headers', $this->headers);

        return true;
    }

    /**
     * Returns the headers collected by the last doRequest() call.
     *
     * @return array Lower-cased header name => value (or list of values);
     *               empty when no request has succeeded yet.
     */
    public function getHeaders()
    {
        return $this->headers;
    }

    /**
     * Prints a diagnostic HTML box when $debug is enabled; otherwise does nothing.
     *
     * @param string $msg    Message printed as-is (callers escape it if needed).
     * @param mixed  $object Optional value dumped with print_r() and HTML-escaped.
     * @return void
     */
    public function debug($msg, $object = false)
    {
        if (!$this->debug) {
            return;
        }

        print '<div style="border: 1px solid red; padding: 0.5em; margin: 0.5em;"><strong>HttpClient Debug:</strong> ' . $msg;
        if ($object) {
            print '<pre>' . htmlentities(print_r($object, true)) . '</pre>';
        }
        print '</div>';
    }
}
- ?>
上述代码执行的结果是
- array(14) {
- ["date"]=>
- string(29) "Wed, 01 Jul 2009 13:07:17 GMT"
- ["server"]=>
- string(39) "Apache/1.3.37 (Unix) mod_gzip/1.3.26.1a"
- ["vary"]=>
- string(15) "Accept-Encoding"
- ["cache-control"]=>
- string(11) "max-age=120"
- ["expires"]=>
- string(29) "Wed, 01 Jul 2009 13:09:17 GMT"
- ["last-modified"]=>
- string(29) "Tue, 23 Sep 2003 10:31:20 GMT"
- ["etag"]=>
- string(24) ""6d414a-252dd5-3f7020f8""
- ["accept-ranges"]=>
- string(5) "bytes"
- ["content-length"]=>
- string(7) "2436565"
- ["content-type"]=>
- string(10) "audio/mpeg"
- ["age"]=>
- string(2) "52"
- ["x-cache"]=>
- string(44) "HIT from 31145605.40779458.40031417.sohu.com"
- ["via"]=>
- string(50) "1.0 31145605.40779458.40031417.sohu.com:80 (squid)"
- ["connection"]=>
- string(5) "close"
- }