PHP-如何抓取视频网站的视频截图?

WordPress 开发 WordPress 开发 主题:1098 回复:2322

PHP-如何抓取视频网站的视频截图?

夜无邪 发布于 2017-01-30 字数 94 浏览 1095 回复 2

比如我知道一个youku的视频地址,根据这个地址抓取对应视频的一帧关键画面的截图,如何来做?

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

支持 Markdown 语法,需要帮助?

评论(2)

虐人心 2017-03-19 2 楼

试试这个吧,其实都是根据正则从页面源文件上去匹配相应的标签,如果你想从视频里截取,那么在客户端来设计,应该难度很大,很难实现吧。

<?php
// Demo: look up the metadata of one Youku video page and dump the result.
// $host selects which site-specific branch getVideoInfo() takes.
$host = 'youku.com';
$link = 'http://v.youku.com/v_show/id_XNDQzMzU3NDMy.html';

// print_r() writes the returned array to standard output in readable form.
print_r(getVideoInfo($link, $host));

/**
 * Scrape basic metadata for a video page on Youku, Ku6 or Tudou.
 *
 * The page is fetched with file_get_contents() and inspected with regular
 * expressions, so the result depends on the markup the site served when this
 * snippet was written.
 * NOTE(review): the patterns were reconstructed from a damaged copy of the
 * code and should be confirmed against the live page markup.
 *
 * @param string $link Address of the video page (anything file_get_contents() can read).
 * @param string $host Site selector: 'youku.com', 'ku6.com' or 'tudou.com'.
 * @return array Any of the keys 'flashvar' (video code), 'imageurl' (thumbnail)
 *               and 'title' that could be extracted; an empty array for an
 *               unsupported host.
 */
function getVideoInfo($link, $host) {
    $return = array();

    if ('youku.com' == $host) {
        // The video code is the token that follows "id_" in the page URL.
        if (preg_match('#id_(\w+)#', $link, $matches)) {
            $return['flashvar'] = $matches[1];
        }

        // A failed fetch is treated as an empty page so that whatever was
        // derived from the URL itself is still returned.
        $text = file_get_contents($link);
        if ($text === false) {
            $text = '';
        }

        // The thumbnail URL sits inside the <li class="download"> element and
        // is terminated by a '|' followed by the closing quote of an attribute.
        if (preg_match('#<li class="download"(.*)</li>#', $text, $download)
            && preg_match('#http://vimg(.*)\|">#', $download[1], $imageurl)
            && !empty($imageurl[1])) {
            $return['imageurl'] = 'http://vimg' . $imageurl[1];
        }

        // The page title has the form "<video title> - <site name>".
        if (preg_match('#<title>(.*?) - (.*)</title>#', $text, $title)) {
            $return['title'] = $title[1];
        }
    } elseif ('ku6.com' == $host) {
        // http://v.ku6.com/show/bjbJKPEex097wVtC.html
        // http://v.ku6.com/special/index_3628020.html
        $text = file_get_contents($link);
        if ($text === false) {
            $text = '';
        }

        // Addresses ending in index_<n>.html do not carry the video code in
        // the URL; for those the code is read from the player reference
        // ("refer/<code>/v.swf") found in the page source instead.
        $isIndexPage = preg_match('#/index_([\w\-]+)\.html#', $link) > 0;
        if (!$isIndexPage && preg_match('#/([\w\-]+)\.html#', $link, $matches)) {
            $return['flashvar'] = $matches[1];
        } elseif (preg_match('#refer/(.*)/v.swf#', $text, $videourl)) {
            $return['flashvar'] = $videourl[1];
        }

        // The screenshot address is the content of <span class="s_pic">...</span>.
        if (preg_match('#<span class="s_pic">(.*)</span>#', $text, $imageurl)
            && !empty($imageurl[1])) {
            $return['imageurl'] = $imageurl[1];
        }

        if (preg_match('#<title>(.*?) - (.*)</title>#', $text, $title)) {
            $return['title'] = $title[1];
        }
    } elseif ('tudou.com' == $host) {
        // http://www.tudou.com/programs/view/_ke1lzCnBYw/
        $text = file_get_contents($link);
        if ($text === false) {
            $text = '';
        }

        // The video code is the path segment that follows "view/".
        if (preg_match('#view/([\w\-]+)/#', $link, $matches)) {
            $return['flashvar'] = $matches[1];
        }

        if (preg_match('#<span class="s_pic">(.*)</span>#', $text, $imageurl)
            && !empty($imageurl[1])) {
            $return['imageurl'] = $imageurl[1];
        }

        if (preg_match('#<title>(.*?) - (.*)</title>#', $text, $title)) {
            $return['title'] = $title[1];
        }
    }

    return $return;
}

瑾兮 2017-03-14 1 楼

之前用到的一个主流视频网站地址解析类,很方便,供参考。

<?php
/**
 * Resolves a video page URL from one of several Chinese video sites into a
 * thumbnail, a title, the canonical URL and the Flash player address.
 *
 * Each site has a private parser that scrapes the page or a JSON endpoint.
 * All parsers are static because they are dispatched from the static parse()
 * entry point. Every parser returns either false or an array with the keys
 * 'img', 'title', 'url' and 'swf'.
 *
 * NOTE(review): the regular expressions were reconstructed from a damaged copy
 * of this class (escape sequences had been stripped) and should be confirmed
 * against the markup the sites actually serve.
 */
class VideoUrlParser
{
    /**
     * User-Agent sent with outgoing requests. Kept on a single line because a
     * line break inside a header value corrupts the request.
     */
    const USER_AGENT = "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10";

    /** Matches the supported hosts; capture group 1 drives the dispatch in parse(). */
    const CHECK_URL_VALID = '/(youku\.com|tudou\.com|ku6\.com|56\.com|letv\.com|video\.sina\.com\.cn|(my\.)?tv\.sohu\.com|v\.qq\.com)/';

    /**
     * Parse a video page URL.
     *
     * @param string $url          Address of the video page.
     * @param mixed  $createObject When truthy, an 'object' key holding an
     *                             <embed> tag for the Flash player is added.
     * @static
     * @access public
     * @return array|false Array with 'img', 'title', 'url', 'swf' (and
     *                     optionally 'object'), or false when the host is not
     *                     supported or the page could not be parsed.
     */
    public static function parse($url = '', $createObject = true)
    {
        $lowerurl = strtolower($url);
        if (!preg_match(self::CHECK_URL_VALID, $lowerurl, $matches)) {
            return false;
        }

        switch ($matches[1]) {
            case 'youku.com':
                $data = self::_parseYouku($url);
                break;
            case 'tudou.com':
                $data = self::_parseTudou($url);
                break;
            case 'ku6.com':
                $data = self::_parseKu6($url);
                break;
            case '56.com':
                $data = self::_parse56($url);
                break;
            case 'letv.com':
                $data = self::_parseLetv($url);
                break;
            case 'video.sina.com.cn':
                $data = self::_parseSina($url);
                break;
            case 'my.tv.sohu.com':
            case 'tv.sohu.com':
            case 'sohu.com':
                $data = self::_parseSohu($url);
                break;
            case 'v.qq.com':
                $data = self::_parseQq($url);
                break;
            default:
                $data = false;
        }

        if ($data && $createObject) {
            // The player address comes from scraped content, so it is escaped
            // before being placed inside an HTML attribute.
            $swf = htmlspecialchars(isset($data['swf']) ? (string) $data['swf'] : '', ENT_QUOTES, 'UTF-8');
            $data['object'] = "<embed src=\"{$swf}\" quality=\"high\" width=\"480\" height=\"400\" align=\"middle\" allowNetworking=\"all\" allowScriptAccess=\"always\" type=\"application/x-shockwave-flash\"></embed>";
        }

        return $data;
    }

    /**
     * Tencent Video
     * http://v.qq.com/cover/o/o9tab7nuu0q3esh.html?vid=97abu74o4w3_0
     * http://v.qq.com/play/97abu74o4w3.html
     * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o.html
     * http://v.qq.com/cover/d/dtdqyd8g7xvoj0o/9SfqULsrtSb.html
     * http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?_v=20110829&vid=97abu74o4w3&autoplay=1&list=2&showcfg=1&tpid=23&title=%E7%AC%AC%E4%B8%80%E7%8E%B0%E5%9C%BA&adplay=1&cid=o9tab7nuu0q3esh
     *
     * @param string $url Video page address.
     * @return array|false
     */
    private static function _parseQq($url)
    {
        if (preg_match('#/play/#', $url)) {
            // "/play/" pages carry a "url=..." pointer to the real page
            // (presumably a meta refresh — confirm); follow it.
            $html = self::_fget($url);
            if (!$html || !preg_match('/url=([^"]+)/', $html, $matches)) {
                return false;
            }
            $url = $matches[1];
        }

        // The video id may be given in the query string, optionally followed
        // by an "_<n>" suffix that is not part of the id.
        $vid = preg_match('/vid=([^_&]+)/', $url, $matches) ? $matches[1] : '';

        $html = self::_fget($url);
        if (!$html || !preg_match('/flashvars\s=\s"([^;]+)/s', $html, $matches)) {
            return false;
        }
        $query = $matches[1];

        if ($vid === '') {
            // Fall back to the default id embedded in the page script.
            if (!preg_match('/vid\s?=\s?vid\s?\|\|\s?"(\w+)";/i', $html, $matches)) {
                return false;
            }
            $vid = $matches[1];
        }

        // The flashvars string is built by script concatenation; substitute
        // the id for the "+vid+" placeholder.
        $query = str_replace('"+vid+"', $vid, $query);
        parse_str($query, $output);
        $cid = isset($output['cid']) ? $output['cid'] : '';

        $data = array();
        $data['img'] = "http://vpic.video.qq.com/{$cid}/{$vid}_1.jpg";
        $data['url'] = $url;
        $data['title'] = isset($output['title']) ? $output['title'] : null;
        $data['swf'] = "http://imgcache.qq.com/tencentvideo_v1/player/TencentPlayer.swf?" . $query;

        return $data;
    }

    /**
     * Youku
     * http://v.youku.com/v_show/id_XMjI4MDM4NDc2.html
     * http://player.youku.com/player.php/sid/XMjU0NjI2Njg4/v.swf
     *
     * @param string $url Video page address.
     * @return array|false
     */
    private static function _parseYouku($url)
    {
        if (!preg_match('#id_(\w+)#', $url, $matches)) {
            // Playlist pages do not carry the id in the URL; read it from the page.
            if (!preg_match('#v_playlist/#', $url)) {
                return false;
            }
            $html = self::_fget($url);
            if (!$html || !preg_match("#videoId2\\s*=\\s*'(\\w+)'#", $html, $matches)) {
                return false;
            }
        }

        $link = "http://v.youku.com/player/getPlayList/VideoIDS/{$matches[1]}/timezone/+08/version/5/source/out?password=&ran=2513&n=3";

        $retval = self::_cget($link);
        if (!$retval) {
            return false;
        }

        $json = json_decode($retval, true);
        if (!isset($json['data'][0])) {
            return false;
        }
        $info = $json['data'][0];

        $data = array();
        $data['img'] = isset($info['logo']) ? $info['logo'] : null;
        $data['title'] = isset($info['title']) ? $info['title'] : null;
        $data['url'] = $url;
        $data['swf'] = "http://player.youku.com/player.php/sid/{$matches[1]}/v.swf";

        return $data;
    }

    /**
     * Tudou
     * http://www.tudou.com/programs/view/Wtt3FjiDxEE/
     * http://www.tudou.com/v/Wtt3FjiDxEE/v.swf
     *
     * http://www.tudou.com/playlist/p/a65718.html?iid=74909603
     * http://www.tudou.com/l/G5BzgI4lAb8/&iid=74909603/v.swf
     *
     * @param string $url Video page address.
     * @return array|false
     */
    private static function _parseTudou($url)
    {
        if (!preg_match('#view/([-\w]+)/#', $url, $matches)) {
            return self::_parseTudouPlaylist($url);
        }

        // Requesting the player address answers with a redirect whose query
        // string carries the thumbnail and the title.
        $ret = self::_fsget("/v/{$matches[1]}/v.swf", 'www.tudou.com');
        if (!$ret || !preg_match("#\nLocation: (.*)\n#", $ret, $mat)) {
            return false;
        }

        // trim() drops the "\r" of the CRLF line ending captured by the pattern.
        $queryString = parse_url(urldecode(trim($mat[1])), PHP_URL_QUERY);
        parse_str((string) $queryString, $query);

        $data = array();
        $data['img'] = isset($query['snap_pic']) ? $query['snap_pic'] : null;
        $data['title'] = isset($query['title']) ? $query['title'] : null;
        $data['url'] = $url;
        $data['swf'] = "http://www.tudou.com/v/{$matches[1]}/v.swf";

        return $data;
    }

    /**
     * Handle Tudou playlist pages (addresses containing "/playlist/").
     *
     * @param string $url Playlist page address.
     * @return array|false
     */
    private static function _parseTudouPlaylist($url)
    {
        if (strpos($url, '/playlist/') === false) {
            return false;
        }

        // An "iid=" parameter with nothing after it is treated as invalid.
        if (strpos($url, 'iid=') !== false) {
            $parts = explode('iid=', strtolower($url));
            if (empty($parts[1])) {
                return false;
            }
        }

        $html = self::_fget($url);
        if (!$html) {
            return false;
        }
        // The page is converted from GB2312; keep the raw bytes if the
        // conversion fails.
        $converted = iconv("GB2312", "UTF-8", $html);
        if ($converted !== false) {
            $html = $converted;
        }

        if (!preg_match('/lid_code\s=\slcode\s=\s[\'"]([^\'"]+)/s', $html, $matches)) {
            return false;
        }
        $icode = $matches[1];

        if (!preg_match('/iid\s=\s.*?\|\|\s(\d+)/sx', $html, $matches)) {
            return false;
        }
        $iid = $matches[1];

        if (!preg_match('/listData\s=\s(\[\{.*\}\])/sx', $html, $matches)) {
            return false;
        }

        // Turn the JavaScript literal into JSON: strip whitespace, blank out
        // unquoted non-numeric values, and quote the bare property names.
        $find = array('/\n/', '/\s+/', '/:[^\d"]\w+[^,]*,/i', '/(\{|,)(\w+):/');
        $replace = array("", "", ':"",', '\1"\2":');
        $json = json_decode(preg_replace($find, $replace, $matches[1]));

        // Pick the playlist entry whose iid matches the one found in the page.
        $item = null;
        if (is_array($json) || is_object($json)) {
            foreach ($json as $val) {
                if (isset($val->iid) && $val->iid == $iid) {
                    $item = $val;
                    break;
                }
            }
        }
        if ($item === null) {
            return false;
        }

        $data = array();
        $data['img'] = isset($item->pic) ? $item->pic : null;
        $data['title'] = isset($item->title) ? $item->title : null;
        $data['url'] = $url;
        $data['swf'] = "http://www.tudou.com/l/{$icode}/&iid={$iid}/v.swf";

        return $data;
    }

    /**
     * Ku6
     * http://v.ku6.com/film/show_520/3X93vo4tIS7uotHg.html
     * http://v.ku6.com/special/show_4926690/Klze2mhMeSK6g05X.html
     * http://v.ku6.com/show/7US-kDXjyKyIInDevhpwHg...html
     * http://player.ku6.com/refer/3X93vo4tIS7uotHg/v.swf
     *
     * @param string $url Video page address.
     * @return array|false
     */
    private static function _parseKu6($url)
    {
        if (preg_match('/show_/', $url)) {
            if (!preg_match('#/([-\w]+)\.html#', $url, $matches)) {
                return false;
            }

            // The metadata comes from a JSON endpoint; the page address given
            // by the caller is what is reported back as 'url'.
            $html = self::_fget("http://v.ku6.com/fetchVideo4Player/{$matches[1]}.html");
            if (!$html) {
                return false;
            }
            $json = json_decode($html, true);
            if (!$json || !isset($json['data'])) {
                return false;
            }

            $data = array();
            $data['img'] = isset($json['data']['picpath']) ? $json['data']['picpath'] : null;
            $data['title'] = isset($json['data']['t']) ? $json['data']['t'] : null;
            $data['url'] = $url;
            $data['swf'] = "http://player.ku6.com/refer/{$matches[1]}/v.swf";

            return $data;
        }

        if (preg_match('#show/#', $url)) {
            $html = self::_fget($url);
            if (!$html || !preg_match('/ObjectInfo\s?=\s?([^\n]*)};/si', $html, $matches)) {
                return false;
            }
            $str = $matches[1];

            $data = array('img' => null, 'title' => null, 'url' => $url, 'swf' => null);

            // img
            if (preg_match('/cover\s?:\s?"([^"]+)"/', $str, $matches)) {
                $data['img'] = $matches[1];
            }

            // title: run it through json_decode so escape sequences in the
            // script literal are decoded.
            if (preg_match('/title"?\s?:\s?"([^"]+)"/', $str, $matches)) {
                $json = json_decode("{\"title\":\"{$matches[1]}\"}", true);
                $data['title'] = isset($json['title']) ? $json['title'] : null;
            }

            // query
            $query = '';
            if (preg_match('/"(vid=[^"]+)"\sname="flashVars"/s', $html, $matches)) {
                $query = str_replace("&amp;", '&', $matches[1]);
            }
            if (!preg_match('#//player\.ku6cdn\.com[^"\']+#', $html, $matches)) {
                return false;
            }
            $data['swf'] = 'http:' . $matches[0] . '?' . $query;

            return $data;
        }

        return false;
    }

    /**
     * 56.com
     * http://www.56.com/u73/v_NTkzMDcwNDY.html
     * http://player.56.com/v_NTkzMDcwNDY.swf
     *
     * @param string $url Video page address.
     * @return array|false
     */
    private static function _parse56($url)
    {
        if (!preg_match('#/v_(\w+)\.html#', $url, $matches)) {
            return false;
        }

        $retval = self::_cget("http://vxml.56.com/json/{$matches[1]}/?src=out");
        if (!$retval) {
            return false;
        }

        $json = json_decode($retval, true);
        if (!isset($json['info'])) {
            return false;
        }

        $data = array();
        $data['img'] = isset($json['info']['img']) ? $json['info']['img'] : null;
        $data['title'] = isset($json['info']['Subject']) ? $json['info']['Subject'] : null;
        $data['url'] = $url;
        $data['swf'] = "http://player.56.com/v_{$matches[1]}.swf";

        return $data;
    }

    /**
     * Letv
     * http://www.letv.com/ptv/vplay/1168109.html
     * http://www.letv.com/player/x1168109.swf
     *
     * @param string $url Video page address.
     * @return array|false
     */
    private static function _parseLetv($url)
    {
        if (!preg_match('#vplay/(\d+)#', $url, $idMatch)) {
            return false;
        }

        // Thumbnail and title are read from the query string of the
        // v.t.sina.com.cn link found in the page.
        $query = array();
        $html = self::_fget($url);
        if ($html && preg_match('#http://v\.t\.sina\.com\.cn/([^\'"]*)#', $html, $matches)) {
            parse_str((string) parse_url(urldecode($matches[0]), PHP_URL_QUERY), $query);
        }

        $data = array();
        $data['img'] = isset($query['pic']) ? $query['pic'] : null;
        $data['title'] = isset($query['title']) ? $query['title'] : null;
        $data['url'] = $url;
        $data['swf'] = "http://www.letv.com/player/x{$idMatch[1]}.swf";

        return $data;
    }

    /**
     * Sohu TV
     * http://my.tv.sohu.com/u/vw/5101536
     *
     * @param string $url Video page address.
     * @return array|false
     */
    private static function _parseSohu($url)
    {
        $html = self::_fget($url);
        if (!$html) {
            return false;
        }
        $converted = iconv("GB2312", "UTF-8", $html);
        if ($converted !== false) {
            $html = $converted;
        }

        // Collect the og:title / og:image / og:videosrc meta values by name so
        // the result does not depend on the order of the tags in the page.
        preg_match_all('/og:(title|image|videosrc)"\scontent="([^"]+)"/s', $html, $matches, PREG_SET_ORDER);
        $meta = array();
        foreach ($matches as $m) {
            if (!isset($meta[$m[1]])) {
                $meta[$m[1]] = $m[2];
            }
        }
        if (!$meta) {
            return false;
        }

        $data = array();
        $data['img'] = isset($meta['image']) ? $meta['image'] : null;
        $data['title'] = isset($meta['title']) ? $meta['title'] : null;
        $data['url'] = $url;
        $data['swf'] = isset($meta['videosrc']) ? $meta['videosrc'] : null;

        return $data;
    }

    /**
     * Sina Video
     * http://video.sina.com.cn/v/b/48717043-1290055681.html
     * http://you.video.sina.com.cn/api/sinawebApi/outplayrefer.php/vid=48717043_1290055681_PUzkSndrDzXK+l1lHz2stqkP7KQNt6nki2O0u1ehIwZYQ0/XM5GdatoG5ynSA9kEqDhAQJA4dPkm0x4/s.swf
     *
     * @param string $url Video page or player address.
     * @return array|false
     */
    private static function _parseSina($url)
    {
        // Both address forms carry the two numeric ids, joined by '-' or '_'.
        if (!preg_match('/(\d+)(?:-|_)(\d+)/', $url, $matches)) {
            return false;
        }
        $url = "http://video.sina.com.cn/v/b/{$matches[1]}-{$matches[2]}.html";

        $html = self::_fget($url);
        if (!$html || !preg_match('/video\s?:\s?([^<]+)}/', $html, $matches)) {
            return false;
        }

        // Turn the JavaScript object literal into JSON: strip whitespace,
        // switch to double quotes, quote the property names, and blank out
        // unquoted non-numeric values.
        $find = array('/\n/', '/\s+/', "/'/", '/\{([^:,]+):/', '/,([^:]+):/', '/:[^\d"]\w+[^,]*,/i');
        $replace = array('', '', '"', '{"\1":', ',"\1":', ':"",');
        $arr = json_decode(preg_replace($find, $replace, $matches[1]), true);
        if (!is_array($arr)) {
            return false;
        }

        $data = array();
        $data['img'] = isset($arr['pic']) ? $arr['pic'] : null;
        $data['title'] = isset($arr['title']) ? $arr['title'] : null;
        $data['url'] = $url;
        $data['swf'] = isset($arr['swfOutsideUrl']) ? $arr['swfOutsideUrl'] : null;

        return $data;
    }

    /**
     * Fetch a URL with file_get_contents().
     *
     * @param string $url Address to fetch.
     * @return string|false Body (gunzipped when it is a gzip stream), or false
     *                      when the URL is empty or the fetch fails.
     */
    private static function _fget($url = '')
    {
        if (!$url) {
            return false;
        }
        $html = file_get_contents($url);
        if ($html === false) {
            return false;
        }

        // Transparently decode gzip-compressed bodies.
        $dehtml = self::_gzdecode($html);
        return $dehtml ? $dehtml : $html;
    }

    /**
     * Fetch a path over a raw socket (port 80) and return the full response,
     * status line and headers included, so callers can inspect headers such
     * as Location.
     *
     * @param string $path       Request path.
     * @param string $host       Host name to connect to.
     * @param string $user_agent Optional User-Agent; defaults to USER_AGENT.
     * @return string|false Raw response, or false on any failure.
     */
    private static function _fsget($path = '/', $host = '', $user_agent = '')
    {
        if (!$path || !$host) {
            return false;
        }
        $user_agent = $user_agent ? $user_agent : self::USER_AGENT;
        // A header value must stay on one line.
        $user_agent = preg_replace('/[\r\n]+\s*/', ' ', $user_agent);

        // "Connection: close" makes the server end the stream after the
        // response, so the read loop below terminates promptly.
        $out = implode("\r\n", array(
            "GET {$path} HTTP/1.1",
            "Host: {$host}",
            "User-Agent: {$user_agent}",
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: zh-cn,zh;q=0.5',
            'Accept-Charset: GB2312,utf-8;q=0.7,*;q=0.7',
            'Connection: close',
        )) . "\r\n\r\n";

        $fp = @fsockopen($host, 80, $errno, $errstr, 10);
        if (!$fp) {
            return false;
        }
        if (!fputs($fp, $out)) {
            fclose($fp);
            return false;
        }

        $html = '';
        while (!feof($fp)) {
            $chunk = fgets($fp, 1024);
            if ($chunk === false) {
                break;
            }
            $html .= $chunk;
        }
        fclose($fp);

        // Kept for parity with the other fetchers; a raw response starts with
        // the status line, so this normally returns the response unchanged.
        $dehtml = self::_gzdecode($html);
        return $dehtml ? $dehtml : $html;
    }

    /**
     * Fetch a URL with cURL.
     *
     * @param string $url        Address to fetch.
     * @param string $user_agent Optional User-Agent; defaults to USER_AGENT.
     * @return string|false Body (gunzipped when it is a gzip stream), or false
     *                      when the URL is empty, the transfer fails or the
     *                      body is empty.
     */
    private static function _cget($url = '', $user_agent = '')
    {
        if (!$url) {
            return false;
        }

        $user_agent = $user_agent ? $user_agent : self::USER_AGENT;

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        // Have curl_exec() return the body instead of printing it.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        if (strlen($user_agent)) {
            curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
        }

        $html = curl_exec($ch);
        $failed = curl_errno($ch) !== 0;
        curl_close($ch);

        if ($failed || !is_string($html) || !strlen($html)) {
            return false;
        }

        // Transparently decode gzip-compressed bodies.
        $dehtml = self::_gzdecode($html);
        return $dehtml ? $dehtml : $html;
    }

    /**
     * Decode a gzip (RFC 1952) stream.
     *
     * Member layout: 10 fixed header bytes (magic, method, flags, mtime, xfl,
     * os), then the optional EXTRA, NAME, COMMENT and header-CRC fields
     * selected by the flag bits, then the deflate body, then an 8-byte
     * trailer (CRC32 and uncompressed size).
     *
     * @param string $data Possibly gzip-compressed bytes.
     * @return string|null|false Decoded data; null when the input is not a
     *                           gzip stream; false when it looks like gzip
     *                           but is malformed.
     */
    private static function _gzdecode($data)
    {
        if (!is_string($data)) {
            return null;
        }
        $len = strlen($data);
        if ($len < 18 || substr($data, 0, 2) !== "\x1f\x8b") {
            return null; // Not GZIP format (see RFC 1952)
        }

        $method = ord($data[2]); // Compression method
        $flags = ord($data[3]);  // Flags
        if ($flags & 0xE0) {
            // Reserved bits are set -- not allowed by RFC 1952
            return null;
        }

        $headerlen = 10;

        if ($flags & 4) {
            // FEXTRA: 2-byte little-endian length followed by that many bytes
            if ($len - $headerlen - 2 < 8) {
                return false;
            }
            $extralen = unpack("v", substr($data, $headerlen, 2));
            $extralen = $extralen[1];
            if ($len - $headerlen - 2 - $extralen < 8) {
                return false;
            }
            $headerlen += 2 + $extralen;
        }

        if ($flags & 8) {
            // FNAME: zero-terminated original file name
            $end = strpos($data, "\0", $headerlen);
            if ($end === false || $len - $end - 1 < 8) {
                return false;
            }
            $headerlen = $end + 1;
        }

        if ($flags & 16) {
            // FCOMMENT: zero-terminated comment
            $end = strpos($data, "\0", $headerlen);
            if ($end === false || $len - $end - 1 < 8) {
                return false;
            }
            $headerlen = $end + 1;
        }

        if ($flags & 2) {
            // FHCRC: low 16 bits of the CRC32 of the header bytes so far
            if ($len - $headerlen - 2 < 8) {
                return false;
            }
            $calccrc = crc32(substr($data, 0, $headerlen)) & 0xffff;
            $headercrc = unpack("v", substr($data, $headerlen, 2));
            if ($headercrc[1] != $calccrc) {
                return false; // Bad header CRC
            }
            $headerlen += 2;
        }

        // Trailer: CRC32 of the uncompressed data, then its size
        $datacrc = unpack("V", substr($data, -8, 4));
        $datacrc = $datacrc[1];
        $isize = unpack("V", substr($data, -4));
        $isize = $isize[1];

        $bodylen = $len - $headerlen - 8;
        if ($bodylen < 1) {
            return null;
        }
        if ($method !== 8) {
            // Deflate is the only supported compression method
            return false;
        }

        $inflated = gzinflate(substr($data, $headerlen, $bodylen));
        if ($inflated === false) {
            return false;
        }

        // Compare the CRCs as unsigned decimal strings so the check also
        // holds where integers are 32-bit and may come back negative.
        if ($isize != strlen($inflated)
            || sprintf('%u', crc32($inflated)) !== sprintf('%u', $datacrc)) {
            return false; // Length or CRC does not match
        }

        return $inflated;
    }
}