标签

Honeymoon - Thomas Ng

归档

近期文章

Our Visitor

008149
今日访客 : 25
总浏览量 : 16091
当前在线 : 0
你的IP地址 : 52.167.144.195

php实现游客状态获取web端微博cookie

本文基于Webman框架,介绍如何用PHP在游客(未登录)状态下获取微博Web端的Cookie。

如果需要Java实现,可以参考:https://www.freesion.com/article/4184140577/

如有疑问,可以在评论区留言,或通过邮箱联系我:owen@owenzhang.com

著作权归OwenZhang所有。商业转载请联系OwenZhang获得授权,非商业转载请注明出处。

调用:

// Obtain the shared WeiboService instance and ask it for a guest (not logged-in) cookie string.
$cookie = WeiboService::instance()->getCookie();

WeiboService.php代码如下:

<?php

namespace app\service;

use app\model\User;
use support\Log;
use Yurun\OAuthLogin\Weibo\OAuth2 as WeiboOAuth2;

/**
 * Obtains a guest ("visitor", i.e. not logged-in) cookie string for the Weibo web site.
 *
 * The flow implemented by getCookie() is:
 *   1. request a visitor id (tid) from passport.weibo.com,
 *   2. fetch a YF-Page-G0 cookie from d.weibo.com,
 *   3. exchange the tid for SUB / SUBP values,
 *   4. append whatever other cookies weibo.com hands out.
 *
 * All requests share one cURL cookie jar file and one randomly chosen User-Agent.
 *
 * NOTE(review): TLS peer/host verification is disabled for every request in this
 * class (kept from the original implementation). That exposes the traffic to
 * man-in-the-middle tampering; enable verification if the runtime has a CA bundle.
 */
class WeiboService
{
    /** Shared instance handed out by instance(). */
    public static ?WeiboService $_instance = null;

    /** Path of the cURL cookie jar shared by all requests of this object. */
    private string $cookieFile = '';

    /** True when the cookie jar path was generated here (and is therefore removed on destruction). */
    private bool $ownsCookieFile = false;

    /** User-Agent header value, picked once per object from $userAgents. */
    private string $userAgent = '';

    /** Proxy passed to CURLOPT_PROXY when non-empty. Nothing in this class assigns it. */
    private string $proxy = '';

    /** Per-request timeout in seconds (CURLOPT_TIMEOUT). */
    private int $timeout = 30;

    /** Predefined User-Agent strings to choose from. */
    private array $userAgents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36',
        'Mozilla/5.0 (iPhone; CPU iPhone OS 17_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Mobile/15E148 Safari/604.1',
    ];

    /**
     * Returns the shared instance, creating it on first use.
     *
     * @return WeiboService|null Declared nullable for backward compatibility; an instance is always returned.
     */
    public static function instance(): ?WeiboService
    {
        if (!static::$_instance) {
            static::$_instance = new self();
        }

        return static::$_instance;
    }

    /**
     * @param string $cookieFile Path of the cookie jar to use. When empty, a unique
     *                           temporary file is created and removed again in __destruct().
     */
    public function __construct($cookieFile = '')
    {
        if ($cookieFile) {
            $this->cookieFile = (string) $cookieFile;
        } else {
            // tempnam() creates the file atomically under a unique name, unlike a
            // name derived from md5(microtime()), which is guessable and can collide.
            $generated = tempnam(sys_get_temp_dir(), 'weibo_cookies_');
            if ($generated === false) {
                $generated = sys_get_temp_dir() . '/weibo_cookies_' . md5(microtime()) . '.txt';
            }
            $this->cookieFile     = $generated;
            $this->ownsCookieFile = true;
        }

        $this->userAgent = $this->userAgents[array_rand($this->userAgents)];
    }

    /**
     * Removes the cookie jar if (and only if) this object generated its path itself.
     */
    public function __destruct()
    {
        if ($this->ownsCookieFile && is_file($this->cookieFile)) {
            unlink($this->cookieFile);
        }
    }

    /**
     * Sets the per-request timeout.
     *
     * @param int $seconds Value passed to CURLOPT_TIMEOUT.
     */
    public function setTimeout(int $seconds): void
    {
        $this->timeout = $seconds;
    }

    /**
     * Builds the complete guest cookie string.
     *
     * Never throws \Exception: any failure is logged to the "crawlerlog" channel and
     * a locally generated fallback cookie is returned instead.
     *
     * @return string Cookie header value of the form "YF-Page-G0=...; SUB=...; SUBP=...[; more]".
     */
    public function getCookie(): string
    {
        try {
            // Step 1: fetch tid and related parameters, retrying up to three times.
            $json       = null;
            $retryCount = 3;

            for ($attempt = 0; $attempt < $retryCount; $attempt++) {
                $json = $this->getTidAndC();
                if ($json) {
                    break;
                }
                if ($attempt < $retryCount - 1) {
                    sleep(1); // wait one second before the next attempt
                }
            }

            if (!$json) {
                throw new \Exception('Failed to get tid and c after ' . $retryCount . ' attempts');
            }

            // Step 2: extract the parameters, falling back to defaults for missing keys.
            $c = (string) ($json['confidence'] ?? '100');
            $w = !empty($json['new_tid']) ? '3' : '2';
            $t = (string) ($json['tid'] ?? '');

            if (empty($t)) {
                throw new \Exception('Failed to get tid');
            }

            // Step 3: YF-Page-G0, generated locally when the site does not provide one.
            $yf = $this->getYF();
            if (empty($yf)) {
                $yf = $this->generateDefaultYF();
            }

            // Step 4: SUB and SUBP.
            [$sub, $subp] = $this->getSubAndSubp($t, $w, $c);
            if (empty($sub) || empty($subp)) {
                throw new \Exception('Failed to get sub or subp');
            }

            $cookie = "YF-Page-G0={$yf}; SUB={$sub}; SUBP={$subp}";

            // Step 5: any further cookies offered by the home page.
            $additionalCookies = $this->getAdditionalCookies();
            if (!empty($additionalCookies)) {
                $cookie .= '; ' . $additionalCookies;
            }

            return $cookie;
        } catch (\Exception $e) {
            $this->log('Weibo: VisitorCookie Error: ' . $e->getMessage());

            // Hand back a simulated cookie as a last resort.
            return $this->generateFallbackCookie();
        }
    }

    /**
     * Requests the visitor parameters (tid, new_tid, confidence) from the genvisitor endpoint.
     *
     * @return array|null The "data" member of the decoded JSONP payload, or null on any failure.
     */
    private function getTidAndC(): ?array
    {
        // Random callback name for the JSONP wrapper.
        $callback = 'jsonp' . time() . mt_rand(1000, 9999);
        $url      = 'https://passport.weibo.com/visitor/genvisitor?cb=' . $callback;

        $result = $this->request($url, [
            'Accept: */*',
            'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'Connection: keep-alive',
            'Host: passport.weibo.com',
            'Referer: https://weibo.com/',
            'Sec-Fetch-Dest: script',
            'Sec-Fetch-Mode: no-cors',
            'Sec-Fetch-Site: same-site',
            'User-Agent: ' . $this->userAgent,
            'X-Requested-With: XMLHttpRequest',
        ], true);

        if ($result['status'] !== 200) {
            $this->log("Weibo: HTTP Code: {$result['status']}, Error: {$result['error']}");
            return null;
        }

        if ($result['body'] === '') {
            $this->log('Weibo: Empty response');
            return null;
        }

        $data = $this->decodeJsonp($result['body'], $callback);
        if ($data === null) {
            if (json_last_error() !== JSON_ERROR_NONE) {
                $this->log("Weibo: JSON decode error: " . json_last_error_msg());
            }
            return null;
        }

        $payload = $data['data'] ?? null;

        return is_array($payload) ? $payload : null;
    }

    /**
     * Exchanges the visitor parameters for the SUB and SUBP cookie values.
     *
     * @param string $t Visitor id (tid) returned by getTidAndC().
     * @param string $w "3" for a new tid, otherwise "2" (as computed in getCookie()).
     * @param string $c Confidence value returned by getTidAndC().
     *
     * @return array Two-element list [sub, subp]; both are '' on failure.
     */
    private function getSubAndSubp(string $t, string $w, string $c): array
    {
        $rand = mt_rand(100000, 999999) / 1000000;

        // http_build_query() percent-encodes every value. The tid is inserted verbatim
        // otherwise, and any reserved character in it (for example "+", "/" or "=")
        // would change the meaning of the query string.
        // NOTE(review): the "0" prefix on c is kept from the original code; whether the
        // endpoint expects a fixed-width value is not verifiable from here — confirm.
        $query = http_build_query([
            'a'     => 'incarnate',
            't'     => $t,
            'w'     => $w,
            'c'     => '0' . $c,
            'gc'    => '',
            'cb'    => 'cross_domain',
            'from'  => 'weibo',
            '_rand' => (string) $rand,
        ], '', '&', PHP_QUERY_RFC3986);

        $result = $this->request('https://passport.weibo.com/visitor/visitor?' . $query, [
            'Accept: */*',
            'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'Connection: keep-alive',
            'Cookie: tid=' . $t . '__' . $c,
            'Host: passport.weibo.com',
            'Referer: https://weibo.com/',
            'Sec-Fetch-Dest: script',
            'Sec-Fetch-Mode: no-cors',
            'Sec-Fetch-Site: same-site',
            'User-Agent: ' . $this->userAgent,
        ], false);

        if ($result['status'] !== 200 || $result['body'] === '') {
            $this->log("Weibo: Failed to get sub and subp, HTTP Code: {$result['status']}");
            return ['', ''];
        }

        $data = $this->decodeJsonp($result['body'], 'cross_domain');
        if (!$data || !isset($data['data'])) {
            return ['', ''];
        }

        $sub  = $data['data']['sub'] ?? '';
        $subp = $data['data']['subp'] ?? '';

        return [$sub, $subp];
    }

    /**
     * Fetches one of several d.weibo.com pages and returns the YF-Page-G0 cookie it sets.
     *
     * @return string The cookie value, or '' when the request fails or no such cookie is set.
     */
    private function getYF(): string
    {
        $pages = [
            '1087030002_2975_5012_0',
            '1087030002_2975_5011_0',
            '1087030002_2975_5010_0',
            '1087030002_2975_5009_0',
            '1087030002_2975_5008_0',
        ];

        $page = $pages[array_rand($pages)];

        $result = $this->request("https://d.weibo.com/{$page}", [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
            'Connection: keep-alive',
            'Host: d.weibo.com',
            'Referer: https://weibo.com/',
            'Sec-Fetch-Dest: document',
            'Sec-Fetch-Mode: navigate',
            'Sec-Fetch-Site: same-site',
            'Upgrade-Insecure-Requests: 1',
            'User-Agent: ' . $this->userAgent,
        ], true);

        if ($result['headers'] === '') {
            return '';
        }

        return $this->extractYFCookie($result['headers']);
    }

    /**
     * Generates a stand-in YF-Page-G0 value: the current Unix timestamp followed by
     * ten random decimal digits (the first of which is never zero).
     */
    private function generateDefaultYF(): string
    {
        $timestamp = time();

        // Two 5-digit draws instead of mt_rand(1000000000, 9999999999): that upper
        // bound does not fit into an int on 32-bit PHP builds.
        $random = mt_rand(10000, 99999) . sprintf('%05d', mt_rand(0, 99999));

        return "{$timestamp}{$random}";
    }

    /**
     * Generates the fallback cookie returned by getCookie() when the real flow fails.
     *
     * The SUB value contains 16 random bytes (hex-encoded); everything else is fixed text.
     */
    private function generateFallbackCookie(): string
    {
        $yf   = $this->generateDefaultYF();
        $sub  = '_2A25' . bin2hex(random_bytes(16)) . 'DeRhGe5M71EQ8SfOyD6IHXVnDZFnrDV8PUNbmtAbLWbhkW9NdJrQ7wtvzn0wy-bW-gzBdGt6MmWGCzmZ';
        $subp = '0033WrSXqPxfM725Ws9jqgMF55529P9D9Wh75PxDk9yG3v5oeoKavfmR5JpX5KzhUgL.FonEShepeK.Ee0z2dJLoIp7LxKML1KBLBKnLxKqL1hnLBoMReoB0eK24eoeE';

        return "YF-Page-G0={$yf}; SUB={$sub}; SUBP={$subp}";
    }

    /**
     * Extracts the YF-Page-G0 cookie value from raw response header text.
     *
     * The text may contain several header blocks (one per redirect hop); all of them
     * are searched and the value from the last matching Set-Cookie line wins.
     *
     * @param string $response Raw HTTP response headers.
     *
     * @return string The cookie value, or '' when no YF-Page-G0 cookie is set.
     */
    private function extractYFCookie(string $response): string
    {
        if (preg_match_all('/^Set-Cookie:\s*YF-Page-G0=([^;\r\n]+)/im', $response, $matches)) {
            return $matches[1][count($matches[1]) - 1];
        }

        return '';
    }

    /**
     * Loads the Weibo home page and collects the cookies it sets.
     *
     * YF-Page-G0, SUB and SUBP are left out because getCookie() already places them
     * in the cookie string; repeating a name would make the result ambiguous.
     *
     * @return string "name=value" pairs joined by "; ", or '' when there are none.
     */
    private function getAdditionalCookies(): string
    {
        $result = $this->request('https://weibo.com/', [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
            'Connection: keep-alive',
            'Host: weibo.com',
            'Sec-Fetch-Dest: document',
            'Sec-Fetch-Mode: navigate',
            'Sec-Fetch-Site: none',
            'Upgrade-Insecure-Requests: 1',
            'User-Agent: ' . $this->userAgent,
        ], true);

        if ($result['headers'] === '') {
            return '';
        }

        if (!preg_match_all('/^Set-Cookie:\s*([^;\r\n]+)/im', $result['headers'], $matches)) {
            return '';
        }

        $alreadyPresent = ['YF-Page-G0', 'SUB', 'SUBP'];
        $pairs          = [];

        foreach ($matches[1] as $pair) {
            $pair = trim($pair);
            $name = explode('=', $pair, 2)[0];

            if ($pair === '' || in_array($name, $alreadyPresent, true)) {
                continue;
            }

            // Keyed by name so a cookie set on several redirect hops appears once,
            // carrying the value of the last hop.
            $pairs[$name] = $pair;
        }

        return implode('; ', $pairs);
    }

    /**
     * Performs one GET request with the shared cookie jar, timeout and optional proxy.
     *
     * @param string   $url             Absolute URL to fetch.
     * @param string[] $headers         Request header lines ("Name: value").
     * @param bool     $followRedirects Value for CURLOPT_FOLLOWLOCATION.
     *
     * @return array{status:int, error:string, headers:string, body:string}
     *               status  - HTTP status code reported by cURL (0 when none was received),
     *               error   - cURL error text ('' when there was none),
     *               headers - raw header text of every response in the redirect chain,
     *               body    - decoded body of the final response.
     */
    private function request(string $url, array $headers, bool $followRedirects): array
    {
        $empty = ['status' => 0, 'error' => '', 'headers' => '', 'body' => ''];

        $ch = curl_init();
        if ($ch === false) {
            $empty['error'] = 'curl_init failed';
            return $empty;
        }

        $options = [
            CURLOPT_URL            => $url,
            CURLOPT_RETURNTRANSFER => true,
            // Headers are always captured and split off below via CURLINFO_HEADER_SIZE.
            CURLOPT_HEADER         => true,
            CURLOPT_HTTPHEADER     => $headers,
            CURLOPT_COOKIEJAR      => $this->cookieFile,
            CURLOPT_COOKIEFILE     => $this->cookieFile,
            // NOTE(review): certificate verification is switched off (kept from the
            // original code); see the class comment.
            CURLOPT_SSL_VERIFYPEER => false,
            CURLOPT_SSL_VERIFYHOST => false,
            CURLOPT_TIMEOUT        => $this->timeout,
            CURLOPT_FOLLOWLOCATION => $followRedirects,
            // Empty string: let cURL advertise exactly the encodings it can decode.
            // No Accept-Encoding header is sent manually, so the server is never
            // invited to use an encoding this build of cURL cannot undo.
            CURLOPT_ENCODING       => '',
        ];

        if ($this->proxy) {
            $options[CURLOPT_PROXY] = $this->proxy;
        }

        curl_setopt_array($ch, $options);

        $raw        = curl_exec($ch);
        $status     = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        $error      = curl_error($ch);

        curl_close($ch);
        // curl_close() is a no-op since PHP 8.0; dropping the handle as well makes
        // sure it is released (and the cookie jar written) before returning.
        unset($ch);

        if (!is_string($raw)) {
            $raw = '';
        }

        return [
            'status'  => $status,
            'error'   => $error,
            'headers' => (string) substr($raw, 0, $headerSize),
            'body'    => (string) substr($raw, $headerSize),
        ];
    }

    /**
     * Decodes a JSONP (or plain JSON) payload into an array.
     *
     * A leading $callback name plus surrounding "(", ")" and ";" are stripped first.
     * If that does not yield valid JSON, everything before the first "{" and after the
     * last "}" is discarded and decoding is attempted once more.
     *
     * @param string $payload  Response body.
     * @param string $callback Expected JSONP callback name.
     *
     * @return array|null Decoded data, or null when the payload is not a JSON object/array.
     */
    private function decodeJsonp(string $payload, string $callback): ?array
    {
        $payload = trim($payload);

        if (strpos($payload, $callback) === 0) {
            $payload = trim((string) substr($payload, strlen($callback)), '();');
        }

        $decoded = json_decode($payload, true);

        if (json_last_error() !== JSON_ERROR_NONE) {
            $payload = (string) preg_replace('/^[^{]*/', '', $payload);
            $payload = (string) preg_replace('/[^}]*$/', '', $payload);
            $decoded = json_decode($payload, true);

            if (json_last_error() !== JSON_ERROR_NONE) {
                return null;
            }
        }

        return is_array($decoded) ? $decoded : null;
    }

    /**
     * Writes an info-level message to the "crawlerlog" log channel.
     */
    private function log(string $message): void
    {
        Log::channel('crawlerlog')->info($message, []);
    }
}

Buy me a cup of coffee :)

觉得对你有帮助,就给我打赏吧,谢谢!

微信赞赏码链接,点击跳转:

php实现游客状态获取web端微博cookie插图

Tags: