php实现无须账号自动获取微博cookie
本文基于 Webman 框架,使用 PHP 实现无须账号自动获取微博游客 Cookie。
如果需要 Java 实现,请参考:https://www.freesion.com/article/4184140577/
不懂的可以评论或联系我邮箱:owen@owenzhang.com
著作权归OwenZhang所有。商业转载请联系OwenZhang获得授权,非商业转载请注明出处。
调用:
$cookie = WeiboService::instance()->getCookie();
WeiboService.php代码如下:
<?php
namespace app\service;
use app\model\User;
use support\Log;
use Yurun\OAuthLogin\Weibo\OAuth2 as WeiboOAuth2;
/**
 * Obtains a Weibo "visitor" (not logged in) cookie string without an account.
 *
 * The flow performed by getCookie() is:
 *   1. call passport.weibo.com/visitor/genvisitor to obtain a tid,
 *   2. fetch a d.weibo.com page and read the YF-Page-G0 cookie it sets,
 *   3. call passport.weibo.com/visitor/visitor?a=incarnate to obtain SUB/SUBP,
 *   4. fetch weibo.com and append any further cookies it sets.
 *
 * All requests share one cURL cookie jar file and one randomly chosen
 * User-Agent for the lifetime of the instance.
 */
class WeiboService
{
    /** Shared instance, created on the first call to instance(). */
    public static ?WeiboService $_instance = null;

    /** Cookie names getCookie() emits itself; never repeated as "additional" cookies. */
    private const BASE_COOKIE_NAMES = ['YF-Page-G0', 'SUB', 'SUBP'];

    /** Path of the cURL cookie jar shared by every request of this instance. */
    private string $cookieFile = '';

    /** True when the cookie jar path was generated here and must be removed on destruct. */
    private bool $ownsCookieFile = false;

    /** User-Agent sent with every request; picked once in the constructor. */
    private string $userAgent = '';

    /** Proxy passed to CURLOPT_PROXY; an empty string means "no proxy". */
    private string $proxy = '';

    /** Per-request timeout in seconds (CURLOPT_TIMEOUT). */
    private int $timeout = 30;

    /**
     * Whether TLS certificates are verified.
     *
     * NOTE(review): verification is off to preserve the behaviour this class has
     * always had. That leaves every request open to interception; switch this to
     * true once the deployment is confirmed to have a usable CA bundle.
     */
    private bool $verifySsl = false;

    /** Predefined User-Agent pool. */
    private array $userAgents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:130.0) Gecko/20100101 Firefox/130.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36',
        'Mozilla/5.0 (iPhone; CPU iPhone OS 17_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.6 Mobile/15E148 Safari/604.1',
    ];

    /**
     * Returns the shared instance, creating it on first use.
     *
     * @return WeiboService
     */
    public static function instance(): ?WeiboService
    {
        if (static::$_instance === null) {
            static::$_instance = new self();
        }

        return static::$_instance;
    }

    /**
     * @param string $cookieFile Path of the cookie jar to use. When empty, a
     *                           uniquely named file in the system temp directory
     *                           is used and deleted again when the object is destroyed.
     */
    public function __construct($cookieFile = '')
    {
        if ($cookieFile) {
            $this->cookieFile = (string) $cookieFile;
        } else {
            // random_bytes() instead of md5(microtime()): several workers starting
            // in the same microsecond must not end up sharing one jar.
            $this->cookieFile = sys_get_temp_dir() . '/weibo_cookies_' . bin2hex(random_bytes(8)) . '.txt';
            $this->ownsCookieFile = true;
        }

        $this->userAgent = $this->userAgents[array_rand($this->userAgents)];
    }

    /**
     * Removes the cookie jar, but only when this object generated its path.
     * A caller-supplied file is never touched.
     */
    public function __destruct()
    {
        if ($this->ownsCookieFile && is_file($this->cookieFile)) {
            unlink($this->cookieFile);
        }
    }

    /**
     * Sets the per-request timeout.
     *
     * @param int $seconds Value handed to CURLOPT_TIMEOUT.
     */
    public function setTimeout(int $seconds): void
    {
        $this->timeout = $seconds;
    }

    /**
     * Builds the complete visitor cookie string.
     *
     * Never throws for request failures: any \Exception raised while talking to
     * Weibo is logged to the "crawlerlog" channel and a locally generated
     * placeholder cookie (see generateFallbackCookie()) is returned instead.
     *
     * @return string Cookie header value of the form "YF-Page-G0=..; SUB=..; SUBP=..[; more]".
     */
    public function getCookie(): string
    {
        try {
            // Step 1: obtain the visitor data, retrying up to three times.
            $visitor = null;
            $retryCount = 3;
            for ($attempt = 1; $attempt <= $retryCount; $attempt++) {
                $visitor = $this->getTidAndC();
                if ($visitor) {
                    break;
                }
                if ($attempt < $retryCount) {
                    sleep(1); // wait a second before the next attempt
                }
            }
            if (!$visitor) {
                throw new \RuntimeException('Failed to get tid and c after ' . $retryCount . ' attempts');
            }

            // Step 2: derive the three request parameters.
            $c = $this->normalizeConfidence($visitor['confidence'] ?? null);
            $w = !empty($visitor['new_tid']) ? '3' : '2';
            $t = isset($visitor['tid']) && is_scalar($visitor['tid']) ? (string) $visitor['tid'] : '';
            if ($t === '') {
                throw new \RuntimeException('Failed to get tid');
            }

            // Step 3: YF-Page-G0, falling back to a generated value.
            $yf = $this->getYF();
            if (empty($yf)) {
                $yf = $this->generateDefaultYF();
            }

            // Step 4: SUB and SUBP.
            [$sub, $subp] = $this->getSubAndSubp($t, $w, $c);
            if (empty($sub) || empty($subp)) {
                throw new \RuntimeException('Failed to get sub or subp');
            }

            $cookie = "YF-Page-G0={$yf}; SUB={$sub}; SUBP={$subp}";

            $additionalCookies = $this->getAdditionalCookies();
            if ($additionalCookies !== '') {
                $cookie .= '; ' . $additionalCookies;
            }

            return $cookie;
        } catch (\Exception $e) {
            $this->log('Weibo: VisitorCookie Error: ' . $e->getMessage());

            // Hand back a simulated cookie as a last resort.
            return $this->generateFallbackCookie();
        }
    }

    /**
     * Requests the visitor data (tid, new_tid, confidence) from genvisitor.
     *
     * @return array|null The "data" object of the JSONP reply, or null when the
     *                    request failed, was not HTTP 200, or could not be decoded.
     */
    private function getTidAndC(): ?array
    {
        // A per-request callback name, as a JSONP caller would generate.
        $callback = 'jsonp' . time() . mt_rand(1000, 9999);
        $url = 'https://passport.weibo.com/visitor/genvisitor?cb=' . $callback;

        $response = $this->request($url, [
            'Accept: */*',
            'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'Connection: keep-alive',
            'Referer: https://weibo.com/',
            'Sec-Fetch-Dest: script',
            'Sec-Fetch-Mode: no-cors',
            'Sec-Fetch-Site: same-site',
            'X-Requested-With: XMLHttpRequest',
        ], true);

        if ($response['status'] !== 200) {
            $this->log("Weibo: HTTP Code: {$response['status']}, Error: {$response['error']}");
            return null;
        }
        if ($response['body'] === '') {
            $this->log('Weibo: Empty response');
            return null;
        }

        $decoded = $this->decodeJsonp($response['body']);
        if ($decoded === null) {
            $this->log('Weibo: JSON decode error: ' . json_last_error_msg());
            return null;
        }

        $data = $decoded['data'] ?? null;

        return is_array($data) ? $data : null;
    }

    /**
     * Exchanges the tid for the SUB and SUBP cookies via the "incarnate" call.
     *
     * @param string $t Visitor tid; URL-encoded here before it goes into the query.
     * @param string $w "3" for a new tid, "2" otherwise.
     * @param string $c Three-digit confidence as produced by normalizeConfidence().
     *
     * @return array{0:string,1:string} [sub, subp]; both '' on any failure.
     */
    private function getSubAndSubp(string $t, string $w, string $c): array
    {
        // Cache-busting fraction; built from an integer so the decimal separator
        // never depends on the process locale.
        $rand = sprintf('0.%06d', mt_rand(100000, 999999));

        $url = 'https://passport.weibo.com/visitor/visitor?a=incarnate'
            . '&t=' . rawurlencode($t)
            . '&w=' . rawurlencode($w)
            . '&c=' . rawurlencode($c)
            . '&gc=&cb=cross_domain&from=weibo&_rand=' . $rand;

        $response = $this->request($url, [
            'Accept: */*',
            'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'Connection: keep-alive',
            'Cookie: tid=' . $t . '__' . $c,
            'Referer: https://weibo.com/',
            'Sec-Fetch-Dest: script',
            'Sec-Fetch-Mode: no-cors',
            'Sec-Fetch-Site: same-site',
        ], false);

        if ($response['status'] !== 200 || $response['body'] === '') {
            $this->log("Weibo: Failed to get sub and subp, HTTP Code: {$response['status']}");
            return ['', ''];
        }

        $decoded = $this->decodeJsonp($response['body']);
        if ($decoded === null) {
            $this->log('Weibo: JSON decode error: ' . json_last_error_msg());
            return ['', ''];
        }
        if (!isset($decoded['data']) || !is_array($decoded['data'])) {
            return ['', ''];
        }

        $sub = $decoded['data']['sub'] ?? '';
        $subp = $decoded['data']['subp'] ?? '';

        return [(string) $sub, (string) $subp];
    }

    /**
     * Fetches one of several d.weibo.com pages and returns the YF-Page-G0
     * cookie set along the way.
     *
     * @return string Cookie value, or '' when the request failed or no such cookie was set.
     */
    private function getYF(): string
    {
        $pages = [
            '1087030002_2975_5012_0',
            '1087030002_2975_5011_0',
            '1087030002_2975_5010_0',
            '1087030002_2975_5009_0',
            '1087030002_2975_5008_0',
        ];
        $page = $pages[array_rand($pages)];

        $response = $this->request("https://d.weibo.com/{$page}", [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
            'Connection: keep-alive',
            'Referer: https://weibo.com/',
            'Sec-Fetch-Dest: document',
            'Sec-Fetch-Mode: navigate',
            'Sec-Fetch-Site: same-site',
            'Upgrade-Insecure-Requests: 1',
        ], true);

        if ($response['headers'] === '') {
            return '';
        }

        return $this->extractYFCookie($response['headers']);
    }

    /**
     * Generates a stand-in YF-Page-G0 value: the current Unix timestamp followed
     * by ten random digits (the first one non-zero).
     */
    private function generateDefaultYF(): string
    {
        $timestamp = time();
        // Two small draws instead of mt_rand(1000000000, 9999999999), whose upper
        // bound does not fit in an int on 32-bit builds.
        $random = mt_rand(1, 9) . str_pad((string) mt_rand(0, 999999999), 9, '0', STR_PAD_LEFT);

        return "{$timestamp}{$random}";
    }

    /**
     * Generates the placeholder cookie returned when the real flow fails.
     *
     * The SUB value is a fixed prefix/suffix around 32 random hex characters and
     * SUBP is a constant, so this string was not issued by Weibo for this session.
     */
    private function generateFallbackCookie(): string
    {
        $yf = $this->generateDefaultYF();
        $sub = '_2A25' . bin2hex(random_bytes(16)) . 'DeRhGe5M71EQ8SfOyD6IHXVnDZFnrDV8PUNbmtAbLWbhkW9NdJrQ7wtvzn0wy-bW-gzBdGt6MmWGCzmZ';
        $subp = '0033WrSXqPxfM725Ws9jqgMF55529P9D9Wh75PxDk9yG3v5oeoKavfmR5JpX5KzhUgL.FonEShepeK.Ee0z2dJLoIp7LxKML1KBLBKnLxKqL1hnLBoMReoB0eK24eoeE';

        return "YF-Page-G0={$yf}; SUB={$sub}; SUBP={$subp}";
    }

    /**
     * Extracts the YF-Page-G0 cookie value from raw response headers.
     *
     * @param string $response Raw header text; may contain several header blocks
     *                         (one per redirect hop). The last YF-Page-G0 seen wins.
     *
     * @return string Cookie value, or '' when absent.
     */
    private function extractYFCookie(string $response): string
    {
        $cookies = $this->parseSetCookies($response);

        return $cookies['YF-Page-G0'] ?? '';
    }

    /**
     * Fetches the Weibo home page and returns the cookies it sets, excluding
     * the ones getCookie() already emits (YF-Page-G0, SUB, SUBP).
     *
     * @return string "name=value" pairs joined by "; ", or '' when there are none.
     */
    private function getAdditionalCookies(): string
    {
        $response = $this->request('https://weibo.com/', [
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8',
            'Connection: keep-alive',
            'Sec-Fetch-Dest: document',
            'Sec-Fetch-Mode: navigate',
            'Sec-Fetch-Site: none',
            'Upgrade-Insecure-Requests: 1',
        ], true);

        $pairs = [];
        foreach ($this->parseSetCookies($response['headers']) as $name => $value) {
            // Skipping the base names keeps the final string free of duplicates.
            if (!in_array((string) $name, self::BASE_COOKIE_NAMES, true)) {
                $pairs[] = $name . '=' . $value;
            }
        }

        return implode('; ', $pairs);
    }

    /**
     * Performs one GET request with the shared cookie jar, User-Agent, proxy
     * and timeout, and returns headers and body separately.
     *
     * No Accept-Encoding or Host header is sent by hand: cURL adds Host itself,
     * and CURLOPT_ENCODING '' makes it advertise exactly the encodings it can
     * decode, so the body always arrives decompressed.
     *
     * @param string   $url             Absolute URL to fetch.
     * @param string[] $headers         Extra request header lines ("Name: value").
     * @param bool     $followRedirects Value for CURLOPT_FOLLOWLOCATION.
     *
     * @return array{status:int,headers:string,body:string,error:string}
     *         "headers" holds every header block received (all redirect hops);
     *         on transport failure headers and body are ''.
     */
    private function request(string $url, array $headers, bool $followRedirects): array
    {
        $result = ['status' => 0, 'headers' => '', 'body' => '', 'error' => ''];

        $ch = curl_init();
        if ($ch === false) {
            $result['error'] = 'curl_init failed';
            return $result;
        }

        $headers[] = 'User-Agent: ' . $this->userAgent;
        $options = [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HEADER => true,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_COOKIEJAR => $this->cookieFile,
            CURLOPT_COOKIEFILE => $this->cookieFile,
            CURLOPT_SSL_VERIFYPEER => $this->verifySsl,
            CURLOPT_SSL_VERIFYHOST => $this->verifySsl ? 2 : 0,
            CURLOPT_TIMEOUT => $this->timeout,
            CURLOPT_FOLLOWLOCATION => $followRedirects,
            CURLOPT_ENCODING => '',
        ];
        if ($this->proxy !== '') {
            $options[CURLOPT_PROXY] = $this->proxy;
        }
        curl_setopt_array($ch, $options);

        $raw = curl_exec($ch);
        $result['status'] = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        $result['error'] = curl_error($ch);
        curl_close($ch);

        if (is_string($raw)) {
            // CURLINFO_HEADER_SIZE covers all header blocks, so this split stays
            // correct after redirects and proxy CONNECT replies.
            $result['headers'] = (string) substr($raw, 0, $headerSize);
            $result['body'] = (string) substr($raw, $headerSize);
        }

        return $result;
    }

    /**
     * Decodes a JSON object that may be wrapped in a JSONP callback.
     *
     * The payload is first tried as plain JSON; failing that, everything before
     * the first "{" and after the last "}" is discarded and decoding is retried.
     *
     * @return array|null Decoded object, or null when no JSON object could be read.
     */
    private function decodeJsonp(string $payload): ?array
    {
        $payload = trim($payload);

        $decoded = json_decode($payload, true);
        if (is_array($decoded)) {
            return $decoded;
        }

        $start = strpos($payload, '{');
        $end = strrpos($payload, '}');
        if ($start === false || $end === false || $end < $start) {
            return null;
        }

        $decoded = json_decode(substr($payload, $start, $end - $start + 1), true);

        return is_array($decoded) ? $decoded : null;
    }

    /**
     * Collects the name/value pairs of all Set-Cookie header lines.
     *
     * Attributes after the first ";" are ignored, a match never runs past the
     * end of its line, and a name that appears more than once keeps its last value.
     *
     * @return array<string,string> Cookie values keyed by cookie name.
     */
    private function parseSetCookies(string $headers): array
    {
        $cookies = [];
        $pattern = '/^Set-Cookie:[ \t]*([^=;\r\n]+)=([^;\r\n]*)/im';
        if (preg_match_all($pattern, $headers, $matches, PREG_SET_ORDER)) {
            foreach ($matches as $match) {
                $cookies[trim($match[1])] = trim($match[2]);
            }
        }

        return $cookies;
    }

    /**
     * Normalises the "confidence" value to exactly three digits.
     *
     * Missing, empty or non-scalar values become "100"; anything else is
     * left-padded with zeros and cut to its last three characters (95 -> "095").
     * The previous code prefixed a single "0" unconditionally, which produced
     * the four-character "0100" for three-digit values.
     * NOTE(review): three digits is what the visitor endpoint is believed to
     * expect — confirm against the script served by passport.weibo.com.
     *
     * @param mixed $confidence Raw value from the genvisitor reply.
     */
    private function normalizeConfidence($confidence): string
    {
        if (empty($confidence) || !is_scalar($confidence)) {
            return '100';
        }

        return substr('000' . $confidence, -3);
    }

    /** Writes an info line to the "crawlerlog" channel. */
    private function log(string $message): void
    {
        (Log::channel('crawlerlog'))->info($message, []);
    }
}
Buy me a cup of coffee 🙂
觉得对你有帮助,就给我打赏吧,谢谢!

