Skip to content

Commit b70faa4

Browse files
feat: 添加 WikipediaTopViewsSource 并增强查询源优先级系统
- 添加 WikipediaTopViewsSource,支持6小时缓存和文章过滤 - 在 QuerySource 基类添加 get_priority() 默认实现 (100) - 创建 DashboardClient 用于 API 积分获取,支持降级到 HTML 解析 - 添加 verify-context CLI 命令用于 PR 上下文验证 - 在 ReviewMetadata 添加 branch 和 head_sha 字段用于上下文追踪 - 增强 fetch 命令,支持通过 gh CLI 自动检测 PR - 添加 gh CLI 完整错误处理 (FileNotFoundError, Timeout, PermissionError) - 添加 WikipediaTopViewsSource 导入的 ImportError 处理 - 增强 _parse_points 处理纯空格字符串 - 修复 DashboardClient API 端点硬编码,使用常量配置 - 优化 WikipediaTopViewsSource aiohttp 会话管理,添加连接器配置 - 修复 WikipediaTopViewsSource 本地时间问题,改用 UTC 时间 - 增强 verify-context Git 分支检测,添加诊断信息 - 简化 PointsDetector 异常处理,使用统一兜底 - 添加 cmd_fetch 和 cmd_verify_context 错误日志记录 - 修复积分打印日志安全问题,改用 debug 级别 - 修复 cmd_fetch 打印原始异常安全问题 - 修复 lang 参数未验证安全问题 - 修复 Topviews 永久不可用 Bug(成功时恢复可用状态) - 修复 0 积分解析错误 Bug - 修复 gh 需要 -R 参数指定仓库 - 修复双斜杠 URL 问题 修复审查评论: - PR#12: 修复多个安全问题 (积分日志、异常泄露、参数验证) - PR#12: 修复多个 Bug (永久禁用、0积分、PR仓库) 测试: 483 个单元测试通过
1 parent acb41eb commit b70faa4

22 files changed

Lines changed: 1656 additions & 39 deletions

config.example.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@ query_engine:
5757
wikipedia:
5858
enabled: true # Wikipedia 热门话题
5959
timeout: 15 # 超时(秒)
60+
wikipedia_top_views:
61+
enabled: true # Wikipedia 热门浏览(真实热搜数据)
62+
timeout: 30 # 超时(秒)
63+
lang: "en" # 语言
64+
ttl: 21600 # 缓存 TTL(秒,默认 6 小时)
6065
bing_suggestions:
6166
enabled: true # Bing 建议API
6267
bing_api:

pyproject.toml

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ version = "1.0.0"
88
requires-python = ">=3.10"
99
dependencies = [
1010
"playwright>=1.49.0",
11-
"playwright-stealth>=1.0.6",
11+
"playwright-stealth>=1.0.6,<2.0",
1212
"pyyaml>=6.0.1",
1313
"aiohttp>=3.11.0",
1414
"beautifulsoup4>=4.12.3",
@@ -35,17 +35,7 @@ dev = [
3535
"pytest-xdist>=3.5.0",
3636
"hypothesis>=6.125.0",
3737
"faker>=35.0.0",
38-
]
39-
test = [
40-
"pytest>=8.0.0",
41-
"pytest-asyncio>=0.24.0",
42-
"pytest-playwright>=0.5.0",
43-
"pytest-benchmark>=5.0.0",
44-
"pytest-cov>=6.0.0",
45-
"pytest-timeout>=2.3.0",
46-
"pytest-xdist>=3.5.0",
47-
"hypothesis>=6.125.0",
48-
"faker>=35.0.0",
38+
"respx>=0.21.0",
4939
]
5040
viz = [
5141
"streamlit>=1.41.0",

src/account/points_detector.py

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@
88

99
from playwright.async_api import Page
1010

11-
from constants import REWARDS_URLS
11+
from api.dashboard_client import DashboardClient
1212

1313
logger = logging.getLogger(__name__)
1414

1515

1616
class PointsDetector:
1717
"""积分检测器类"""
1818

19-
DASHBOARD_URL = REWARDS_URLS["dashboard"]
19+
DASHBOARD_URL = "https://rewards.bing.com/"
2020

2121
POINTS_SELECTORS = [
2222
"p.text-title1.font-semibold",
@@ -90,11 +90,25 @@ async def get_current_points(self, page: Page, skip_navigation: bool = False) ->
9090
logger.debug("跳过导航,使用当前页面")
9191
await page.wait_for_timeout(1000)
9292

93+
# 优先使用 Dashboard API
94+
try:
95+
logger.debug("尝试使用 Dashboard API 获取积分...")
96+
client = DashboardClient(page)
97+
api_points: int | None = await client.get_current_points()
98+
if api_points is not None and api_points >= 0:
99+
logger.debug(f"✓ 从 API 获取积分: {api_points:,}")
100+
return int(api_points)
101+
except Exception as e:
102+
logger.warning(
103+
f"API 获取积分失败({type(e).__name__}: {e}),使用 HTML 解析作为备用"
104+
)
105+
106+
# 备用:HTML 解析
93107
logger.debug("尝试从页面源码提取积分...")
94108
points = await self._extract_points_from_source(page)
95109

96110
if points is not None:
97-
logger.info(f"✓ 从源码提取积分: {points:,}")
111+
logger.debug(f"✓ 从源码提取积分: {points:,}")
98112
return points
99113

100114
logger.debug("源码提取失败,尝试选择器...")
@@ -107,13 +121,14 @@ async def get_current_points(self, page: Page, skip_navigation: bool = False) ->
107121
points_text = await element.text_content()
108122
logger.debug(f"找到积分文本: {points_text}")
109123

110-
points = self._parse_points(points_text)
124+
if points_text:
125+
points = self._parse_points(points_text)
111126

112-
if points is not None and points >= 100:
113-
logger.info(f"✓ 当前积分: {points:,}")
114-
return points
115-
elif points is not None:
116-
logger.debug(f"积分值太小,可能是误识别: {points}")
127+
if points is not None and points >= 100:
128+
logger.debug(f"✓ 当前积分: {points:,}")
129+
return points
130+
elif points is not None:
131+
logger.debug(f"积分值太小,可能是误识别: {points}")
117132

118133
except Exception as e:
119134
logger.debug(f"选择器 {selector} 失败: {e}")
@@ -143,7 +158,7 @@ def _parse_points(self, text: str) -> int | None:
143158
Returns:
144159
积分数量,失败返回 None
145160
"""
146-
if not text:
161+
if not text or not text.strip():
147162
return None
148163

149164
try:
@@ -310,7 +325,12 @@ async def _check_task_status(self, page: Page, selectors: list, task_name: str)
310325
Returns:
311326
任务状态字典
312327
"""
313-
status = {"found": False, "completed": False, "progress": None, "max_progress": None}
328+
status: dict[str, bool | int | None] = {
329+
"found": False,
330+
"completed": False,
331+
"progress": None,
332+
"max_progress": None,
333+
}
314334

315335
try:
316336
for selector in selectors:
@@ -338,10 +358,12 @@ async def _check_task_status(self, page: Page, selectors: list, task_name: str)
338358
# 查找类似 "15/30" 的进度
339359
progress_match = re.search(r"(\d+)\s*/\s*(\d+)", text)
340360
if progress_match:
341-
status["progress"] = int(progress_match.group(1))
342-
status["max_progress"] = int(progress_match.group(2))
361+
progress_val = int(progress_match.group(1))
362+
max_progress_val = int(progress_match.group(2))
363+
status["progress"] = progress_val
364+
status["max_progress"] = max_progress_val
343365

344-
if status["progress"] >= status["max_progress"]:
366+
if progress_val >= max_progress_val:
345367
status["completed"] = True
346368

347369
logger.debug(f"{task_name} 状态: {status}")

src/api/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
"""API clients module"""
2+
3+
from .dashboard_client import DashboardClient
4+
5+
__all__ = ["DashboardClient"]

src/api/dashboard_client.py

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
"""
2+
Dashboard API Client
3+
4+
Fetches points data from Microsoft Rewards Dashboard API.
5+
"""
6+
7+
import logging
8+
import re
9+
from typing import Any
10+
11+
from playwright.async_api import Page
12+
13+
from constants import API_ENDPOINTS, REWARDS_URLS
14+
15+
logger = logging.getLogger(__name__)
16+
17+
18+
class DashboardClient:
    """Client for fetching data from the Microsoft Rewards Dashboard API.

    Requests are issued from inside the browser page (``fetch`` with
    ``credentials: 'include'``) so they go out with the page's own cookies.
    """

    # JavaScript run inside the page. The URL is received as the function
    # argument instead of being spliced into the script text, so a quote or
    # backslash in the configured URL cannot break or alter the script.
    _FETCH_JSON_JS = """
        async (url) => {
            try {
                const resp = await fetch(url, {
                    method: 'GET',
                    credentials: 'include'
                });
                if (!resp.ok) return null;
                return await resp.json();
            } catch {
                return null;
            }
        }
    """

    # JSON keys that may carry the balance, in order of preference.
    _POINTS_KEYS = ("availablePoints", "pointsBalance")

    # Patterns used to scrape the balance out of raw page HTML, in order of
    # preference. Compiled once at class creation instead of on every call.
    _POINTS_PATTERNS = tuple(
        re.compile(pattern)
        for pattern in (
            r'"availablePoints"\s*:\s*(\d+)',
            r'"pointsBalance"\s*:\s*(\d+)',
            r'"totalPoints"\s*:\s*(\d+)',
        )
    )

    # Upper bound for a value scraped from HTML; larger numbers are treated as
    # a false match and the next pattern is tried.
    _MAX_SCRAPED_POINTS = 1_000_000

    def __init__(self, page: Page):
        """
        Initialize Dashboard client

        Args:
            page: Playwright Page object
        """
        self.page = page
        self._cached_points: int | None = None
        base = REWARDS_URLS.get("dashboard", "https://rewards.bing.com")
        # Endpoint paths start with "/", so drop any trailing slash here to
        # avoid producing "//" when the two are joined.
        self._base_url = base.rstrip("/")

    @staticmethod
    def _coerce_points(value: Any) -> int | None:
        """
        Convert a raw JSON value into an integer point count

        Args:
            value: Value read from the API response

        Returns:
            The value as ``int``, or None when it is missing, a boolean, or
            not convertible to an integer
        """
        # bool is a subclass of int; int(True) would silently become 1 point.
        if value is None or isinstance(value, bool):
            return None
        try:
            return int(value)
        except (ValueError, TypeError, OverflowError):
            return None

    async def _fetch_json(self, url: str) -> Any:
        """
        Run a credentialed GET for ``url`` inside the page

        Args:
            url: Absolute URL to request

        Returns:
            The decoded JSON body, or None when the in-page fetch fails or the
            response status is not OK

        Raises:
            Whatever ``page.evaluate`` raises; callers decide how to handle it
        """
        return await self.page.evaluate(self._FETCH_JSON_JS, url)

    async def get_current_points(self) -> int | None:
        """
        Get current points from Dashboard API

        Attempts to fetch points via API call first, falls back to
        parsing page content if API fails.

        Returns:
            Points balance, the last successfully fetched value when both
            strategies fail, or None if no value was ever obtained
        """
        # _fetch_points_via_api already swallows its own errors; these guards
        # only fire if that method is overridden or patched to raise.
        try:
            points = await self._fetch_points_via_api()
            if points is not None and points >= 0:
                self._cached_points = points
                return points
        except TimeoutError as e:
            logger.warning("API request timeout: %s", e)
        except ConnectionError as e:
            logger.warning("API connection error: %s", e)
        except Exception as e:
            logger.warning("API call failed: %s", e)

        try:
            points = await self._fetch_points_via_page_content()
            if points is not None and points >= 0:
                self._cached_points = points
                return points
        except Exception as e:
            logger.debug("Page content parsing failed: %s", e)

        return self._cached_points

    async def _fetch_points_via_api(self) -> int | None:
        """
        Fetch points via internal API endpoint

        Returns:
            Points balance or None. Never raises: any failure is logged at
            debug level and reported as None.
        """
        try:
            api_url = f"{self._base_url}{API_ENDPOINTS['dashboard_balance']}"
            response = await self._fetch_json(api_url)

            if response and isinstance(response, dict):
                # Use the first key that yields a usable integer, so an
                # unparseable "availablePoints" no longer hides a valid
                # "pointsBalance".
                for key in self._POINTS_KEYS:
                    points = self._coerce_points(response.get(key))
                    if points is not None:
                        return points

        except Exception as e:
            logger.debug("API fetch error: %s", e)

        return None

    async def _fetch_points_via_page_content(self) -> int | None:
        """
        Extract points from page content as fallback

        Returns:
            Points balance or None. Never raises: any failure is logged at
            debug level and reported as None.
        """
        try:
            content = await self.page.content()

            for pattern in self._POINTS_PATTERNS:
                match = pattern.search(content)
                if match:
                    points = int(match.group(1))
                    if 0 <= points <= self._MAX_SCRAPED_POINTS:
                        return points

        except Exception as e:
            logger.debug("Page content extraction error: %s", e)

        return None

    async def get_dashboard_data(self) -> dict[str, Any] | None:
        """
        Fetch full dashboard data

        Returns:
            Dashboard data dict or None when the request fails or the
            response is not a JSON object
        """
        try:
            api_url = f"{self._base_url}{API_ENDPOINTS['dashboard_data']}"
            response = await self._fetch_json(api_url)

            if response is not None and isinstance(response, dict):
                return dict(response)

        except TimeoutError as e:
            logger.warning("Dashboard API timeout: %s", e)
        except ConnectionError as e:
            logger.warning("Dashboard API connection error: %s", e)
        except Exception as e:
            logger.warning("Dashboard API error: %s", e)

        return None

src/constants/urls.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@
3737

3838
API_ENDPOINTS = {
3939
"dashboard": "https://rewards.bing.com/api/getuserinfo",
40+
"dashboard_balance": "/api/getuserbalance",
41+
"dashboard_data": "/api/dashboard",
4042
"report_activity": "https://rewards.bing.com/api/reportactivity",
4143
"quiz": "https://www.bing.com/bingqa/ReportActivity",
4244
"app_dashboard": "https://prod.rewardsplatform.microsoft.com/dapi/me",

src/review/models.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,10 @@ class ReviewMetadata(BaseModel):
118118
pr_number: int
119119
owner: str
120120
repo: str
121+
branch: str = Field(default="", description="拉取评论时的分支名称")
122+
head_sha: str = Field(default="", description="拉取评论时的 HEAD commit SHA(前7位)")
121123
last_updated: str = Field(default_factory=lambda: datetime.utcnow().isoformat())
122-
version: str = "2.2"
124+
version: str = "2.3"
123125
etag_comments: str | None = Field(None, description="GitHub ETag,用于条件请求")
124126
etag_reviews: str | None = Field(None, description="Reviews ETag")
125127

0 commit comments

Comments
 (0)