diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..2c7a058 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 RainySY + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index da88ecd..eded9f1 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,8 @@ ## ✨ 功能 - 🔗 **自动检测** — 聊天中出现 `linux.do` 链接立即触发 -- 🛡️ **绕过 Cloudflare** — 使用 [Scrapling](https://github.com/D4Vinci/Scrapling) 的 StealthyFetcher 自动解 Turnstile -- 📸 **截图预览** — 全页面截图(1920×1080) +- 🛡️ **绕过 Cloudflare** — 使用 [Scrapling](https://github.com/D4Vinci/Scrapling) 的 StealthySession 自动解 Turnstile +- 📸 **截图预览** — 全页面截图(1280×1024) - 📝 **内容摘要** — 提取标题 + 正文前 400 字 - ⚡ **异步非阻塞** — Scrapling 在独立线程池运行,不阻塞 AstrBot 主循环 - 💾 **缓存机制** — 30 分钟内相同链接直接返回缓存截图 @@ -66,7 +66,7 @@ https://linux.do/t/topic/1378383 │ linux.do/xx │ │ 事件监听器 │ └─────────────┘ └──────┬───────┘ │ - asyncio.to_thread() + run_in_executor() │ ┌───────▼────────┐ │ Thread Pool │ @@ -91,11 +91,13 @@ https://linux.do/t/topic/1378383 ## ⚙️ 配置 -通过 `_conf_schema.json` 支持以下配置(可选): +通过 `_conf_schema.json` 支持以下配置: -- `cache_ttl`: 缓存有效期(秒,默认 1800) -- `screenshot_width`: 截图宽度(默认 1920) -- `max_content_length`: 内容摘要最大长度(默认 400) +| 配置项 | 说明 | 默认值 | +|--------|------|--------| +| `cache_ttl` | 缓存有效期(秒),设为 0 关闭缓存 | 1800 | +| `max_content_length` | 内容摘要最大长度(字符) | 400 | +| `screenshot_timeout` | 截图超时(秒) | 15 | ## ⚠️ 注意事项 diff --git a/main.py b/main.py index 81202ac..7b0064d 100644 --- a/main.py +++ b/main.py @@ -13,6 +13,7 @@ import hashlib from pathlib import Path from concurrent.futures import ThreadPoolExecutor import html as html_mod +import threading from astrbot.core.utils.astrbot_path import get_astrbot_data_path @@ -22,12 +23,11 @@ from astrbot.api import logger from astrbot.api import AstrBotConfig try: - from scrapling.fetchers import StealthyFetcher, StealthySession as _StealthySession + from scrapling.fetchers import StealthySession as _StealthySession from lxml import html as _lh SCRAPLING_AVAILABLE = True except ImportError: SCRAPLING_AVAILABLE = False - StealthyFetcher = None _StealthySession = None _lh = None @@ -53,6 +53,7 @@ class LinuxDoPreviewPlugin(Star): ) self._stats = {"total": 0, "cache_hit": 0, "error": 0} + self._stats_lock = threading.Lock() async def terminate(self): _EXECUTOR.shutdown(wait=False) @@ -94,10 +95,12 @@ class LinuxDoPreviewPlugin(Star): if summary: yield event.plain_result(summary) - self._stats["total"] += 1 + with self._stats_lock: + self._stats["total"] += 1 except Exception as e: - self._stats["error"] += 1 + with self._stats_lock: + self._stats["error"] += 1 logger.error(f"[LinuxDoPreview] 预览失败: {type(e).__name__}: {e}") yield event.plain_result(f"❌ 预览获取失败: {str(e)[:200]}") @@ -130,8 +133,6 @@ class LinuxDoPreviewPlugin(Star): and sz > 50 * 1024 # 小于 50KB 的截图视为无效(黑屏/空白) ) - StealthyFetcher.adaptive = True # type: ignore[union-attr] - with _StealthySession( # type: ignore[union-attr] headless=True, solve_cloudflare=True ) as session: @@ -148,7 +149,8 @@ class LinuxDoPreviewPlugin(Star): session, url, screenshot_path ) else: - self._stats["cache_hit"] += 1 + with self._stats_lock: + self._stats["cache_hit"] += 1 logger.info( f"[LinuxDoPreview] 使用缓存截图: {screenshot_path.name}" ) @@ -158,10 +160,9 @@ class LinuxDoPreviewPlugin(Star): # ─────────── 截图(复用 StealthySession 的浏览器上下文) ─────────── - @staticmethod - @staticmethod - def _take_screenshot(session, url: str, save_path: Path) -> Path | None: + def _take_screenshot(self, session, url: str, save_path: Path) -> Path | None: """在已有 cf_clearance 的上下文中新建标签页截图""" + timeout_ms = self.config.get("screenshot_timeout", 15) * 1000 try: ctx = session.context if not ctx: @@ -171,13 +172,13 @@ class LinuxDoPreviewPlugin(Star): page.set_viewport_size({"width": 1280, "height": 1024}) # 导航(已有 cf_clearance cookie,不应再触发 Cloudflare) - page.goto(url, wait_until="load", timeout=30000) + page.goto(url, wait_until="load", timeout=timeout_ms) page.wait_for_timeout(3000) page.screenshot( path=str(save_path), full_page=True, - timeout=20000, + timeout=timeout_ms, ) sz = save_path.stat().st_size logger.info( @@ -238,12 +239,11 @@ class LinuxDoPreviewPlugin(Star): break return "\n\n".join(parts) - @staticmethod - def _build_summary(title: str, content: str, url: str) -> str: + def _build_summary(self, title: str, content: str, url: str) -> str: lines = [f"📌 {title}"] if content: lines.append("") - max_len = 400 + max_len = self.config.get("max_content_length", 400) lines.append(content[:max_len]) if len(content) > max_len: lines[-1] += "…" diff --git a/metadata.yaml b/metadata.yaml index 7d3133d..8dbb58d 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -2,10 +2,10 @@ name: astrbot_plugin_linuxdo display_name: LinuxDo Preview short_desc: 自动检测 linux.do 链接,绕过 Cloudflare 截图发送预览 desc: > - 自动检测聊天消息中的 linux.do 链接,使用 Scrapling 的 StealthyFetcher + 自动检测聊天消息中的 linux.do 链接,使用 Scrapling 的 StealthySession 绕过 Cloudflare Turnstile 防护,获取页面截图和内容摘要并发送预览。 支持缓存避免重复请求,异步非阻塞设计。 -author: astrbot_plugin_linuxdo +author: RainySY version: 1.0.0 repo: https://github.com/sakuradairong/astrbot_plugin_linuxdo astrbot_version: ">=4.16" diff --git a/requirements.txt b/requirements.txt index d52fc13..eb882bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ scrapling[fetchers]>=0.4 +lxml>=5.0