#!/usr/bin/env python3
"""Run a Bing RSS search and print the results as a JSON array on stdout.

The search query is read from the ``INTRADAY_DISCOVERY_QUERY`` environment
variable.  When it is unset or blank the script prints ``[]`` and exits
successfully without touching the network.

Each output object has the keys ``title``, ``summary``, ``url``,
``provider``, ``provider_url`` and ``published_at``.
"""
import email.utils
import json
import os
import urllib.parse
import urllib.request
import xml.etree.ElementTree as ET
from typing import Dict, List

SEARCH_ENDPOINT = "https://www.bing.com/search?format=rss&q="
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
}
REQUEST_TIMEOUT_SECONDS = 20

# Ordered (substring, provider) pairs.  The first substring found in the
# lower-cased "title link" text wins, so order matters.
_PROVIDER_NEEDLES = (
    ('openai', 'OpenAI'),
    ('anthropic', 'Anthropic'),
    ('claude', 'Anthropic'),
    ('gemini', 'Google'),
    ('google', 'Google'),
    ('deepseek', 'DeepSeek'),
    ('qwen', 'Qwen'),
    ('dashscope', 'DashScope'),
    ('zhipu', '智谱'),
    ('baidu', '百度'),
    ('tencent', '腾讯'),
    ('minimax', 'MiniMax'),
    ('x.ai', 'xAI'),
    ('xai', 'xAI'),
)


def normalize_pubdate(value: str) -> str:
    """Convert an RFC 2822 date string to ``YYYY-MM-DD``.

    Returns an empty string for empty input.  If *value* cannot be parsed
    it is returned unchanged, so the caller never loses the original text.
    """
    if not value:
        return ''
    try:
        parsed = email.utils.parsedate_to_datetime(value)
        return parsed.strftime('%Y-%m-%d')
    except (TypeError, ValueError, IndexError, OverflowError):
        # Best effort: unparseable dates are passed through verbatim.
        return value


def infer_provider(title: str, link: str) -> str:
    """Guess the provider name from substrings of *title* and *link*.

    Matching is case-insensitive.  Returns ``''`` when nothing matches.
    """
    haystack = (title + ' ' + link).lower()
    for needle, provider in _PROVIDER_NEEDLES:
        if needle in haystack:
            return provider
    return ''


def _origin_of(link: str) -> str:
    """Return ``scheme://netloc`` for *link*, or ``''`` if unavailable."""
    if not link:
        return ''
    try:
        parsed = urllib.parse.urlparse(link)
    except ValueError:
        # urlparse rejects some malformed links (e.g. an unbalanced IPv6
        # bracket); one bad link must not abort the whole run.
        return ''
    if parsed.scheme and parsed.netloc:
        return f"{parsed.scheme}://{parsed.netloc}"
    return ''


def _child_text(item: ET.Element, tag: str) -> str:
    """Return the stripped text of child *tag*, or ``''`` if absent."""
    return (item.findtext(tag) or '').strip()


def parse_items(body: str) -> List[Dict[str, str]]:
    """Parse an RSS document into a list of result dictionaries.

    Only ``./channel/item`` elements are considered.  Raises
    ``xml.etree.ElementTree.ParseError`` if *body* is not well-formed XML.
    """
    root = ET.fromstring(body)
    items = []
    for item in root.findall('./channel/item'):
        title = _child_text(item, 'title')
        link = _child_text(item, 'link')
        items.append({
            "title": title,
            "summary": _child_text(item, 'description'),
            "url": link,
            "provider": infer_provider(title, link),
            "provider_url": _origin_of(link),
            "published_at": normalize_pubdate(_child_text(item, 'pubDate')),
        })
    return items


def build_search_url(query: str) -> str:
    """Return the RSS search URL for *query* (percent-encoded)."""
    return SEARCH_ENDPOINT + urllib.parse.quote(query)


def fetch_feed(url: str, timeout: float = REQUEST_TIMEOUT_SECONDS) -> str:
    """Download *url* and return the body decoded as UTF-8.

    Undecodable bytes are dropped.  Network errors propagate to the caller.
    """
    request = urllib.request.Request(url, headers=REQUEST_HEADERS)
    with urllib.request.urlopen(request, timeout=timeout) as resp:
        return resp.read().decode("utf-8", errors="ignore")


def main() -> None:
    """Read the query from the environment and print results as JSON."""
    query = os.environ.get("INTRADAY_DISCOVERY_QUERY", "").strip()
    if not query:
        print("[]")
        return
    items = parse_items(fetch_feed(build_search_url(query)))
    print(json.dumps(items, ensure_ascii=False))


if __name__ == "__main__":
    main()