# -*- coding: utf-8 -*-
import requests
import json
import urllib3
from urllib3.exceptions import InsecureRequestWarning
import urllib.parse
import time
import os
# --- Configuration ---
# Seed keywords that make up the first cycle's work list.
initial_keywords = [
    "银行",
    "存款",
    "利息",
    "利率",
    "存钱",
    "提前还房贷",
    "房贷",
    "手机银行",
    "理财",
    "退税",
    "活期",
    "定期",
    "攒钱",
    "存单",
    "银行卡",
    "房产契税",
    "银行贷款",
    "自动转存",
    "存单",
    "存折",
    "大额存单",
    "阶梯式存钱",
]

# File that receives the "content,score" result lines.
output_filename = "douyin_xiala.txt"

# Score given to the first suggestion of a response; later ones score lower.
start_score = 20

# Upper bound on the number of crawl cycles.
num_cycles = 10

# --- Retry settings ---
# Maximum number of attempts for a single keyword.
max_retries = 3
# Pause, in seconds, before a retry.
retry_delay_time = 5.0

# --- Request settings ---
# Pause, in seconds, after each keyword has been handled.
request_sleep_time = 2.0
# Timeout, in seconds, for a single HTTP request.
request_timeout = 25

# URL-encoded keyword that appears inside base_url; it is the placeholder
# that gets swapped for each real keyword, so it must match base_url exactly.
original_encoded_keyword = "%E9%93%B6%E8%A1%8C%E6%8E%92"
# Base request URL (its query parameters must still be valid). The
# ``keyword=`` parameter holds the URL-encoded placeholder keyword that is
# replaced with each real keyword before a request is sent.
base_url = 'https://www.douyin.com/aweme/v1/web/search/sug/?device_platform=webapp&aid=6383&channel=channel_pc_web&keyword=%E9%93%B6%E8%A1%8C%E6%8E%92&source=aweme_video_web&from_group_id=7492793884725103884&update_version_code=170400&pc_client_type=1&pc_libra_divert=Mac&support_h265=1&support_dash=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1440&screen_height=900&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Edge&browser_version=135.0.0.0&browser_online=true&engine_name=Blink&engine_version=135.0.0.0&os_name=Mac+OS&os_version=10.15.7&cpu_core_num=2&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&webid=7496420735217305107&uifid=3d19da18631c82d4defe04a0ec2c3c4673f37bfeb7cebeed4cb99b3c023f108f9c4cf6d8d01f885b842e1a400dc83c981d10278c26ca421a05b462f3ec0bb63c737b6cf25b3b243e8c9dd740d47c1e920ed1c1ff2c8c870febbebfd09d97c75842adf3fae79d70a522ca9dcdc8fb3d9fe460374993cc9ece14989b7a90c894ae7570bb8741911d2d3bf69adc5ba5fc25f9253539c35f519bf92b743d80068ad3&msToken=vqsZJM4ixyZwtjFoj4EbedUFaUN-BuL2s-mlssbCS4f4A8qt3RnT04txHu5o3mn1GOmO7FsFPSsEzy8VOmUdfXRqwFBtgMGXm_V71fcws8dnZQ7e5rbLxGvhjS8d1DFAdyr-9K2amvOW44PdmEQsbIBsIsUovgf5gd5pA81wY0ls&a_bogus=Q6UVktX7E2%2FbFdKGucPne5ZUlSfMNsuyzNiKRqMT9PuKGZtchWN1paeAjowYe-dV2upkwoZ7CdFMGdxc%2FGUhZHnpumpDuKX6DT55I6mL2qqgGlGBEqRmCLzFuw0F8cTLe%2F9GiAgRlssx1DOlnr5BAd5aC5Fo-ORpWNMSdZT99EAgfA8kwo3iOChkxgkS0CxG&verifyFp=verify_m9to01we_bubjojli_Bn25_4FBT_8Vkq_xP7No5tkMI3w&fp=verify_m9to01we_bubjojli_Bn25_4FBT_8Vkq_xP7No5tkMI3w'

# Cookie header value (must still be valid).
# The literal used to be broken across physical lines inside one pair of
# single quotes, which is a syntax error; it is now assembled from adjacent
# string literals, which Python joins into a single string with no newline.
# NOTE(review): this is a hard-coded session credential. Consider loading it
# from an environment variable or an untracked file instead of source code.
cookie_string = (
    '__ac_nonce=06808a30000bd1533b501; __ac_signature=_02B4Z6wo00f01s0CFBAAAIDCKgJDBu25t87NIhCAANtVbc; ttwid=1%7CtLCsB59X2g46kv-9OmzjoKymhLSvS4aeKxUO9Mmb0Fs%7C1745396481%7Cc082e1352d88ba6e7ab64920ef3919fa728d80a6404aad08735adfdafac82c5a; UIFID_TEMP=3d19da18631c82d4defe04a0ec2c3c4673f37bfeb7cebeed4cb99b3c023f108f9c4cf6d8d01f885b842e1a400dc83c98d6572229210832de290c96ec4ce32bc43b4813e3ddc7ef317408c97e6e034ed7; x-web-secsdk-uid=41c278bc-6fc9-4909-b7bc-688e67ca5cb4; douyin.com; s_v_web_id=verify_m9to01we_bubjojli_Bn25_4FBT_8Vkq_xP7No5tkMI3w; device_web_cpu_core=2; device_web_memory_size=8; hevc_supported=true; dy_swidth=1440; dy_sheight=900; strategyABtestKey=%221745396485.556%22; volume_info=%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Afalse%2C%22volume%22%3A0.5%7D; passport_csrf_token=ed5e8d35ac13ae2498bb395387776372; passport_csrf_token_default=ed5e8d35ac13ae2498bb395387776372; xgplayer_user_id=598126046839; FORCE_LOGIN=%7B%22videoConsumedRemainSeconds%22%3A180%7D; biz_trace_id=1cbc5280; fpk1=U2FsdGVkX18+qEuyxIlrvkst7zhrHh8CLJG5XN5uPBArj0DxKG+L01tusLaWxI5qCU38zDXsIpweTtQKHExXnw==; fpk2=1b04588d93b1afac45573a43b9e15594; __security_mc_1_s_sdk_crypt_sdk=1cc71895-4f0a-bafb; bd_ticket_guard_client_web_domain=2; sdk_source_info=7e276470716a68645a606960273f276364697660272927676c715a6d6069756077273f276364697660272927666d776a68605a607d71606b766c6a6b5a7666776c7571273f275e58272927666a6b766a69605a696c6061273f27636469766027292762696a6764695a7364776c6467696076273f275e5827292771273f273532353c3d31333c3630313234272927676c715a75776a716a666a69273f2763646976602778; '
    'bit_env=lDLmrc_DyjLeh0CoCX8rVkBNVEnyGb386kITo3OsM-Yy9ubjCm-GIaUi-NjvDNGA_fEr0KHytBQvXtWru2mlCQeIi5L4DiQmVUDkIMw1FPoVrmhyK-ycU1QNl95GaaABFU1UrKMaeK2WxwuqUlCvi2JsXV3Dm7Rn21KR-FPUEi4ofMRcNtUqejwLfrNjY7zJHHZ5ja0OQPnBF15SklHhQa6CvOuF--_bveHYwDRyPNVRaY1DkMaDDvX-hPck3KwVL3EBAtjlcFGcQFWeLvgvm5krZAL7YZrCMLK4j3Vl8OjUpVCdD-ABRH2aT_2wnbazF8J0_dkpLR_dIijcJeCI1_kILO1JsAZ_IL30ww_ZD9avo3LXrGh8XUKMouIMFnysBw_Wlku0NfNk9EPqmqkPek110c8VbfxAORAmQKbad_Vr08LQLJ1P4eqzWkC6JLxn0b1QB-eX24g5YwAYldbMjRAyzg2NZuqclDZUIsX6AzDRrb25XUC5zKLZXHy5bsjzfR1F9kYFYVfbvCM3FBUonO6WxtPd5QTFkFKBxsTiLkCkamiRM-fNVtRohcrWO6WX; gulu_source_res=eyJwX2luIjoiN2VhNzNmZDFiYTQwNGIwYzQ1NzEyMzdlOTRjOGIzNWU3ZTY0NDliYzVlYTczNGU1Y2E4MDU4ZTIxZmExYjc2MCJ9; passport_auth_mix_state=9m6vyymhkwa1h3ojt0zdkjhbzgzh63h8; passport_assist_user=CkDiLNiQ2nBuUqwauAw5ahpfema8DGjE6PmWcJoQNyrC_iJqM1VEI2GlnYwgfeUYWo9oe_l-TmEKmSOjXW4vK5LmGkoKPAAAAAAAAAAAAABO6SLc7fPswMrYPTzIIzsBe7-TaEXg7vyJlaiHBFNXMezn6ou1mxxxLgCMzshgOVTQ9hCYv-8NGImv1lQgASIBA3Y52Ik%3D; n_mh=WxVUnnGzss-L8VcztocA_B9H8caaMvcQRZfc9s5CPTs; sid_guard=5f9f9d5410543dc28656374db9089924%7C1745396505%7C5184000%7CSun%2C+22-Jun-2025+08%3A21%3A45+GMT; uid_tt=8a03611e59a322e033f6d825a5c997bb; uid_tt_ss=8a03611e59a322e033f6d825a5c997bb; sid_tt=5f9f9d5410543dc28656374db9089924; sessionid=5f9f9d5410543dc28656374db9089924; sessionid_ss=5f9f9d5410543dc28656374db9089924; is_staff_user=false; sid_ucp_v1=1.0.0-KDUxYzJmNGVmNzA3YTE1ZDQwZDU3N2NkODU5ODgxNzFlNTEyYzEzOWYKIAjDmYHKw8xWEJnGosAGGO8xIAwwyKmOqgY4B0D0B0gEGgJscSIgNWY5ZjlkNTQxMDU0M2RjMjg2NTYzNzRkYjkwODk5MjQ; ssid_ucp_v1=1.0.0-KDUxYzJmNGVmNzA3YTE1ZDQwZDU3N2NkODU5ODgxNzFlNTEyYzEzOWYKIAjDmYHKw8xWEJnGosAGGO8xIAwwyKmOqgY4B0D0B0gEGgJscSIgNWY5ZjlkNTQxMDU0M2RjMjg2NTYzNzRkYjkwODk5MjQ; login_time=1745396505361; is_dash_user=1; '
    'UIFID=3d19da18631c82d4defe04a0ec2c3c4673f37bfeb7cebeed4cb99b3c023f108f9c4cf6d8d01f885b842e1a400dc83c981d10278c26ca421a05b462f3ec0bb63c737b6cf25b3b243e8c9dd740d47c1e920ed1c1ff2c8c870febbebfd09d97c75842adf3fae79d70a522ca9dcdc8fb3d9fe460374993cc9ece14989b7a90c894ae7570bb8741911d2d3bf69adc5ba5fc25f9253539c35f519bf92b743d80068ad3; stream_recommend_feed_params=%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A1440%2C%5C%22screen_height%5C%22%3A900%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A2%2C%5C%22device_memory%5C%22%3A8%2C%5C%22downlink%5C%22%3A10%2C%5C%22effective_type%5C%22%3A%5C%224g%5C%22%2C%5C%22round_trip_time%5C%22%3A50%7D%22; SelfTabRedDotControl=%5B%5D; FOLLOW_LIVE_POINT_INFO=%22MS4wLjABAAAAWweIz5waTCfF2chzYqlBsCc9gI5Fy4zzc5cDlvk34hY%2F1745424000000%2F0%2F1745396508133%2F0%22; stream_player_status_params=%22%7B%5C%22is_auto_play%5C%22%3A0%2C%5C%22is_full_screen%5C%22%3A0%2C%5C%22is_full_webscreen%5C%22%3A0%2C%5C%22is_mute%5C%22%3A0%2C%5C%22is_speed%5C%22%3A1%2C%5C%22is_visible%5C%22%3A1%7D%22; home_can_add_dy_2_desktop=%221%22; _bd_ticket_crypt_cookie=2307b75d89e851aaddb3a3920ef4a000; __security_mc_1_s_sdk_sign_data_key_web_protect=02559188-44b3-b06f; __security_mc_1_s_sdk_cert_key=488d4411-4ceb-b0eb; __security_server_data_status=1; bd_ticket_guard_client_data=eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWl0ZXJhdGlvbi12ZXJzaW9uIjoxLCJiZC10aWNrZXQtZ3VhcmQtcmVlLXB1YmxpYy1rZXkiOiJCSHc2Q1FndDBWdDBWNjZnS0VYTkEwbUVyL1h4SXFhd2RCKzJjTTNFUkJSc05yV0JuRkRBUEZ5WlRHN0J2RU1UOHBDRXFwM1pmOW05Q0ZmWnBRdFN0ME09IiwiYmQtdGlja2V0LWd1YXJkLXdlYi12ZXJzaW9uIjoyfQ%3D%3D; IsDouyinActive=true; odin_tt=cb9c58da3d3a7b0ec27a87bbc39be6b18e8272e8241d961e97b852c7aa4e11011b360f134fd37c25518b756fef29e8dd659806bc5e0ddb02bcd67589f50391f5; passport_fe_beating_status=true; publish_badge_show_info=%220%2C0%2C0%2C1745396515314%22; xg_device_score=6.292849610647006'
)
# HTTP request headers, sent unchanged with every suggestion request.
headers = {
    # Content negotiation.
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    # Session cookie defined above.
    'cookie': cookie_string,
    'dnt': '1',
    'priority': 'u=1, i',
    'referer': 'https://www.douyin.com/?recommend=1',
    # Client hints and fetch metadata copied from the browser session.
    'sec-ch-ua': (
        '"Microsoft Edge";v="135", '
        '"Not-A.Brand";v="8", '
        '"Chromium";v="135"'
    ),
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"macOS"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'uifid': '3d19da18631c82d4defe04a0ec2c3c4673f37bfeb7cebeed4cb99b3c023f108f9c4cf6d8d01f885b842e1a400dc83c981d10278c26ca421a05b462f3ec0bb63c737b6cf25b3b243e8c9dd740d47c1e920ed1c1ff2c8c870febbebfd09d97c75842adf3fae79d70a522ca9dcdc8fb3d9fe460374993cc9ece14989b7a90c894ae7570bb8741911d2d3bf69adc5ba5fc25f9253539c35f519bf92b743d80068ad3',
    'user-agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0'
    ),
}
# --- Initialisation ---
# Silence urllib3's InsecureRequestWarning for the whole run.
urllib3.disable_warnings(InsecureRequestWarning)

# Work list for the current cycle, seeded from the configured keywords.
current_keywords = set(initial_keywords)
# Every keyword that has already been attempted, across all cycles.
all_processed_keywords = set()
# Suggestions collected during a cycle that feed the following cycle.
next_cycle_keywords = set()

print(f"开始处理关键词,共计 {num_cycles} 轮循环。")
print(f"结果将追加写入到文件: {output_filename}")
output_already_exists = os.path.exists(output_filename)
if output_already_exists:
    # Results are appended, so tell the user earlier content is kept.
    print(f"注意:文件 {output_filename} 已存在,将在其后追加内容。")
# --- Main loop ---
def _score_suggestions(sug_list, top_score):
    """Convert a raw ``sug_list`` into a list of ``(content, score)`` pairs.

    The item at position ``i`` receives ``top_score - i``; iteration stops as
    soon as that score would fall below 1. Malformed or blank items still
    occupy a position, so the scores of the kept items may have gaps.

    Args:
        sug_list: Iterable of suggestion items taken from the JSON response.
        top_score: Score assigned to the first position.

    Returns:
        A list of ``(stripped_content, score)`` tuples for every item that is
        a dict with a non-blank string under ``'content'``.
    """
    scored = []
    for index, item in enumerate(sug_list):
        score = top_score - index
        if score < 1:
            break
        if not (isinstance(item, dict) and 'content' in item):
            print(f" - 忽略格式不符的项目: {item}")
            continue
        content = item['content']
        # A non-string value (e.g. null in the JSON) is skipped instead of
        # raising on .strip() and aborting the whole keyword.
        if not isinstance(content, str):
            continue
        content = content.strip()
        if content:
            scored.append((content, score))
    return scored


for cycle in range(1, num_cycles + 1):
    print(f"\n--- 开始第 {cycle}/{num_cycles} 轮 ---")
    print(f"本轮需要处理 {len(current_keywords)} 个关键词。")
    if not current_keywords:
        print("没有新的关键词需要处理,提前结束循环。")
        break

    keywords_processed_this_cycle = 0
    suggestions_added_this_cycle = 0
    new_keywords_found_this_cycle = set()
    # Iterate over a snapshot of the work list for this cycle.
    keywords_to_process = current_keywords.copy()

    for keyword in keywords_to_process:
        if keyword in all_processed_keywords:
            continue
        print(f" 正在处理关键词: {keyword}")

        # --- Retry loop: only SSL errors are retried ---
        for attempt in range(max_retries):
            # Reset on every attempt so the error handler below can never
            # report the status code of an earlier keyword's response.
            response = None
            try:
                # 1. URL-encode the keyword and swap it into the base URL.
                encoded_keyword = urllib.parse.quote(keyword)
                current_url = base_url.replace(original_encoded_keyword, encoded_keyword)

                # 2. Send the request.
                # NOTE(review): verify=False disables TLS certificate
                # verification; confirm this is really required.
                response = requests.get(
                    current_url,
                    headers=headers,
                    verify=False,
                    timeout=request_timeout,
                )
                response.raise_for_status()  # Raise on HTTP error status.

                # 3. Parse the JSON body; a non-object payload is treated as
                #    "no suggestions".
                data = response.json()
                sug_list = data.get('sug_list') if isinstance(data, dict) else None

                # 4. Score the suggestions and persist them.
                if sug_list:
                    scored = _score_suggestions(sug_list, start_score)
                    if scored:
                        with open(output_filename, 'a', encoding='utf-8') as f:
                            f.writelines(
                                f"{content},{score}\n" for content, score in scored
                            )
                        suggestions_added_this_cycle += len(scored)
                        # Queue unseen suggestions for the next cycle.
                        for content, _score in scored:
                            if content not in all_processed_keywords:
                                next_cycle_keywords.add(content)
                                new_keywords_found_this_cycle.add(content)
                        print(f" 成功处理 {len(scored)} 条建议。")
                    else:
                        # The list was non-empty but held nothing usable.
                        print(" 未找到有效建议或建议格式不符。")
                else:
                    print(f" 关键词 '{keyword}' 未返回任何建议 (sug_list 为空或不存在)。")
                break  # Success: leave the retry loop.
            except requests.exceptions.SSLError as ssl_err:
                print(f" ! SSL Error (尝试 {attempt + 1}/{max_retries}): {ssl_err}")
                if attempt < max_retries - 1:
                    print(f" 将在 {retry_delay_time} 秒后重试...")
                    time.sleep(retry_delay_time)
                else:
                    print(f" ! 达到最大重试次数,放弃关键词 '{keyword}'。")
            except (requests.exceptions.RequestException, json.JSONDecodeError) as req_err:
                print(f" ! 不可重试的错误: {req_err}")
                if response is not None:
                    print(f" ! 状态码: {response.status_code}")
                    if response.status_code == 403:
                        print(" ! 收到 403 Forbidden,Cookie/参数可能失效或触发限制。")
                break  # Not retryable: leave the retry loop.
            except Exception as e:
                print(f" ! 处理关键词 '{keyword}' 时发生未知错误: {e}")
                break  # Unknown errors are not retried either.
        # --- End of retry loop ---

        # The keyword counts as attempted whether or not it succeeded.
        all_processed_keywords.add(keyword)
        keywords_processed_this_cycle += 1
        # Pause after every keyword.
        time.sleep(request_sleep_time)

    # --- Prepare the next cycle ---
    print(f"\n--- 第 {cycle}/{num_cycles} 轮结束 ---")
    print(f"本轮实际尝试处理关键词数: {keywords_processed_this_cycle}")
    print(f"本轮共写入建议条数: {suggestions_added_this_cycle}")
    print(f"本轮发现新关键词数 (待加入下一轮): {len(new_keywords_found_this_cycle)}")
    # Only suggestions that were never attempted go into the next cycle.
    new_keywords_for_next_cycle = next_cycle_keywords - all_processed_keywords
    print(f"下一轮将处理的新关键词数: {len(new_keywords_for_next_cycle)}")
    current_keywords = new_keywords_for_next_cycle
    next_cycle_keywords = set()  # Start the next cycle with an empty collector.

print(f"\n--- 所有 {num_cycles} 轮循环处理完毕 ---")
print(f"总共处理过的独立关键词数量: {len(all_processed_keywords)}")
print(f"请检查最终结果文件: {output_filename}")
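# Stray page text "评论" ("Comments") left over from the web page this script was copied from; not part of the program.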