携程 景点评论逆向

Sherry 发布于 2025-06-16 94 次阅读 文章 最后更新于 2025-06-16 2259 字


🍔环境准备

  • Python3.8
  • Pycharm
  • nodejs

🍕项目实施

抓包定位到评论接口

可以看到载荷里面有两个加密参数

尝试搜索可以定位到,我们跟进看看

可以看到断在了这里,上方则是x-traceID参数生成逻辑

e是固定值

成功拿到x-traceID

而我们的_fxpcqlniredt就是刚刚的e

也就是09031085319076230120

🍬验收结果

import requests
import json
import execjs

# Request headers: only a desktop Chrome user-agent string is sent.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
}
# Cookies sent with every request of this script.
# "GUID" carries the same value the request body below sends as head.cid.
# NOTE(review): cticket / login_uid / DUID look like values tied to one logged-in
# session -- presumably they expire and must be re-captured; confirm before reuse.
cookies = {
    "UBT_VID": "1742531264871.43d29ndXOj64",
    "GUID": "09031085319076230120",
    "MKT_CKID": "1742531268102.blche.62cr",
    "_RSG": "fnKLVuOXlT1zZ8giHg9VoA",
    "_RDG": "28782f305beb90264c3ce3c613754f46da",
    "_RGUID": "176f858e-1f55-4c69-ac9f-26a46ca888fd",
    "_ga": "GA1.1.1929460418.1749717176",
    "_RF1": "112.3.14.55",
    "MKT_Pagesource": "PC",
    "ibulanguage": "CN",
    "ibulocale": "zh_cn",
    "cookiePricesDisplayed": "CNY",
    "cticket": "ACD9B385D9E27E3C78F2D2F1C58C9DA00548F1A301A9A1E406E266B4F907BC03",
    "login_type": "0",
    "login_uid": "6B37F1BB676AF2AB0E84A7BC0F7B4332",
    "DUID": "u=85CFB2892A20B7BB3A5B209830B9FD04&v=0",
    "IsNonUser": "F",
    "AHeadUserInfo": "VipGrade=0&VipGradeName=%C6%D5%CD%A8%BB%E1%D4%B1&UserName=&NoReadMessageCount=0",
    "_udl": "708D70C2B179E2F91CC5ED1C2CCE362D",
    "intl_ht1": "h4=2_128902177,2_99048095,4_39520069,4_43578053",
    "Hm_lvt_a8d6737197d542432f4ff4abc6e06384": "1749717173,1749775595",
    "Hm_lpvt_a8d6737197d542432f4ff4abc6e06384": "1749775595",
    "HMACCOUNT": "6075CC8DFEE491D7",
    "Session": "smartlinkcode=U130727&smartlinklanguage=zh&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=",
    "Union": "AllianceID=4902&SID=130727&OUID=&createtime=1749775596&Expires=1750380395942",
    "_ga_5DVRDQD429": "GS2.1.s1749775595$o2$g0$t1749775600$j55$l0$h0",
    "_ga_B77BES1Z8Z": "GS2.1.s1749775595$o2$g0$t1749775600$j55$l0$h0",
    "_ga_9BZF483VNQ": "GS2.1.s1749775595$o2$g0$t1749775600$j55$l0$h0",
    "nfes_isSupportWebP": "1",
    "_bfa": "1.1742531264871.43d29ndXOj64.1.1749775607082.1749775614452.3.4.290510",
    "_jzqco": "%7C%7C%7C%7C%7C1.991455430.1742531268100.1749775610305.1749775616566.1749775610305.1749775616566.0.0.0.13.13"
}
# Endpoint that returns the comment list for an attraction.
url = "https://m.ctrip.com/restapi/soa2/13444/json/getCommentCollapseList"

# Read the JavaScript source and compile it so that its get_can() function
# can be invoked from Python.
with open('./加密参数.js', mode='r', encoding='utf-8') as f:
    jscode = f.read()
jsexec = execjs.compile(jscode)

# get_can() returns two values, unpacked in order: the _fxpcqlniredt value
# first and the x-traceID value second.
fxpcqlniredt, traceID = jsexec.call('get_can')

# Query-string parameters attached to the request URL.
params = {}
params["_fxpcqlniredt"] = fxpcqlniredt
params["x-traceID"] = traceID

# The "head" object does not depend on the page number, so it is built once
# here instead of being re-created on every iteration.
head = {
    "cid": "09031085319076230120",
    "ctok": "",
    "cver": "1.0",
    "lang": "01",
    "sid": "8888",
    "syscode": "09",
    "auth": "",
    "xsid": "",
    "extension": []
}

# Request pages 1 through 9 of the comment list; only pageIndex changes.
for i in range(1, 10):
    payload = {
        "arg": {
            "channelType": 2,
            "collapseType": 0,
            "commentTagId": 0,
            "pageIndex": i,
            "pageSize": 10,
            "poiId": 75611,
            "sourceType": 1,
            "sortType": 3,
            "starType": 0
        },
        "head": head
    }
    # Compact separators serialise the body without any whitespace.
    data = json.dumps(payload, separators=(',', ':'))
    # requests applies no timeout unless one is passed, so a stalled
    # connection would otherwise hang the whole crawl.
    response = requests.post(url, headers=headers, cookies=cookies, params=params, data=data, timeout=10)

    print(response.json())

成功拿到评论数据

爬取多页数据,更换pageIndex

🌮景点列表

poiId为景点ID参数;对景点列表翻页并抓包,即可找到景点列表的请求

加密参数与评论接口的一样(_fxpcqlniredt 和 x-traceID)

成功爬取到数据

import requests
import json
import execjs

# Request headers: only a desktop Chrome user-agent string is sent.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
}
# Cookies sent with the attraction-list request.
# "GUID" carries the same value the request body below sends as head.cid.
# NOTE(review): cticket / login_uid / DUID look like values tied to one logged-in
# session -- presumably they expire and must be re-captured; confirm before reuse.
cookies = {
    "UBT_VID": "1742531264871.43d29ndXOj64",
    "GUID": "09031085319076230120",
    "MKT_CKID": "1742531268102.blche.62cr",
    "_RSG": "fnKLVuOXlT1zZ8giHg9VoA",
    "_RDG": "28782f305beb90264c3ce3c613754f46da",
    "_RGUID": "176f858e-1f55-4c69-ac9f-26a46ca888fd",
    "_ga": "GA1.1.1929460418.1749717176",
    "_RF1": "112.3.14.55",
    "MKT_Pagesource": "PC",
    "ibulanguage": "CN",
    "ibulocale": "zh_cn",
    "cookiePricesDisplayed": "CNY",
    "cticket": "ACD9B385D9E27E3C78F2D2F1C58C9DA00548F1A301A9A1E406E266B4F907BC03",
    "login_type": "0",
    "login_uid": "6B37F1BB676AF2AB0E84A7BC0F7B4332",
    "DUID": "u=85CFB2892A20B7BB3A5B209830B9FD04&v=0",
    "IsNonUser": "F",
    "AHeadUserInfo": "VipGrade=0&VipGradeName=%C6%D5%CD%A8%BB%E1%D4%B1&UserName=&NoReadMessageCount=0",
    "_udl": "708D70C2B179E2F91CC5ED1C2CCE362D",
    "intl_ht1": "h4=2_128902177,2_99048095,4_39520069,4_43578053",
    "Hm_lvt_a8d6737197d542432f4ff4abc6e06384": "1749717173,1749775595",
    "HMACCOUNT": "6075CC8DFEE491D7",
    "Session": "smartlinkcode=U130727&smartlinklanguage=zh&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=",
    "Union": "AllianceID=4902&SID=130727&OUID=&createtime=1749775596&Expires=1750380395942",
    "nfes_isSupportWebP": "1",
    "Hm_lpvt_a8d6737197d542432f4ff4abc6e06384": "1749777710",
    "_ga_9BZF483VNQ": "GS2.1.s1749777537$o3$g1$t1749777802$j60$l0$h0",
    "_ga_5DVRDQD429": "GS2.1.s1749777537$o3$g1$t1749777802$j60$l0$h0",
    "_ga_B77BES1Z8Z": "GS2.1.s1749777537$o3$g1$t1749777802$j60$l0$h0",
    "_bfa": "1.1742531264871.43d29ndXOj64.1.1749778323390.1749778339606.3.35.10650142842",
    "_jzqco": "%7C%7C%7C%7C%7C1.991455430.1742531268100.1749778325960.1749778341775.1749778325960.1749778341775.0.0.0.44.44"
}
# Endpoint that returns the attraction list.
url = "https://m.ctrip.com/restapi/soa2/18109/json/getAttractionList"

# Read the JavaScript source and compile it so that its get_can() function
# can be invoked from Python.
with open('./加密参数.js', mode='r', encoding='utf-8') as f:
    jscode = f.read()
jsexec = execjs.compile(jscode)

# get_can() returns two values, unpacked in order: the _fxpcqlniredt value
# first and the x-traceID value second.
fxpcqlniredt, traceID = jsexec.call('get_can')

# Query-string parameters attached to the request URL.
params = {}
params["_fxpcqlniredt"] = fxpcqlniredt
params["x-traceID"] = traceID
# JSON body of the attraction-list request.
payload = {
    "head": {
        "cid": "09031085319076230120",
        "ctok": "",
        "cver": "1.0",
        "lang": "01",
        "sid": "8888",
        "syscode": "999",
        "auth": "",
        "xsid": "",
        "extension": []
    },
    "scene": "online",
    "districtId": 2,
    # NOTE(review): "index" is presumably the page number of the list -- confirm.
    "index": 2,
    "sortType": 1,
    "count": 10,
    "filter": {
        "filterItems": []
    },
    "returnModuleType": "product"
}
# Compact separators serialise the body without any whitespace. The dict is
# kept under its own name so `data` is only ever the serialised string.
data = json.dumps(payload, separators=(',', ':'))

# requests applies no timeout unless one is passed, so a stalled connection
# would otherwise block the script indefinitely.
response = requests.post(url, headers=headers, cookies=cookies, params=params, data=data, timeout=10)

print(response.json())

🌭地区ID

经过多次测试发现districtId为地区ID

fiddler抓包找到districtId

python正则匹配可以成功拿到id和名称

import re
import requests
# Silence urllib3 warnings; the request below is sent with verify=False.
requests.packages.urllib3.disable_warnings()

# Request headers: only a desktop Chrome user-agent string is sent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
}
# Cookies sent with the destination-page request.
# Percent-encoded values use a single "%": a doubled "%%" in a plain Python
# string literal is two literal percent signs and would corrupt the value.
# NOTE(review): cticket / login_uid / DUID look like values tied to one logged-in
# session -- presumably they expire and must be re-captured; confirm before reuse.
cookies = {
    "UBT_VID": "1742531264871.43d29ndXOj64",
    "GUID": "09031085319076230120",
    "MKT_CKID": "1742531268102.blche.62cr",
    "_RSG": "fnKLVuOXlT1zZ8giHg9VoA",
    "_RDG": "28782f305beb90264c3ce3c613754f46da",
    "_RGUID": "176f858e-1f55-4c69-ac9f-26a46ca888fd",
    "_ga": "GA1.1.1929460418.1749717176",
    "_RF1": "112.3.14.55",
    "MKT_Pagesource": "PC",
    "ibulanguage": "CN",
    "ibulocale": "zh_cn",
    "cookiePricesDisplayed": "CNY",
    "cticket": "ACD9B385D9E27E3C78F2D2F1C58C9DA00548F1A301A9A1E406E266B4F907BC03",
    "login_type": "0",
    "login_uid": "6B37F1BB676AF2AB0E84A7BC0F7B4332",
    "DUID": "u=85CFB2892A20B7BB3A5B209830B9FD04&v=0",
    "IsNonUser": "F",
    "AHeadUserInfo": "VipGrade=0&VipGradeName=%C6%D5%CD%A8%BB%E1%D4%B1&UserName=&NoReadMessageCount=0",
    "_udl": "708D70C2B179E2F91CC5ED1C2CCE362D",
    "intl_ht1": "h4=2_128902177,2_99048095,4_39520069,4_43578053",
    "Hm_lvt_a8d6737197d542432f4ff4abc6e06384": "1749717173,1749775595",
    "HMACCOUNT": "6075CC8DFEE491D7",
    "Session": "smartlinkcode=U130727&smartlinklanguage=zh&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=",
    "Union": "AllianceID=4902&SID=130727&OUID=&createtime=1749775596&Expires=1750380395942",
    "nfes_isSupportWebP": "1",
    "Hm_lvt_e4211314613fcf074540918eb10eeecb": "1749775602",
    "Hm_lpvt_a8d6737197d542432f4ff4abc6e06384": "1749777710",
    "_ga_9BZF483VNQ": "GS2.1.s1749777537$o3$g1$t1749777802$j60$l0$h0",
    "_ga_5DVRDQD429": "GS2.1.s1749777537$o3$g1$t1749777802$j60$l0$h0",
    "_ga_B77BES1Z8Z": "GS2.1.s1749777537$o3$g1$t1749777802$j60$l0$h0",
    "_pd": "%7B%22_o%22%3A26%2C%22s%22%3A349%2C%22_s%22%3A4%7D",
    "Hm_lpvt_e4211314613fcf074540918eb10eeecb": "1749781382",
    "_bfa": "1.1742531264871.43d29ndXOj64.1.1749781375760.1749781383118.4.6.0",
    "_jzqco": "%7C%7C%7C%7C%7C1.991455430.1742531268100.1749781378031.1749781385192.1749781378031.1749781385192.0.0.0.61.61"
}
# Destination page from which the district id and name are scraped.
url = "https://you.ctrip.com/place/shanghai2.html"
# NOTE(review): verify=False turns off TLS certificate verification --
# presumably only needed while traffic goes through the Fiddler proxy; confirm.
# The timeout keeps a stalled connection from blocking the script forever.
response = requests.get(url, headers=headers, cookies=cookies, verify=False, timeout=10)

# Captures the digits after "districtId": and the string after the next "name":.
REG = re.compile(r'"districtId":(\d+).*?"name":"([^"]+)"')

# Only the first occurrence is wanted. Fail with a clear message when the page
# contains no match instead of hitting a NameError on unassigned variables.
match = REG.search(response.text)
if match is None:
    raise ValueError(f"no districtId/name pair found in {url}")
districtId, name = match.groups()

print(name, districtId)

🍟地区列表

通过fiddler抓包可以看到生成地区链接的请求

我们对其发请求,正则匹配成功获取

import requests
import re
# Silence urllib3 warnings; the request below is sent with verify=False.
requests.packages.urllib3.disable_warnings()

# Request headers: only a desktop Chrome user-agent string is sent.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36",
}
# Cookies sent with the home-page request.
# Percent-encoded values use a single "%": a doubled "%%" in a plain Python
# string literal is two literal percent signs and would corrupt the value.
# NOTE(review): cticket / login_uid / DUID look like values tied to one logged-in
# session -- presumably they expire and must be re-captured; confirm before reuse.
cookies = {
    "UBT_VID": "1742531264871.43d29ndXOj64",
    "GUID": "09031085319076230120",
    "MKT_CKID": "1742531268102.blche.62cr",
    "_RSG": "fnKLVuOXlT1zZ8giHg9VoA",
    "_RDG": "28782f305beb90264c3ce3c613754f46da",
    "_RGUID": "176f858e-1f55-4c69-ac9f-26a46ca888fd",
    "_ga": "GA1.1.1929460418.1749717176",
    "_RF1": "112.3.14.55",
    "MKT_Pagesource": "PC",
    "ibulanguage": "CN",
    "ibulocale": "zh_cn",
    "cookiePricesDisplayed": "CNY",
    "cticket": "ACD9B385D9E27E3C78F2D2F1C58C9DA00548F1A301A9A1E406E266B4F907BC03",
    "login_type": "0",
    "login_uid": "6B37F1BB676AF2AB0E84A7BC0F7B4332",
    "DUID": "u=85CFB2892A20B7BB3A5B209830B9FD04&v=0",
    "IsNonUser": "F",
    "AHeadUserInfo": "VipGrade=0&VipGradeName=%C6%D5%CD%A8%BB%E1%D4%B1&UserName=&NoReadMessageCount=0",
    "_udl": "708D70C2B179E2F91CC5ED1C2CCE362D",
    "intl_ht1": "h4=2_128902177,2_99048095,4_39520069,4_43578053",
    "Hm_lvt_a8d6737197d542432f4ff4abc6e06384": "1749717173,1749775595",
    "HMACCOUNT": "6075CC8DFEE491D7",
    "Session": "smartlinkcode=U130727&smartlinklanguage=zh&SmartLinkKeyWord=&SmartLinkQuary=&SmartLinkHost=",
    "Union": "AllianceID=4902&SID=130727&OUID=&createtime=1749775596&Expires=1750380395942",
    "nfes_isSupportWebP": "1",
    "Hm_lvt_e4211314613fcf074540918eb10eeecb": "1749775602",
    "Hm_lpvt_a8d6737197d542432f4ff4abc6e06384": "1749777710",
    "_ga_9BZF483VNQ": "GS2.1.s1749777537$o3$g1$t1749777802$j60$l0$h0",
    "_ga_5DVRDQD429": "GS2.1.s1749777537$o3$g1$t1749777802$j60$l0$h0",
    "_ga_B77BES1Z8Z": "GS2.1.s1749777537$o3$g1$t1749777802$j60$l0$h0",
    "Hm_lpvt_e4211314613fcf074540918eb10eeecb": "1749782924",
    "_bfa": "1.1742531264871.43d29ndXOj64.1.1749782466948.1749782924562.4.13.0",
    "_jzqco": "%7C%7C%7C%7C%7C1.991455430.1742531268100.1749782468657.1749782926968.1749782468657.1749782926968.0.0.0.66.66",
    "_pd": "%7B%22_o%22%3A50%2C%22s%22%3A241%2C%22_s%22%3A4%7D"
}
# Home page whose HTML contains the destination links.
url = "https://you.ctrip.com/"

response = requests.get(url, headers=headers, cookies=cookies, verify=False, timeout=10)
# Regular expression that captures (link, place name) from each destination anchor.
pattern = r'<a href="(https://you\.ctrip\.com/place/[^"]+)" target="_blank">([^<]+)</a>'
matches = re.findall(pattern, response.text)

print(f"\n找到 {len(matches)} 个地区链接:")
print("-" * 60)

# De-duplicate by place name, keeping the first link seen for each name.
unique_places = {}
for url_link, place_name in matches:
    unique_places.setdefault(place_name, url_link)

# Print the entries ordered by place name.
for place_name, url_link in sorted(unique_places.items()):
    print(f"地区: {place_name:<15} 链接: {url_link}")

🥞完结

整体逆向难度很基础,主要还是抓包的过程逻辑要清楚,完结撒花🎆