admin管理员组文章数量:1530845
腾讯视频弹幕地址:http://mfm.video.qq.com/danmu?timestamp=0&target_id=xxxxx
1.在视频地址中获取target_id
2.通过视频cid和vid获取target_id
3.cid和vid在视频链接网页中可以抓取
4.在电视剧任意一集的页面中,可以抓取到每一集的vid;同一部剧各集的cid相同。
5.弹幕的timestamp参数每隔30s变化一次
代码
import requests
import json
import random
import os
from lxml import etree
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Default HTTP request headers: a desktop Chrome User-Agent string.
# Sent with the danmu download requests made in down_qq_danmu.
headers = {
'USER-AGENT': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}
def get_str_btw(s, f, b):
    """Return the text of *s* between the first *f* and the next *b*.

    If *f* does not occur in *s*, an empty string is returned.  If *b* does
    not occur after *f*, everything following *f* is returned.
    """
    _, _, after_front = s.partition(f)
    between, _, _ = after_front.partition(b)
    return between
def filename_find(filepath, return_type=0):
    """Return one component of the file name in *filepath*.

    Args:
        filepath: Path whose final component is inspected.
        return_type: Selects what is returned:
            0 - the base name (e.g. ``movie.en.srt``)
            1 - the extension including the dot (e.g. ``.srt``)
            2 - the base name without that extension (e.g. ``movie.en``)
            3 - the last two dot-separated parts (e.g. ``.en.srt``)

    Returns:
        The requested string, or ``None`` for any other *return_type*.
        For a name without a dot, types 1 and 3 yield an empty string.
    """
    basename = os.path.basename(filepath)
    parts = basename.split(".")
    if len(parts) < 2:
        # No dot in the name: there is no extension.  This must be decided
        # before indexing parts[-2], which would raise IndexError here.
        extension = ''
        extension_lang = ''
    else:
        extension = f'.{parts[-1]}'
        extension_lang = f'.{parts[-2]}.{parts[-1]}'
    filename_without_extension = basename[:len(basename) - len(extension)]

    results = {
        0: basename,                    # file name
        1: extension,                   # extension
        2: filename_without_extension,  # file name without extension
        3: extension_lang,              # language tag plus extension
    }
    return results.get(return_type)
def get_targetid(cid, vid):
    """Ask the danmu registration endpoint for the target id of one video.

    Args:
        cid: Cover (series) id of the video.
        vid: Video (episode) id.

    Returns:
        The text found between ``targetid=`` and the next ``&`` in the
        response body, or an empty string when that marker is absent.

    Raises:
        requests.RequestException: on connection failure or timeout.
    """
    # The host must carry its ".com" suffix; "access.video.qq" on its own
    # is not a resolvable domain.
    base_url = 'https://access.video.qq.com/danmu_manage/regist?vappid=97767206&vsecret=c0bdcbae120669fff425d0ef853674614aa659c605a613a4'
    # Build the request headers.
    request_headers = {
        'USER-AGENT': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    }
    payload = {
        "wRegistType": 2,
        "vecIdList": [vid],
        "wSpeSource": 0,
        "bIsGetUserCfg": 1,
        "mapExtData": {vid: {"strCid": cid, "strLid": ""}},
    }
    # Send the request; the timeout keeps a stalled server from hanging the
    # script forever.
    response = requests.post(
        url=base_url,
        headers=request_headers,
        data=json.dumps(payload),
        timeout=10,
    )
    # Extract the target id from the response body.
    return get_str_btw(response.text, 'targetid=', '&')
def seconds_to_time(seconds):
    """Format a duration in seconds as an ``H:MM:SS.cc`` timestamp.

    The value is rounded to the nearest centisecond first and then split
    with integer arithmetic, so binary floating-point error cannot drop a
    centisecond (``0.29 * 100`` is ``28.999...`` and would truncate to 28).

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        A string such as ``"1:01:01.50"``.
    """
    total_centis = int(round(seconds * 100))
    total_seconds, centis = divmod(total_centis, 100)
    minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return "%d:%02d:%02d.%02d" % (hours, minutes, secs, centis)
def sort_comment(timestamp, res):
    """Group the comments of one response by second.

    Args:
        timestamp: Centre of the window; the seconds ``timestamp - 15`` up to
            but excluding ``timestamp + 15`` are covered.
        res: Mapping with a ``"comments"`` list whose items each carry a
            ``'timepoint'`` value.

    Returns:
        A list of 30 lists, one per second of the window in ascending order,
        each holding the comments whose ``'timepoint'`` equals that second.
    """
    ordered = sorted(res["comments"], key=lambda item: item['timepoint'])
    return [
        [entry for entry in ordered if entry['timepoint'] == second]
        for second in range(timestamp - 15, timestamp + 15)
    ]
def down_qq_danmu(targetid, danmu_name):
    """Download the danmu of one video and write them to an ASS file.

    The danmu endpoint is queried once per 30-second window (timestamp
    15, 45, 75, ...).  For every second of a window at most one comment,
    picked at random, is written as a right-to-left scrolling dialogue
    line.  Downloading stops at the first window whose ``count`` is 0.

    Args:
        targetid: Target id of the video, as returned by get_targetid.
        danmu_name: Path of the ASS file to create; an existing file is
            overwritten.

    A window whose request or response parsing fails is skipped, so the
    download is best-effort.  KeyboardInterrupt is not swallowed.
    """
    # The host must carry its ".com" suffix; "mfm.video.qq" on its own is
    # not a resolvable domain.
    url = 'https://mfm.video.qq.com/danmu?target_id=' + targetid
    lanes = [20, 40, 60, 80]  # vertical positions cycled through line by line
    lane = 0
    # Mode "w" truncates an existing file, replacing remove-then-append.
    with open(danmu_name, "w", encoding='utf-8') as file:
        file.write('''[Script Info]
Title: bilibili ASS 弹幕在线转换
Original Script: 根据 251884753.xml 的弹幕信息,由 https://github/tiansh/us-danmaku 生成
ScriptType: v4.00+
Collisions: Normal
PlayResX: 560
PlayResY: 420
Timer: 10.0000
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Fix,Microsoft YaHei UI,20,&H66FFFFFF,&H66FFFFFF,&H66000000,&H66000000,1,0,0,0,100,100,0,0,1,2,0,2,20,20,2,0
Style: R2L,Microsoft YaHei UI,20,&H66FFFFFF,&H66FFFFFF,&H66000000,&H66000000,1,0,0,0,100,100,0,0,1,2,0,2,20,20,2,0
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
''')
        for window_mid in range(15, 100000, 30):
            try:
                response = requests.get(
                    url,
                    headers=headers,
                    params={"timestamp": window_mid},
                    verify=False,
                    timeout=10,
                )
                res = json.loads(response.content)  # JSON text -> dict
                # A zero count means there are no more danmu to fetch.
                if res['count'] == 0:
                    print('爬完了')
                    break
                buckets = sort_comment(window_mid, res)
            except (requests.RequestException, ValueError, KeyError, TypeError):
                # Best effort: skip a window that failed to download or parse.
                continue

            for bucket in buckets:
                if not bucket:
                    continue  # no comment at this second
                # Several comments may share one second; show a random one.
                chosen = random.choice(bucket)
                try:
                    text = chosen['content']
                    seconds = float(chosen['timepoint'])
                except (KeyError, TypeError, ValueError):
                    continue
                # Raw string: "\m" is not a valid escape sequence in a
                # normal string literal.
                style = r'\move(%s,%s,%s,%s)' % (
                    random.randint(560, 760), lanes[lane],
                    -random.randint(10, 100), lanes[lane])
                file.write('Dialogue: 0,%(start)s,%(end)s,%(styleid)s,,20,20,2,,{%(styles)s}%(text)s\n' % {
                    'start': seconds_to_time(seconds + random.random()),
                    'end': seconds_to_time(seconds + random.random() + 8),
                    'styles': style,
                    'text': text,
                    'styleid': 'Fix'})
                lane = (lane + 1) % len(lanes)
def qqvideo_bullet(file_path, cid, vid):
    """Download the danmu for a video next to a local media file.

    The output file is placed in the directory of *file_path* and is named
    after it, with the extension replaced by ``.Danish.ass``.

    Args:
        file_path: Path of the local media file the subtitles belong to.
        cid: Cover (series) id of the video.
        vid: Video (episode) id.

    Returns:
        The path of the ASS file that was written.
    """
    targetid = get_targetid(cid, vid)
    # os.path.join keeps a bare file name relative; plain concatenation with
    # "/" would turn an empty directory part into a path under the root.
    danmu_name = os.path.join(
        os.path.dirname(file_path),
        filename_find(file_path, 2) + ".Danish.ass",
    )
    down_qq_danmu(targetid, danmu_name)
    return danmu_name
def get_vid_list(url):
    """Return the vid of every episode of a series, given any episode URL.

    The page is fetched and the first matching inline ``<script>`` in its
    head is searched for the JSON array that follows ``"vip_ids":``; the
    ``'V'`` field of each array item is collected.

    Args:
        url: URL of any episode page of the series.

    Returns:
        A list with the ``'V'`` value of each item, in page order.

    Raises:
        requests.RequestException: on connection failure or timeout.
        IndexError: if the page has no matching script element.
        ValueError: if the extracted text is not valid JSON.
    """
    # The timeout keeps a stalled server from hanging the script forever.
    page = requests.get(url, timeout=10).text
    html_elem = etree.HTML(page)
    info = html_elem.xpath('//html/head/script[@r-notemplate="true"][@type="text/javascript"]/text()')[0]
    vid_info = json.loads(get_str_btw(info, '"vip_ids":', ',"comment_show_type"'))
    return [entry['V'] for entry in vid_info]
if __name__ == "__main__":
    # Tencent Video URI as returned by Douban; it carries the cover id (cid)
    # and the video id (vid) as query parameters.
    uri = "tenvideo2://?action=1&cover_id=ccd36tevohjt9ze&video_id=u0035tjlfmu&from=douban.doc_m"
    cid = get_str_btw(uri, 'cover_id=', '&')
    vid = get_str_btw(uri, 'video_id=', '&')
    file_path = r'I:\python练习\爱奇艺弹幕下载转换\[超体]Lucy.2014.BluRay.720p.x264.AC3-BtsHD.mkv'
    # Uncomment to download the danmu for the file above:
    # qqvideo_bullet(file_path, cid, vid)

    # The host must carry its ".com" suffix; "v.qq" on its own is not a
    # resolvable domain.
    url = "https://v.qq.com/x/cover/mzc00200js3mdvw/q00354i139r.html"
    vid_list = get_vid_list(url)
版权声明:本文标题:python 爬取腾讯视频弹幕 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:https://m.elefans.com/xitong/1726539465a1074573.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论