admin管理员组

文章数量:1551502

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

r'''

===========================

* 乐队我做东 m3u8 *

===========================

'''

import os
import random
import re
import shutil
import sys
import urllib
import urllib.parse
from time import sleep

import requests
from lxml import etree

def getWorkdir(subdir):
    """Return the download directory for *subdir*, creating it if needed.

    The parent directory depends on ``os.name``: a fixed download folder on
    Windows (``'nt'``), the Android shared download folder on ``'posix'``,
    and the current working directory otherwise.  When the platform-specific
    parent does not exist (e.g. a desktop Linux/macOS machine, where
    ``os.name`` is also ``'posix'``), the current working directory is used
    instead of failing.

    Args:
        subdir: Name of the sub-directory to create under the parent.

    Returns:
        Path of the working directory, which exists when this returns.

    Raises:
        OSError: If the directory cannot be created.
    """
    if os.name == 'nt':
        # PC
        base = 'F:\\BaiduNetdiskDownload'
    elif os.name == 'posix':
        # Phone
        base = '/storage/emulated/0/Download'
    else:
        base = os.getcwd()

    # The hard-coded folders only exist on the author's devices; fall back
    # to the current directory rather than crashing elsewhere.
    if not os.path.isdir(base):
        base = os.getcwd()

    workdir = os.path.join(base, subdir)
    # makedirs + exist_ok avoids both the check-then-create race and a
    # failure when *subdir* has more than one path component.
    os.makedirs(workdir, exist_ok=True)
    return workdir

def getM3U8(p_title):
    """Download one episode identified by its title.

    Looks up the episode page URL via the module-level ``phase_titles`` /
    ``phase_urls`` lists, fetches the page, extracts the m3u8 link from the
    embedded ``cms_player`` configuration, resolves it with
    ``reParser_m3u8`` and hands the segment list to ``getTS``.

    Args:
        p_title: Episode title; must be an entry of ``phase_titles`` that
            has a matching entry in ``phase_urls``.

    Returns:
        None.  Every failure is reported on stdout and the episode is
        skipped.
    """
    print('\n%s' % p_title)

    # Resolve the page URL outside the network ``try`` so that a bad title
    # is reported as such and cannot raise again while reporting.
    try:
        page_url = host_toc + phase_urls[phase_titles.index(p_title)]
    except (ValueError, IndexError):
        print(' ... unknown phase title ... %s' % p_title)
        return

    # Parse the episode page to get the m3u8 link.
    try:
        resp = requests.get(page_url, headers=header, timeout=30)
        resp.raise_for_status()
    except requests.RequestException:
        print(' ... failed opening phase page ... %s' % page_url)
        return
    p_content = resp.content.decode('utf-8', errors='ignore')

    found = re.findall(r'cms_player = {"yun":true,"url":"(.*?)"', p_content, re.S)
    if not found:
        print(' ... failed locating m3u8 link ... %s' % page_url)
        return
    # The URL is embedded in JSON, where "/" is escaped as "\/".
    url_m3u8 = found[0].replace('\\/', '/')

    parsed_flag, m3u8_items, host_m3u8 = reParser_m3u8(url_m3u8)
    if parsed_flag == 'Y':
        getTS(p_title, m3u8_items, host_m3u8)
    else:
        print(' ... failed parsing m3u8 content ... %s' % url_m3u8)

def reParser_m3u8(url, _depth=0):
    """Fetch an m3u8 playlist and return its list of ts segments.

    If the playlist references another playlist (a line ending in
    ``m3u8``), the first such reference is followed recursively.

    Args:
        url: Absolute URL of the playlist.
        _depth: Internal recursion counter; callers should not pass it.

    Returns:
        A tuple ``(flag, items, base)``.  ``flag`` is ``'Y'`` on success and
        ``'N'`` on failure; ``items`` is the list of segment entries exactly
        as written in the playlist; ``base`` is the playlist's directory URL
        with a trailing ``/``.  On failure ``items`` is ``[]`` and ``base``
        is ``''``.
    """
    # Guard against playlists that reference themselves or each other.
    if _depth > 5:
        return 'N', [], ''

    try:
        resp = requests.get(url, headers=header, timeout=30)
        resp.raise_for_status()
    except requests.RequestException:
        return 'N', [], ''
    m3u8_content = resp.content.decode('utf-8', errors='ignore')

    # Work line by line on stripped text so CRLF playlists match as well,
    # and ignore "#..." tag/comment lines.
    entries = [line.strip() for line in m3u8_content.splitlines()]
    entries = [line for line in entries if line and not line.startswith('#')]

    nested = [line for line in entries if re.fullmatch(r'\S+m3u8', line)]
    if nested:
        # Nested playlist: urljoin handles relative paths, absolute paths
        # and absolute URLs alike.
        sub_url_m3u8 = urllib.parse.urljoin(url, nested[0])
        return reParser_m3u8(sub_url_m3u8, _depth + 1)

    # Parse and return the play list (sequence of ts files).
    segments = [line for line in entries if re.fullmatch(r'\S+ts', line)]
    if not segments:
        # Nothing to download: report it as a parse failure instead of
        # letting the caller process an empty episode.
        return 'N', [], ''
    return 'Y', segments, os.path.split(url)[0] + '/'

def getTS(p_title, m3u8_items, host_m3u8):
    """Download all ts segments of one episode and merge them into one file.

    Writes ``<workdir>/<p_title>.txt`` with one segment URL per line,
    downloads every segment into ``<workdir>/<p_title>/`` and, if every
    segment was fetched and saved, concatenates them in playlist order into
    ``<workdir>/<p_title>/<p_title>.ts``.

    Args:
        p_title: Episode title, used for the playlist, directory and merged
            file names.
        m3u8_items: Segment entries as listed in the playlist.
        host_m3u8: Base URL (ending in ``/``) that the entries are relative
            to.

    Returns:
        None.  Progress and failures are reported on stdout.
    """
    # urljoin equals plain concatenation for relative entries and also
    # copes with entries that are absolute paths or full URLs.
    seg_urls = [urllib.parse.urljoin(host_m3u8, ts) for ts in m3u8_items]

    with open(os.path.join(workdir, '%s.txt' % p_title), 'w', encoding='utf-8') as f:
        f.writelines(seg_url + '\n' for seg_url in seg_urls)
    print(' ... playlist fetched')

    # Download the ts files.
    print(' ... downloading ts ... %d files' % len(m3u8_items))
    p_dir = os.path.join(workdir, p_title)
    os.makedirs(p_dir, exist_ok=True)

    flag_combine = True
    seg_paths = []
    for ts, seg_url in zip(m3u8_items, seg_urls):
        seg_name = os.path.split(ts)[1]
        try:
            resp = requests.get(seg_url, headers=header, timeout=30)
            # Do not save an HTTP error page as if it were video data.
            resp.raise_for_status()
        except requests.RequestException:
            print(' ... failed reading ts ... %s' % seg_url)
            flag_combine = False
            continue

        seg_path = os.path.join(p_dir, seg_name)
        try:
            with open(seg_path, 'wb') as f:
                f.write(resp.content)
        except OSError:
            print(' ... failed saving ts ... %s' % seg_name)
            flag_combine = False
        else:
            seg_paths.append(seg_path)

    # Merge the ts files.
    if flag_combine:
        print(' ... combining ts')
        merged_path = os.path.join(p_dir, '%s.ts' % p_title)
        # Concatenate in playlist order with plain file I/O: this works on
        # every platform and never passes the scraped title to a shell.
        # Writing to a temporary name first keeps a segment that happens to
        # be called "<p_title>.ts" readable until the merge is complete.
        part_path = merged_path + '.part'
        with open(part_path, 'wb') as merged:
            for seg_path in seg_paths:
                with open(seg_path, 'rb') as seg:
                    shutil.copyfileobj(seg, merged)
        os.replace(part_path, merged_path)
        print(' ... done')
    else:
        print(' ... pls check failed ts & manually combine\n ... done')

# Show the banner stored in the module docstring.
print(__doc__)

# Everything for this show is saved under one sub-directory of the
# platform-specific download folder chosen by getWorkdir().
subdir = 'Band'

workdir = getWorkdir(subdir)

# Pool of browser User-Agent strings; one is picked at random per run.
# Every entry must be a single-line, latin-1 encodable string because it is
# sent verbatim as an HTTP header value.
headerpool = [
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:49.0) Gecko/20100101 Firefox/49.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
    'Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.5; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0',
    # The original entry contained a literal ellipsis character (a
    # truncated copy), which cannot be encoded in an HTTP header.
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0',
    'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.84 Safari/535.11 SE 2.X MetaSr 1.0',
    'Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/2.0.0 Opera 9.50',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.57.2 (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.71 Safari/537.1 LBBROWSER',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36 OPR/26.0.1656.60',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.4.3.4000 Chrome/30.0.1599.101 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:45.0) Gecko/20100101 Firefox/45.0',
    'Mozilla/5.0 (Windows NT 6.1; rv:50.0) Gecko/20100101 Firefox/50.0',
    'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
    'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    'Mozilla/5.0 (X11; Linux x86_64; rv:50.0) Gecko/20100101 Firefox/50.0',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; Trident/5.0)',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; QQBrowser/7.0.3698.400)',
    'Mozilla/5.0 (iPad; CPU OS 10_1_1 like Mac OS X) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0 Mobile/14B100 Safari/602.1',
    'Opera/8.0 (Windows NT 5.1; U; en)',
]

# Request headers shared by every HTTP call in this script.
header = dict()
header["user-agent"] = random.choice(headerpool)

# Table-of-contents page.
# NOTE(review): the host below has no top-level domain and looks truncated;
# confirm the real address before running.
url_toc = 'https://www.116bt/vodshow/4193.html'
toc_parts = urllib.parse.urlsplit(url_toc)
host_toc = toc_parts.scheme + '://' + toc_parts.netloc

# Parse the table-of-contents page to get each episode's title and link.
try:
    toc_resp = requests.get(url_toc, headers=header, timeout=30)
    toc_resp.raise_for_status()
except requests.RequestException as err:
    sys.exit(' ... failed opening toc page ... %s (%s)' % (url_toc, err))
toc = toc_resp.content.decode('utf-8', errors='ignore')

toc_html = etree.HTML(toc)
if toc_html is None:
    # lxml returns None for an empty/unparseable document.
    sys.exit(' ... failed parsing toc page ... %s' % url_toc)

# Take href and title from the SAME <a> element so the two lists can never
# get out of step when an anchor lacks one of the attributes.
phase_links = toc_html.xpath('//ul[@class="detail-play-list clearfix tab-pane ff-playurl ff-playurl-tab-2 fade"]/li/a[@href and @title]')
phase_urls = [link.get('href') for link in phase_links]
phase_titles = [link.get('title') for link in phase_links]
if not phase_titles:
    sys.exit(' ... no episodes found on toc page ... %s' % url_toc)

for p_title in phase_titles:
    print(p_title)

# Menu commands are appended after the real titles so that the selection
# prompt accepts them; they have no matching entry in phase_urls.
phase_titles.append('q')
phase_titles.append('a')

# Choose which episode(s) to download.
# phase_titles ends with the menu commands 'q' and 'a'; they are accepted
# by the prompt but are not episodes and must never be passed to getM3U8().
while True:
    phase_chosen = ''
    while phase_chosen not in phase_titles:
        try:
            phase_chosen = input('\nWhich one to get? (a)ll? (q)uit? ... ')
        except EOFError:
            # Input stream closed: treat it like an explicit quit.
            phase_chosen = 'q'

    if phase_chosen == 'q':
        sys.exit()
    elif phase_chosen == 'a':
        for p_title in phase_titles:
            if p_title in ('q', 'a'):
                # Skip the menu commands appended after the real titles.
                continue
            getM3U8(p_title)
            sleep(2)
    else:
        getM3U8(phase_chosen)
        sleep(2)

本文标签: 视频下载下载器btPython