admin管理员组文章数量:1650962
项目一:获取酷狗TOP 100
排名
文件&&歌手
时长
效果:
附源码:
import time
import json
from bs4 import BeautifulSoup
import requests
class Kugou(object):
    """Scrape one Kugou ranking page and append its rows to ``hhh.txt``."""

    def __init__(self):
        # Headers sent with every request; the User-Agent imitates a desktop
        # Firefox browser.
        self.header = {
            "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0'
        }

    @staticmethod
    def _split_title(text):
        """Split ``"singer - song"`` link text into ``(singer, title)``.

        The spaced separator ``" - "`` is tried first so that hyphens inside
        a singer or song name (e.g. ``"A-Lin"``) are preserved; a bare ``"-"``
        is the fallback. Text without any hyphen is returned as the title
        with an empty singer instead of raising ``IndexError``.
        """
        text = text.strip()
        for separator in (' - ', '-'):
            singer, found, title = text.partition(separator)
            if found:
                return singer.strip(), title.strip()
        return '', text

    def getInfo(self, url):
        """Fetch ``url``, print every ranked song and append it to ``hhh.txt``.

        Args:
            url: Address of a ranking page.

        Each row is printed and written as the ``str()`` of a dict with the
        keys ``rank``, ``title``, ``singer`` and ``songTime``.
        """
        # A timeout keeps an unresponsive server from hanging the crawl.
        html = requests.get(url, headers=self.header, timeout=10)
        soup = BeautifulSoup(html.text, 'html.parser')
        ranks = soup.select('.pc_temp_num')
        # Walk down the nested tags to reach the song link.
        titles = soup.select('.pc_temp_songlist > ul > li > a')
        times = soup.select('.pc_temp_time')

        lines = []
        for rank, title, songTime in zip(ranks, titles, times):
            singer, song = self._split_title(title.get_text())
            data = {
                # get_text() is needed because the selected element itself
                # still carries its HTML markup.
                'rank': rank.get_text().strip(),
                'title': song,
                'singer': singer,
                'songTime': songTime.get_text().strip()
            }
            print('rank:%2s\t' % data['rank'], 'title:%2s\t' % data['title'], 'singer:%2s\t' %data['singer'], 'songTime:%2s\t' % data['songTime'])
            lines.append(str(data) + '\n')

        # Open the output file once per page rather than once per row.
        if lines:
            with open('hhh.txt', 'a', encoding='utf8') as f:
                f.writelines(lines)
if __name__ == '__main__':
    # Ranking pages to crawl. The host needs its ".com" suffix; without it
    # the name cannot be resolved.
    # NOTE(review): the page index starts at 0 here while the explanatory
    # snippet in this article starts at 1 - confirm which the site expects.
    urls = [
        'http://www.kugou.com/yy/rank/home/{}-8888.html'.format(str(i)) for i in range(30)
    ]
    kugou = Kugou()
    for url in urls:
        kugou.getInfo(url)
        # Pause between pages to avoid hammering the server.
        time.sleep(1)
部分代码解析--------------------------------------------------------------------
# Build the ranking-page URLs for pages 1 to 4 and print each one.
urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(str(i)) for i in range(1, 5)]
for i in urls:
    print(i)
结果打印:
http://www.kugou.com/yy/rank/home/1-8888.html
http://www.kugou.com/yy/rank/home/2-8888.html
http://www.kugou.com/yy/rank/home/3-8888.html
http://www.kugou.com/yy/rank/home/4-8888.html
--------------------------------------------------------------------
# Walk the three selections in lockstep and print the cleaned-up fields.
for rank, title, songTime in zip(ranks, titles, times):
    data = {
        # Printing rank as-is would include its HTML tags, hence get_text().
        'rank': rank.get_text().strip(),
        # The link text reads "singer - song": index 0 is the singer and
        # index 1 is the song title.
        'title': title.get_text().split('-')[1].strip(),
        'singer': title.get_text().split('-')[0].strip(),
        'songTime': songTime.get_text()
    }
    print(data['rank'])
    print(data['title'])
    print(data['singer'])
    print(data['songTime'])
结果打印:
1
飞驰于你
许嵩
4: 04
--------------------------------------------------------------------
# Same loop, but the selected elements are stored untouched, so printing
# them shows the elements themselves rather than the extracted text.
for rank, title, songTime in zip(ranks, titles, times):
    data = dict(rank=rank, title=title, songTime=songTime)
    for field in ('rank', 'title', 'songTime'):
        print(data[field])
结果打印:
1
许嵩 - 飞驰于你
4:04
项目二:搜索曲目获取URL
根据关键字搜索后的结果:
# encoding=utf-8
# Time : 2018/4/27
# Email : z2615@163
# Software: PyCharm
# Language: Python 3
import requests
import json
class KgDownLoader(object):
    """Search Kugou for a keyword and resolve the MP3 address of each hit."""

    def __init__(self):
        # JSONP search endpoint; the single ``{}`` receives the keyword,
        # e.g. ``self.search_url.format('园游会')``.
        self.search_url = 'http://songsearch.kugou.com/song_search_v2?callback=jQuery191034642999175022426_1489023388639&keyword={}&page=1&pagesize=30&userid=-1&clientver=&platform=WebFilter&tag=em&filter=2&iscorrection=1&privilege_filter=0&_=1489023388641'
        # Endpoint that returns playback data for a file hash.
        self.play_url = 'http://www.kugou.com/yy/index.php?r=play/getdata&hash={}'
        # Record for the song currently being processed. The SAME dict is
        # updated in place and yielded by the generator methods below.
        self.song_info = {
            '歌名': None,
            '演唱者': None,
            '专辑': None,
            'filehash': None,
            'mp3url': None
        }

    @staticmethod
    def _strip_jsonp(text):
        """Return the JSON payload inside a ``callback( ... )`` wrapper.

        Only the outermost parentheses are removed, so parentheses that are
        part of the data (for example in a song name) survive. Text without
        a wrapper is returned unchanged.
        """
        start = text.find('(')
        end = text.rfind(')')
        if start == -1 or end < start:
            return text
        return text[start + 1:end]

    @staticmethod
    def _strip_highlight(text):
        """Remove the ``<em>``/``</em>`` keyword-highlight tags from ``text``."""
        return text.replace('<em>', '').replace('</em>', '')

    def get_search_data(self, keys):
        """Yield ``self.song_info`` once per search result for ``keys``.

        Args:
            keys: Search keyword.

        Yields:
            ``self.song_info`` updated with the current result's song name,
            singer, album (``None`` when empty) and file hash; ``mp3url`` is
            reset to ``None``. The same dict object is yielded every time,
            so copy it if earlier results must be kept.
        """
        from urllib.parse import quote

        # Percent-encode the keyword so characters such as '&' or '#' cannot
        # corrupt the query string.
        keyword = quote(str(keys), safe='')
        search_file = requests.get(self.search_url.format(keyword), timeout=10)
        search_html = self._strip_jsonp(search_file.content.decode())
        views = json.loads(search_html)
        for view in views['data']['lists']:
            song_name = self._strip_highlight(view['SongName'])
            album_name = self._strip_highlight(view['AlbumName'])
            sing_name = self._strip_highlight(view['SingerName'])
            file_hash = view['FileHash']
            new_info = {
                '歌名': song_name,
                '演唱者': sing_name,
                '专辑': album_name if album_name else None,
                'filehash': file_hash,
                'mp3url': None
            }
            self.song_info.update(new_info)
            yield self.song_info

    def get_mp3_url(self, filehash):
        """Look up the play URL for ``filehash`` and yield ``self.song_info``.

        Args:
            filehash: File hash taken from a search result.

        Yields:
            ``self.song_info`` (a single item) with ``mp3url`` filled in.
        """
        mp3_file = requests.get(self.play_url.format(filehash), timeout=10).content.decode()
        mp3_json = json.loads(mp3_file)
        real_url = mp3_json['data']['play_url']
        self.song_info['mp3url'] = real_url
        yield self.song_info

    def save_mp3(self, song_name, real_url):
        """Download ``real_url`` and write it to ``<song_name>.mp3``.

        Args:
            song_name: Output file name without the ``.mp3`` extension.
            real_url: Address of the audio file.
        """
        # Fetch first so a failed download does not leave an empty file.
        payload = requests.get(real_url, timeout=30).content
        with open(song_name + ".mp3", "wb") as fp:
            fp.write(payload)
if __name__ == '__main__':
    # Ask for a song name, then print the completed record of every hit.
    kg = KgDownLoader()
    keyword = input('请输入歌名:')
    for found in kg.get_search_data(keyword):
        for detail in kg.get_mp3_url(found['filehash']):
            print(detail)
项目三:搜索下载歌曲
代码仅供学习参考
from selenium import webdriver
from bs4 import BeautifulSoup
import urllib.request
from selenium.webdriver.common.action_chains import ActionChains
# Interactive script: search Kugou in a Chrome window, let the user pick one
# of the listed songs, then download the audio file the player page loads.
input_string = input('>>>please input the search key:')

driver = webdriver.Chrome()
try:
    driver.get('http://www.kugou.com/')
    # Type the keyword into the search box, then click the search icon.
    a = driver.find_element('xpath', '/html/body/div[1]/div[1]/div[1]/div[1]/input')
    a.send_keys(input_string)
    driver.find_element('xpath', '/html/body/div[1]/div[1]/div[1]/div[1]/div/i').click()
    # Switching through every handle leaves the driver on the last window in
    # the list; this has to be repeated whenever a new window opens.
    for handle in driver.window_handles:
        driver.switch_to.window(handle)

    # Count the result rows and list them with 1-based numbers.
    soup = BeautifulSoup(driver.page_source, 'lxml')
    PageAll = len(soup.select('ul.list_content.clearfix > li'))
    print(PageAll)
    for i in range(1, PageAll + 1):
        j = driver.find_element('xpath', '/html/body/div[4]/div[1]/div[2]/ul[2]/li[%d]/div[1]/a' % i).get_attribute('title')
        print('%d.' % i + j)

    choice = input("请输入你要下载的歌曲(输入序号):")
    # Locate the chosen entry once; b keeps its title for the file name.
    a = driver.find_element('xpath', '/html/body/div[4]/div[1]/div[2]/ul[2]/li[%s]/div[1]/a' % choice)
    b = a.get_attribute('title')
    # Move the mouse onto the link and click it via an action chain.
    actions = ActionChains(driver)
    actions.move_to_element(a)
    actions.click(a)
    actions.perform()

    # The click may open another window, so move to the last handle again.
    for handle in driver.window_handles:
        driver.switch_to.window(handle)
    Local = driver.find_element('xpath', '//*[@id="myAudio"]').get_attribute('src')
    print(Local)
finally:
    # The download below goes through urllib, so the browser can be closed
    # here; this also releases it when any step above fails.
    driver.quit()


def cbk(a, b, c):
    """Report hook for ``urlretrieve``: print the download progress.

    Args:
        a: Number of blocks transferred so far.
        b: Size of one block in bytes.
        c: Total size of the file; may be 0 or negative when unknown.
    """
    if c <= 0:
        # Without a known total there is no percentage to compute (and a
        # zero total would divide by zero).
        return
    per = min(100.0, 100.0 * a * b / c)
    print('%.2f%%' % per)


# The title may contain markup; keep only its text. The parser is named
# explicitly, matching the one used for the result page above.
name = BeautifulSoup(b, 'lxml').get_text()
# Escaped backslash: '\%' is not a valid escape sequence.
path = 'D:\\%s.mp3' % name
urllib.request.urlretrieve(Local, path, cbk)
print('finish downloading %s.mp3' % name + '\n\n')
版权声明:本文标题:python爬取酷狗音乐_Python实例---爬去酷狗音乐 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:https://m.elefans.com/dianzi/1729548705a1205959.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论