admin管理员组文章数量:1650921
1.pom.xml 有些依赖与本项目无关
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for the "timor" Spring Boot project.
  Note from the original author: some of the dependencies below are unrelated to the crawler itself.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.zpc</groupId>
    <artifactId>timor</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>jar</packaging>
    <name>timor</name>
    <description>zpc project for Spring Boot</description>
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>2.0.3.RELEASE</version>
        <relativePath/> <!-- lookup parent from repository -->
    </parent>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <java.version>1.8</java.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-thymeleaf</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- Declared once; the original listed this dependency twice. -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-test</artifactId>
            <scope>test</scope>
        </dependency>
        <!-- Hot-reload support. "true" is not a valid Maven scope, so only <optional> is kept. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-devtools</artifactId>
            <optional>true</optional>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.activemq/activemq-all -->
        <dependency>
            <groupId>org.apache.activemq</groupId>
            <artifactId>activemq-all</artifactId>
            <version>5.15.4</version>
        </dependency>
        <!-- JdbcTemplate -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-jdbc</artifactId>
        </dependency>
        <!--<dependency>
            <groupId>com.mchange</groupId>
            <artifactId>c3p0</artifactId>
            <version>0.9.5.1</version>
        </dependency>-->
        <!-- Druid database connection pool -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.0.9</version>
        </dependency>
        <!-- shiro start -->
        <dependency>
            <groupId>org.apache.shiro</groupId>
            <artifactId>shiro-core</artifactId>
            <version>1.4.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.shiro</groupId>
            <artifactId>shiro-spring</artifactId>
            <version>1.4.0</version>
        </dependency>
        <!-- shiro end -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-jpa</artifactId>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.projectlombok/lombok -->
        <!-- Declared once; the original listed this dependency twice. -->
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.16.22</version>
            <scope>provided</scope>
        </dependency>
        <!-- Redis client -->
        <!-- https://mvnrepository.com/artifact/redis.clients/jedis -->
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.9.0</version>
        </dependency>
        <!-- Java mail support -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-mail</artifactId>
        </dependency>
        <!-- HTML parsing used by the crawler -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.8.3</version>
        </dependency>
        <!-- HTTP client used by the crawler -->
        <dependency>
            <groupId>org.apache.httpcomponents</groupId>
            <artifactId>httpclient</artifactId>
            <version>4.5.5</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/net.sf.json-lib/json-lib -->
        <dependency>
            <groupId>net.sf.json-lib</groupId>
            <artifactId>json-lib</artifactId>
            <version>2.4</version>
            <classifier>jdk15</classifier>
        </dependency>
        <!-- https://mvnrepository.com/artifact/javax.jms/jms -->
        <!-- <dependency>
            <groupId>javax.jms</groupId>
            <artifactId>jms</artifactId>
            <version>1.1</version>
        </dependency>-->
    </dependencies>
    <build>
        <plugins>
            <!-- Declared once; the original declared this plugin twice, which Maven reports as a duplicate. -->
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <configuration>
                    <fork>true</fork><!-- fork: per the original author, devtools does not take effect without it -->
                </configuration>
            </plugin>
            <!-- Build a container image with Jib, Google's open-source Java containerization tool -->
            <plugin>
                <groupId>com.google.cloud.tools</groupId>
                <artifactId>jib-maven-plugin</artifactId>
                <version>0.9.0</version>
                <configuration>
                    <to>
                        <image>gcr.io/my-project/image-built-with-jib</image>
                    </to>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
package reptile;
import net.sf.json.JSONObject;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Crawls a Kugou ranking list page by page and downloads every listed song as an MP3 file.
 *
 * <p>Each ranking page is parsed with jsoup, each song page is fetched to extract the song
 * hash, and the hash is exchanged for a playable URL through the {@code play/getdata} JSONP
 * endpoint.
 *
 * @Author timor
 * @Date 2019/1/9
 */
public class SpiderKugou {
    /** Directory prefix (including the trailing separator) the MP3 files are written to. */
    public static String filePath = "G:/music/";
    /** JSONP endpoint template; {@code HASH} and {@code TIME} are substituted per request. */
    public static String mp3 = "https://wwwapi.kugou.com/yy/index.php?r=play/getdata&callback=jQuery191027067069941080546_1546235744250&"
            + "hash=HASH&album_id=0&_=TIME";
    /** Ranking page template; {@code PAGE} is substituted with the page number. */
    public static String LINK = "https://www.kugou.com/yy/rank/home/PAGE-8888.html?from=rank";
    //"https://www.kugou.com/yy/rank/home/PAGE-23784.html?from=rank";

    /** Matches {@code "hash":"<value>"} in a song page; group 1 is the hash value. */
    private static final Pattern HASH_PATTERN = Pattern.compile("\"hash\":\"([0-9A-Z]+)\"");
    /** Characters that are not allowed in Windows file names. */
    private static final Pattern ILLEGAL_FILE_CHARS = Pattern.compile("[\\\\/:*?\"<>|]");

    /**
     * Walks ranking pages 1 through 22 and downloads the songs found on each page.
     *
     * @param args unused
     * @throws IOException if a connection cannot be closed or the download log cannot be written
     */
    public static void main(String[] args) throws IOException {
        for (int page = 1; page < 23; page++) {
            String url = LINK.replace("PAGE", page + "");
            getTitle(url);
            //download("https://www.kugou.com/song/mfy6je5.html");
        }
    }

    /**
     * Downloads every song listed on one ranking page.
     *
     * @param url address of the ranking page
     * @return always {@code null}
     * @throws IOException if a connection cannot be closed or the download log cannot be written
     */
    public static String getTitle(String url) throws IOException {
        String content = HttpGetConnect.connect(url, "utf-8");
        Document doc = new HtmlManage().manage(content);
        // "pc_temp_songlist" is the class of the div that holds the song list.
        Elements songLists = doc.getElementsByClass("pc_temp_songlist");
        if (songLists.isEmpty()) {
            // Happens when the page could not be fetched (connect returns "") or the layout changed.
            System.err.println("No song list found on " + url);
            return null;
        }
        // Every <li> inside the list div describes one song.
        for (Element item : songLists.get(0).getElementsByTag("li")) {
            // "<artist> - <song>", taken from the title attribute of the list item.
            String title = item.attr("title").trim();
            // The first anchor of the item links to the song page.
            Element anchor = item.getElementsByTag("a").first();
            if (anchor == null || anchor.attr("href").isEmpty()) {
                continue;
            }
            download(anchor.attr("href"), title);
        }
        return null;
    }

    /**
     * Resolves the playable URL of one song and saves it as {@code filePath + name + ".mp3"}.
     *
     * @param url  address of the song page
     * @param name display name of the song, used as the file name
     * @return the resolved play URL, or {@code null} when the song had to be skipped because
     *         no hash or no play URL could be extracted
     * @throws IOException if a connection cannot be closed or the download log cannot be written
     */
    public static String download(String url, String name) throws IOException {
        String content = HttpGetConnect.connect(url, "utf-8");
        Matcher matcher = HASH_PATTERN.matcher(content);
        if (!matcher.find()) {
            System.err.println("No song hash found on " + url + ", skipping " + name);
            return null;
        }
        String hash = matcher.group(1);

        String item = mp3.replace("HASH", hash);
        item = item.replace("TIME", System.currentTimeMillis() + "");
        System.out.println(item);

        String playUrl = extractPlayUrl(HttpGetConnect.connect(item, "utf-8"));
        if (playUrl == null || playUrl.isEmpty()) {
            System.err.println("No play URL returned for " + name + ", skipping");
            return null;
        }
        System.out.print(playUrl + " == ");

        boolean saved = FileDownload.download(playUrl, filePath + safeFileName(name, hash) + ".mp3");
        // Report success only when the file was actually written.
        System.out.println(name + (saved ? "下载完成" : "下载失败"));
        return playUrl;
    }

    /** Unwraps a {@code callback({...});} JSONP response and returns {@code data.play_url}, or null. */
    private static String extractPlayUrl(String jsonp) {
        int open = jsonp.indexOf('(');
        int close = jsonp.lastIndexOf(')');
        if (open < 0 || close <= open) {
            return null;
        }
        try {
            JSONObject json = JSONObject.fromObject(jsonp.substring(open + 1, close));
            return json.getJSONObject("data").getString("play_url");
        } catch (RuntimeException e) {
            // json-lib reports malformed or unexpected payloads with unchecked exceptions.
            System.err.println("Unexpected play/getdata response: " + e.getMessage());
            return null;
        }
    }

    /** Replaces characters that are illegal in file names; falls back to the hash for an empty name. */
    private static String safeFileName(String name, String fallback) {
        String cleaned = name == null ? "" : ILLEGAL_FILE_CHARS.matcher(name).replaceAll("_").trim();
        return cleaned.isEmpty() ? fallback : cleaned;
    }
}
package reptile;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.BasicHttpClientConnectionManager;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
/**
 * Minimal HTTP GET helper built on Apache HttpClient that returns a response body as text.
 *
 * @Author timor
 * @Date 2019/1/9
 */
public class HttpGetConnect {
    private static final Log log = LogFactory.getLog(HttpGetConnect.class);

    /**
     * Fetches the content behind {@code url} with a browser-like GET request.
     *
     * <p>This is a best-effort call: request failures are logged and reported as an empty
     * string rather than thrown, and a non-2xx status also yields an empty string.
     *
     * @param url         address to fetch
     * @param charsetName charset used to decode the body, e.g. UTF-8 or GB2312
     * @return the body with its lines concatenated (line terminators are dropped), or an
     *         empty string on a non-2xx status, a missing body or a request failure
     * @throws IOException if closing the HTTP client fails
     */
    public static String connect(String url, String charsetName) throws IOException {
        BasicHttpClientConnectionManager connManager = new BasicHttpClientConnectionManager();
        CloseableHttpClient httpclient = HttpClients.custom()
                .setConnectionManager(connManager)
                .build();
        String content = "";
        try {
            content = fetch(httpclient, url, charsetName);
        } catch (Exception e) {
            // Deliberately best-effort: callers treat an empty string as "nothing fetched".
            log.error("GET " + url + " failed", e);
        } finally {
            httpclient.close();
        }
        //log.info("content is " + content);
        return content;
    }

    /** Executes the GET request and reads the body; the response and reader are always closed. */
    private static String fetch(CloseableHttpClient httpclient, String url, String charsetName)
            throws IOException {
        HttpGet httpget = new HttpGet(url);
        RequestConfig requestConfig = RequestConfig.custom()
                .setSocketTimeout(5000)
                .setConnectTimeout(50000)
                .setConnectionRequestTimeout(50000)
                .build();
        httpget.setConfig(requestConfig);
        httpget.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        httpget.setHeader("Accept-Encoding", "gzip,deflate,sdch");
        httpget.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
        httpget.setHeader("Connection", "keep-alive");
        httpget.setHeader("Upgrade-Insecure-Requests", "1");
        httpget.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36");
        //httpget.setHeader("Hosts", "www.oschina");
        httpget.setHeader("cache-control", "max-age=0");

        try (CloseableHttpResponse response = httpclient.execute(httpget)) {
            int status = response.getStatusLine().getStatusCode();
            if (status < 200 || status >= 300) {
                return "";
            }
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                return "";
            }
            try (InputStream instream = entity.getContent();
                 BufferedReader br = new BufferedReader(new InputStreamReader(instream, charsetName))) {
                StringBuilder body = new StringBuilder();
                String line;
                while ((line = br.readLine()) != null) {
                    // Lines are joined without a separator, as callers have always received them.
                    body.append(line);
                }
                return body.toString();
            }
        }
    }
}
package reptile;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Thin convenience layer over jsoup for parsing HTML and collecting the inner HTML of
 * elements selected by tag, class or attribute value.
 *
 * @Author timor
 * @Date 2019/1/9
 */
public class HtmlManage {
    private static Log log = LogFactory.getLog(HtmlManage.class);

    /**
     * Parses an HTML string.
     *
     * @param html markup to parse
     * @return the parsed document
     */
    public Document manage(String html) {
        return Jsoup.parse(html);
    }

    /**
     * Fetches a URL with jsoup's own connection and parses the result.
     *
     * @param url address to fetch
     * @return the parsed document
     * @throws IOException if the page cannot be fetched
     */
    public Document manageDirect(String url) throws IOException {
        return Jsoup.connect(url).get();
    }

    /**
     * Collects the inner HTML of every element with the given tag name.
     *
     * @param doc document to search
     * @param tag tag name to match
     * @return inner HTML of each match, in document order
     */
    public List<String> manageHtmlTag(Document doc, String tag) {
        return innerHtmlOf(doc.getElementsByTag(tag));
    }

    /**
     * Collects the inner HTML of every element carrying the given CSS class.
     *
     * @param doc  document to search
     * @param clas class name to match
     * @return inner HTML of each match, in document order
     */
    public List<String> manageHtmlClass(Document doc, String clas) {
        return innerHtmlOf(doc.getElementsByClass(clas));
    }

    /**
     * Collects the inner HTML of every element whose attribute {@code key} equals {@code value}.
     *
     * @param doc   document to search
     * @param key   attribute name
     * @param value attribute value to match
     * @return inner HTML of each match, in document order
     */
    public List<String> manageHtmlKey(Document doc, String key, String value) {
        return innerHtmlOf(doc.getElementsByAttributeValue(key, value));
    }

    /** Copies the inner HTML of each selected element into a new list. */
    private static List<String> innerHtmlOf(Elements matches) {
        List<String> fragments = new ArrayList<String>();
        matches.forEach(match -> fragments.add(match.html()));
        return fragments;
    }
}
package reptile;
import java.io.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
/**
 * Downloads a remote file to the local disk with Apache HttpClient.
 *
 * @Author timor
 * @Date 2019/1/9
 */
public class FileDownload {
    private static final Log log = LogFactory.getLog(FileDownload.class);

    /** Number of requests made before giving up on a non-200 response. */
    private static final int MAX_ATTEMPTS = 3;

    /**
     * Downloads {@code url} into the file {@code path}, trying up to three times while the
     * server answers with a status other than 200.
     *
     * <p>Both arguments are first appended to the music info text file through
     * {@link WriteText}. An exception during a request or while writing the file ends the
     * attempt loop and is reported as {@code false}.
     *
     * @param url  address of the file to download
     * @param path target path including the file name
     * @return {@code true} if the file was written completely, {@code false} otherwise
     * @throws IOException if the music info text file cannot be written
     */
    public static boolean download(String url, String path) throws IOException {
        WriteText infoLog = new WriteText();
        infoLog.writeToText(url);
        infoLog.writeToText(path);

        RequestConfig requestConfig = RequestConfig.custom().setSocketTimeout(2000)
                .setConnectTimeout(2000).build();
        HttpGet get = new HttpGet(url);
        get.setConfig(requestConfig);

        boolean flag = false;
        CloseableHttpClient httpclient = HttpClients.createDefault();
        try {
            for (int attempt = 0; attempt < MAX_ATTEMPTS && !flag; attempt++) {
                // Closing each response returns its connection to the pool before a retry;
                // otherwise repeated failures would exhaust the per-route connection limit.
                try (CloseableHttpResponse result = httpclient.execute(get)) {
                    System.out.println(result.getStatusLine());
                    if (result.getStatusLine().getStatusCode() == 200) {
                        save(result, path);
                        flag = true;
                    }
                }
            }
        } catch (Exception e) {
            log.error("Download of " + url + " to " + path + " failed", e);
            flag = false;
        } finally {
            get.releaseConnection();
            try {
                httpclient.close();
            } catch (IOException e) {
                // The file is already complete (or already reported as failed) at this point.
                log.warn("Could not close HTTP client", e);
            }
        }
        return flag;
    }

    /** Streams the response body into {@code path}, creating missing parent directories. */
    private static void save(CloseableHttpResponse response, String path) throws IOException {
        File file = new File(path);
        File parent = file.getParentFile();
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Cannot create directory " + parent);
        }
        try (InputStream in = new BufferedInputStream(response.getEntity().getContent());
             OutputStream out = new BufferedOutputStream(new FileOutputStream(file))) {
            byte[] buffer = new byte[1024];
            int len;
            while ((len = in.read(buffer, 0, buffer.length)) > -1) {
                out.write(buffer, 0, len);
            }
        }
    }
}
package reptile;
import java.io.*;
/**
 * Appends song information lines to a local text file, by default
 * {@code G:/music/music_info/music_info.txt}.
 *
 * @Author timor
 * @Date 2019/1/9
 */
public class WriteText {
    /** Target file used by {@link #writeToText(String)}. */
    private static final String DEFAULT_PATH = "G:\\music\\music_info\\music_info.txt";

    /**
     * Appends one line to the default music info file.
     *
     * @param musicInfo text to append; a CRLF line terminator is added after it
     * @throws IOException if the file cannot be created or written
     */
    public void writeToText(String musicInfo) throws IOException {
        writeToText(musicInfo, DEFAULT_PATH);
    }

    /**
     * Appends one line to the given file, creating the file and any missing parent
     * directories first.
     *
     * @param musicInfo text to append; a CRLF line terminator is added after it
     * @param path      path of the file to append to
     * @throws IOException if the file cannot be created or written
     */
    public void writeToText(String musicInfo, String path) throws IOException {
        File file = new File(path);
        File parent = file.getParentFile();
        if (parent != null && !parent.exists()) {
            parent.mkdirs();
        }
        // Explicit UTF-8 keeps non-ASCII song titles readable regardless of the platform charset;
        // try-with-resources closes the writer even when the write fails.
        try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
                new FileOutputStream(file, true), java.nio.charset.StandardCharsets.UTF_8))) {
            // Write the text followed by a Windows-style line break.
            bw.write(musicInfo + "\r\n");
        }
    }
}
package com.zpc.timor;
import reptile.WriteText;
import java.io.IOException;
/**
 * Manual smoke test for writing to the music info text file.
 */
public class test {
    /**
     * Appends one sample entry through {@link WriteText}.
     *
     * @param args unused
     * @throws IOException if the text file cannot be written
     */
    public static void main(String[] args) throws IOException {
        WriteText writer = new WriteText();
        String sample = "12376\r\n";
        writer.writeToText(sample);
    }
}
版权声明:本文标题:java爬虫 酷狗音乐 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:https://m.elefans.com/dongtai/1729549960a1206089.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论