
Step 1: import the libraries we will need.

from requests_html import HTMLSession
import pymysql
import json

Next, open the website and click "View more".

Click next page and notice that the URL in the address bar does not change, so the data must be loaded by an AJAX request. Open the F12 developer tools, select Fetch/XHR, refresh the page, then click Response to inspect it: the data we need is in this response.

Click next page again and look at the Payload: as the page number changes, the current field changes with it, so the page number maps directly to current.
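You can confirm this before writing the full spider by sending a standalone POST with two different current values and comparing the results (a minimal sketch; the URL is the Request URL copied from the Headers tab in the next step, with the full www.xinfadi.com.cn domain assumed):

from requests_html import HTMLSession

session = HTMLSession()
url = 'http://www.xinfadi.com.cn/getPriceData.html'  # assumed full Request URL
for page in (1, 2):
    # only 'current' changes between pages; the other form fields stay empty
    resp = session.post(url, data={'limit': 20, 'current': page,
                                   'pubDateStartTime': '', 'pubDateEndTime': '',
                                   'prodPcatid': '', 'prodCatid': '', 'prodName': ''})
    records = resp.json()['list']        # the price records live under the 'list' key
    print(page, records[0]['prodName'])  # the first product differs from page to page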

Click Headers and copy the Request URL; this is the target address. Now, on to the code.

from requests_html import HTMLSession
import pymysql
import json


class Myspider:
    def __init__(self, i):
        self.url = 'http://www.xinfadi.com.cn/getPriceData.html'
        self.session = HTMLSession()
        self.data = {'limit': 20,
                     'current': i,
                     'pubDateStartTime': '',
                     'pubDateEndTime': '',
                     'prodPcatid': '',
                     'prodCatid': '',
                     'prodName': ''}
        host = "localhost"#ip地址
        port = 3306 #端口号
        db = ''#数据库名
        user = ''#数据库用户
        password = ""#数据库密码

        self.conn = pymysql.connect(host=host, port=port, database=db, user=user, password=password, charset='utf8')
        self.cursor = self.conn.cursor()

    def parse(self):
        response = self.session.post(url=self.url, data=self.data)
        response.encoding = 'utf-8'
        result = json.loads(response.text)
        for item in result['list']:
            proname = item['prodName']
            prodcat = item['prodCat']
            lowprice = item['lowPrice']
            highprice = item['highPrice']
            avgprice = item['avgPrice']
            place = item['place']
            unitinfo = item['unitInfo']
            pubdate = item['pubDate']
            data = (proname, prodcat, lowprice, highprice, avgprice, place, unitinfo, pubdate)
            self.saveMySql(data)

    def saveMySql(self, data):
        # Parameterised placeholders avoid SQL injection and quoting problems;
        # replace veg_price with your own table name
        self.cursor.execute(
            '''INSERT INTO veg_price (proname, prodcat, lowprice, highprice, avgprice, place, unitinfo, pubdate)
               VALUES (%s, %s, %s, %s, %s, %s, %s, %s);''', data)
        self.conn.commit()

    def run(self):
        self.parse()
        self.cursor.close()
        self.conn.close()  # close the database connection


if __name__ == '__main__':
    for i in range(2, 100):  # crawl pages 2 through 99, one spider per page
        spider = Myspider(i)
        spider.run()
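The INSERT above assumes the target table already exists. Here is a minimal sketch of a matching schema (the table name veg_price and the column types are assumptions; adjust them to your own database):

import pymysql

# fill in your credentials, as in Myspider.__init__ above
conn = pymysql.connect(host='localhost', port=3306, database='', user='', password='', charset='utf8')
cursor = conn.cursor()
# assumed schema matching the fields that Myspider.saveMySql inserts
cursor.execute('''
    CREATE TABLE IF NOT EXISTS veg_price (
        id INT AUTO_INCREMENT PRIMARY KEY,
        proname VARCHAR(100),     -- product name
        prodcat VARCHAR(100),     -- product category
        lowprice DECIMAL(10, 2),  -- lowest price
        highprice DECIMAL(10, 2), -- highest price
        avgprice DECIMAL(10, 2),  -- average price
        place VARCHAR(100),       -- place of origin
        unitinfo VARCHAR(50),     -- pricing unit
        pubdate DATETIME          -- publication date
    )
''')
conn.close()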

The results as stored in the MySQL database:
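To spot-check what was written, you can query the newest rows from Python (same assumed table name and credentials as above):

import pymysql

conn = pymysql.connect(host='localhost', port=3306, database='', user='', password='', charset='utf8')
cursor = conn.cursor()
cursor.execute('SELECT proname, lowprice, highprice, avgprice, pubdate '
               'FROM veg_price ORDER BY id DESC LIMIT 5')
for row in cursor.fetchall():  # the five most recently inserted records
    print(row)
conn.close()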
