# NOTE: page-title residue from the scraped source, not part of the program: 网站广告位制作_黑河网站制作_深圳南山做网站_富阳网站
import time


class MainSpider(scrapy.Spider):
    """Spider that walks gushiwen.cn listing pages and follows each entry.

    For every entry found on a listing page the spider records the entry's
    title on a ``BosscrawlingItem``, requests the entry's detail page, and
    stores the text extracted there on the same item before yielding it.
    Listing pages beyond the one in ``start_urls`` are built from
    ``page_url`` and requested one at a time up to ``max_page``.
    """

    name = "main"
    start_urls = ["https://www.gushiwen.cn/default_1.aspx"]
    # Template for follow-up listing pages; ``%d`` receives the page number.
    page_url = "https://www.gushiwen.cn/default_%d.aspx"
    # Number of the next listing page to request (page 1 is in start_urls).
    page_num = 2
    # Last listing page number that will be requested.
    max_page = 3

    def parse(self, response):
        """Yield a detail request per listing entry, then the next listing page.

        Args:
            response: The response for a listing page.

        Yields:
            scrapy.Request: One request per entry, handled by
                ``parse_detail`` and carrying the partially filled item in
                ``meta['item']``; followed by at most one request for the
                next listing page, handled by this method.
        """
        anchors = response.xpath(
            '//div[@class="sons"]/div[@class="cont"]/div[2]/p/a'
        )

        # Read title and href from the *same* anchor node. Extracting two
        # independent lists and zipping them silently mispairs every later
        # entry as soon as one anchor lacks a <b> title.
        entries = []
        for anchor in anchors:
            title = anchor.xpath('./b/text()').extract_first()
            href = anchor.xpath('./@href').extract_first()
            if title is None or href is None:
                continue
            entries.append((title, href))

        title_list = [title for title, _ in entries]
        url_list = [href for _, href in entries]
        print(title_list)
        print(url_list)

        for title, detail_url in entries:
            print(title, detail_url)
            item = BosscrawlingItem()
            item['title'] = title
            # urljoin leaves absolute URLs untouched and resolves relative
            # hrefs against the page URL; Request rejects scheme-less URLs.
            yield scrapy.Request(
                url=response.urljoin(detail_url),
                callback=self.parse_detail,
                meta={'item': item},
            )

        if self.page_num <= self.max_page:
            full_page_url = self.page_url % self.page_num
            # Advance before yielding so the next parse() call sees the
            # following page number.
            self.page_num += 1
            yield scrapy.Request(url=full_page_url, callback=self.parse)

    def parse_detail(self, response):
        """Attach the detail page's text to the item and yield it.

        Args:
            response: The response for a detail page; ``response.meta['item']``
                holds the item created in ``parse``.

        Yields:
            The item, with ``detail_text`` set to the list of text nodes
            found under ``div.contyishang``.
        """
        item = response.meta['item']
        detail_text = response.xpath(
            '//div[@class="contyishang"]//text()'
        ).extract()
        item['detail_text'] = detail_text
        print(detail_text)
        yield item