# NOTE(review): this class needs `scrapy` to be imported earlier in the
# module; that import is not visible in this part of the file -- confirm.
class aSpider(scrapy.spiders.Spider):
    """Spider that prints the teacher entries found on the start page.

    Every ``<div class="li_txt">`` in the response is treated as one entry.
    The text of its ``h3``, ``h4`` and ``p`` children is printed as the
    teacher's name, level and info, followed by a separator line.
    """

    name = "itcast"
    # Scrapy reads ``allowed_domains``; the previous spelling
    # (``allowd_domains``) was silently ignored. Entries must be bare domain
    # names, not URLs, so the scheme is dropped.
    allowed_domains = ["www.itcast.cn"]
    start_urls = ["http://www.itcast.cn/channel/teacher.shtml#ac"]

    def parse(self, response):
        """Print name, level and info for each teacher block in *response*.

        A block that lacks an ``h3``, ``h4`` or ``p`` text node prints an
        empty line for that field instead of raising ``IndexError``.
        """
        for site in response.xpath('//div[@class="li_txt"]'):
            # extract_first() returns the first match, or the default when
            # the XPath matched nothing.
            teacher_name = site.xpath('h3/text()').extract_first(default="")
            teacher_level = site.xpath('h4/text()').extract_first(default="")
            teacher_info = site.xpath('p/text()').extract_first(default="")

            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(teacher_name)
            print(teacher_level)
            print(teacher_info)
            print("=============")
# 1.5 items.py
import scrapy


class aSpiderItem(scrapy.Item):
    """Placeholder item; it declares no fields."""
    # define the fields for your item here like:
    # name = scrapy.Field()
    pass


class ItcastItem(scrapy.Item):
    """Item declaring three fields: ``name``, ``level`` and ``info``."""
    # NOTE(review): presumably these hold the teacher name, level and info
    # printed by the itcast spider -- confirm against the code that fills them.
    name = scrapy.Field()
    level = scrapy.Field()
    info = scrapy.Field()