from scrapy.spiders import SitemapSpider
class MySpider(SitemapSpider):
sitemap_urls = ['
http://www.example.com/robots.txt']
sitemap_rules = [
('/shop/', 'parse_shop'),
]
other_urls = ['
http://www.example.com/about']
def start_requests(self):
requests = list(super(MySpider, self).start_requests())
requests += [scrapy.Request(x, self.parse_other) for x in self.other_urls]
return requests
def parse_shop(self, response):
pass # ... scrape shop here ...
def parse_other(self, response):
pass # ... scrape other here ...