Scrapy: How To Manually Insert A Request From A Spider_idle Event Callback?
I've created a spider and have linked a method to the spider_idle event. How do I add a request manually? I can't just return the item from parse -- parse is not running in this context.
Solution 1:
class FooSpider(BaseSpider):
    """Spider that schedules another request each time it goes idle.

    ``BaseSpider``, ``dispatcher``, ``signals`` and ``DontCloseSpider`` are
    expected to be imported elsewhere in the module.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the spider and subscribe to the ``spider_idle`` signal."""
        super(FooSpider, self).__init__(*args, **kwargs)
        dispatcher.connect(self.dont_close_me, signals.spider_idle)

    def dont_close_me(self, spider):
        """Handle ``spider_idle`` by queueing a request and staying open.

        Args:
            spider: The spider the idle notification refers to.

        Raises:
            DontCloseSpider: Raised after a request has been handed to the
                engine, whenever the notification is about this spider.
        """
        # Ignore idle notifications that concern a different spider.
        if spider != self:
            return

        # NOTE(review): create_request() is not defined in this snippet;
        # presumably it builds the Request to schedule -- confirm.
        # NOTE(review): assumes self.crawler has been set on the spider by
        # the time the signal fires -- confirm for the Scrapy version in use.
        self.crawler.engine.crawl(self.create_request(), spider)

        # Raising DontCloseSpider from a spider_idle handler asks Scrapy
        # not to close the spider.
        raise DontCloseSpider("..I prefer live spiders.")
Update 2016:
class FooSpider(BaseSpider):
    """Spider that injects one extra request the first time it goes idle.

    ``BaseSpider`` and ``scrapy`` are expected to be imported elsewhere in
    the module.
    """

    # Flips to True once the extra request has been handed to the engine,
    # so that later idle signals do not schedule it again.
    yet = False

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Build the spider and connect ``idle`` to the ``spider_idle`` signal.

        Args:
            crawler: The crawler whose signal manager the handler is
                registered with.
            *args: Forwarded unchanged to the parent ``from_crawler``.
            **kwargs: Forwarded unchanged to the parent ``from_crawler``.

        Returns:
            The spider instance created by the parent ``from_crawler``.
        """
        spider = super(FooSpider, cls).from_crawler(crawler, *args, **kwargs)
        crawler.signals.connect(spider.idle, signal=scrapy.signals.spider_idle)
        return spider

    def idle(self):
        """Handle ``spider_idle``: schedule the extra request at most once."""
        if not self.yet:
            # NOTE(review): create_request() is not defined in this snippet;
            # presumably it builds the Request to schedule -- confirm.
            self.crawler.engine.crawl(self.create_request(), self)
            # Set only after crawl() returned, so a failure above leaves the
            # flag unset and the next idle signal retries.
            self.yet = True
Post a Comment for "Scrapy: How To Manually Insert A Request From A Spider_idle Event Callback?"