AR
Size: a a a
AR
MМ
class IpRotationMiddleware:
    """Spider middleware that re-issues a request answered with an IP-ban page.

    ``process_spider_input`` detects the ban page and raises
    ``IpBannedException``; ``process_spider_exception`` reacts to that
    exception by returning a copy of the original request so it is scheduled
    again. The actual IP/proxy switch is not performed in this class.
    """

    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the spider middleware does not modify the
    # passed objects.

    def process_spider_input(self, response, spider):
        """Inspect a response on its way into the spider.

        Args:
            response: The response being processed; only ``xpath`` is used.
            spider: The spider the response is intended for (unused).

        Returns:
            None, so that processing of the response continues.

        Raises:
            IpBannedException: If the first ``body/pre`` node starts with the
                ban-page marker text.
        """
        # Called for each response that goes through the spider
        # middleware and into the spider.
        pre_nodes = response.xpath('body/pre')
        # The marker is exactly 20 characters long, so this prefix test is the
        # same check as comparing the first 20 characters of the node.
        if pre_nodes and pre_nodes.get().startswith('<pre>\nYour IP addres'):
            logging.warning("IP banned, rotating")
            raise IpBannedException
        # Should return None or raise an exception.
        return None

    def process_spider_exception(self, response, exception, spider):
        """Retry the request when the failure was an IP ban.

        Args:
            response: The response being processed when the exception was
                raised; its ``request`` attribute is the request to retry.
            exception: The exception instance that was raised.
            spider: The spider that raised the exception (unused).

        Returns:
            A one-element list holding a copy of the original request with
            ``dont_filter=True`` when ``exception`` is an
            ``IpBannedException``; otherwise None, which leaves the exception
            to the remaining middlewares.
        """
        # Called when a spider or process_spider_input() method
        # (from other spider middleware) raises an exception.
        # Should return either None or an iterable of Request or item objects.
        #
        # ``exception`` is an instance, so it must be matched with
        # isinstance(); comparing it to the class with ``==`` is never true.
        if isinstance(exception, IpBannedException):
            # dont_filter=True keeps the duplicate filter from dropping the
            # retry, whose URL has already been seen.
            retry_request = response.request.replace(dont_filter=True)
            # Return (not yield) the iterable: a ``yield`` anywhere in this
            # method would turn it into a generator, so every exception would
            # look handled, and the yielded item would be a list rather than
            # a Request.
            return [retry_request]
        # Not ours to handle: hand the exception on unchanged.
        return None
# Author's note that was pasted into the dict literal (translated):
# "I don't understand what I'm doing wrong."
#
# Spider middlewares enabled for the project, keyed by import path. The
# integer is the middleware's order value.
SPIDER_MIDDLEWARES = {
    'clinicalsynopsis.middlewares.Ignore404MiddleWare': 1,
    'clinicalsynopsis.middlewares.IpRotationMiddleware': 2,
}
AR
AR
AR
MМ
MМ
MМ
AR
AR
МС
class IpRotationMiddleware:
    """Spider middleware that re-issues a request answered with an IP-ban page.

    ``process_spider_input`` detects the ban page and raises
    ``IpBannedException``; ``process_spider_exception`` reacts to that
    exception by returning a copy of the original request so it is scheduled
    again. The actual IP/proxy switch is not performed in this class.
    """

    # Not all methods need to be defined. If a method is not defined,
    # scrapy acts as if the spider middleware does not modify the
    # passed objects.

    def process_spider_input(self, response, spider):
        """Inspect a response on its way into the spider.

        Args:
            response: The response being processed; only ``xpath`` is used.
            spider: The spider the response is intended for (unused).

        Returns:
            None, so that processing of the response continues.

        Raises:
            IpBannedException: If the first ``body/pre`` node starts with the
                ban-page marker text.
        """
        # Called for each response that goes through the spider
        # middleware and into the spider.
        pre_nodes = response.xpath('body/pre')
        # The marker is exactly 20 characters long, so this prefix test is the
        # same check as comparing the first 20 characters of the node.
        if pre_nodes and pre_nodes.get().startswith('<pre>\nYour IP addres'):
            logging.warning("IP banned, rotating")
            raise IpBannedException
        # Should return None or raise an exception.
        return None

    def process_spider_exception(self, response, exception, spider):
        """Retry the request when the failure was an IP ban.

        Args:
            response: The response being processed when the exception was
                raised; its ``request`` attribute is the request to retry.
            exception: The exception instance that was raised.
            spider: The spider that raised the exception (unused).

        Returns:
            A one-element list holding a copy of the original request with
            ``dont_filter=True`` when ``exception`` is an
            ``IpBannedException``; otherwise None, which leaves the exception
            to the remaining middlewares.
        """
        # Called when a spider or process_spider_input() method
        # (from other spider middleware) raises an exception.
        # Should return either None or an iterable of Request or item objects.
        #
        # ``exception`` is an instance, so it must be matched with
        # isinstance(); comparing it to the class with ``==`` is never true.
        if isinstance(exception, IpBannedException):
            # dont_filter=True keeps the duplicate filter from dropping the
            # retry, whose URL has already been seen.
            retry_request = response.request.replace(dont_filter=True)
            # Return (not yield) the iterable: a ``yield`` anywhere in this
            # method would turn it into a generator, so every exception would
            # look handled, and the yielded item would be a list rather than
            # a Request.
            return [retry_request]
        # Not ours to handle: hand the exception on unchanged.
        return None
# Author's note that was pasted into the dict literal (translated):
# "I don't understand what I'm doing wrong."
#
# Spider middlewares enabled for the project, keyed by import path. The
# integer is the middleware's order value.
SPIDER_MIDDLEWARES = {
    'clinicalsynopsis.middlewares.Ignore404MiddleWare': 1,
    'clinicalsynopsis.middlewares.IpRotationMiddleware': 2,
}
МС
AR
MМ
МС
MМ
МС
МС
MМ
DEBUG: Filtered duplicate request: <GET > - no more duplicates will be shown (see DUPEFILTER_DEBUG to show all duplicates)
Возможно, стоит добавить время, через которое запрос будет отправляться повторно?
MМ