scrapy downloading images

之前介绍了自己编写的多进程、多线程实现,后来发现 Scrapy 这个框架更好:它拥有多线程的速度,并且会提示你有哪些图片没有爬取下来、完成了多少张图片的爬取之类的信息,而且使用起来非常方便,可以直接通过 pip 安装。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import scrapy
# import codecs
import os
from bingproxy import BingProxy

class ImagesSpider(scrapy.Spider):
    """Spider that downloads every image listed in a local URL file.

    Each line of the input file is a direct image URL; the response body is
    written to ``dir_path`` using the last path segment of the URL (query
    string stripped) plus a ``.png`` suffix as the filename.
    """

    name = "images"
    # Output directory for downloaded images; created eagerly at class
    # definition time so parse() can write into it unconditionally.
    dir_path = "huaban_bingproxy_big_images"
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    start_urls = []
    # NOTE(review): proxy helper is constructed but currently unused by
    # start_requests (the proxied-request line was disabled) — confirm
    # whether it can be removed.
    bingProxy = BingProxy()

    def start_requests(self):
        """Yield one GET request per non-blank line of the URL list file."""
        with open('processing_threading_huaban_big_images_all_urls_part3.txt') as url_list:
            for line in url_list:
                url = line.strip()
                # str.strip() always returns a str, so a simple truthiness
                # check covers empty and whitespace-only lines.
                if url:
                    yield scrapy.Request(url=url, callback=self.parse, method="GET")

    def parse(self, response):
        """Save the response body to ``<dir_path>/<url basename>.png``."""
        # Strip any query string so the filename is valid on all
        # filesystems, and reuse dir_path instead of re-hardcoding the
        # directory name.
        filename = response.url.split('/')[-1].split('?')[0] + ".png"
        path = os.path.join(self.dir_path, filename)
        with open(path, 'wb') as f:
            f.write(response.body)