Tuesday, March 5, 2013

Python image downloader

昨天下午寫出來的小玩具

BeautifulSoup 和 urllib2 的簡單練習 : )

然後我發現我還是不會用 Python 的 re 模組…… Orz

絕對不是要拿來抓謎物喔


#!/usr/bin/python

import urllib2
import urlparse
import BeautifulSoup
import os
import time
import re
                                                                                
def find_tags(url):
    """Fetch the page at ``url`` and return all of its <img> tags.

    Args:
        url: Address of the HTML page to download and parse.

    Returns:
        The result of ``findAll('img')`` on the parsed document.
    """
    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        # Close the connection explicitly instead of relying on garbage
        # collection to do it.
        response.close()
    soup = BeautifulSoup.BeautifulSoup(html)
    return soup.findAll('img')

def get_src(url, re_):
    """Yield image URLs from the page at ``url`` whose ``src`` matches ``re_``.

    Args:
        url: Address of the HTML page to scan.
        re_: Regular expression applied with ``search`` to each raw ``src``
            attribute value.

    Yields:
        Each matching ``src``, resolved against ``url`` so that relative and
        protocol-relative references come out as absolute URLs.
    """
    pattern = re.compile(re_)
    for tag in find_tags(url):
        # An <img> without a src attribute would raise KeyError on
        # tag['src']; there is nothing to download for it, so skip it.
        src = tag.get('src')
        if not src:
            continue
        if pattern.search(src):
            # Absolute URLs pass through urljoin unchanged; relative ones
            # are resolved against the page they were found on.
            yield urlparse.urljoin(url, src)

def get_basename(url):
    """Return the last component of the path part of ``url``.

    The query string and fragment are not part of the path, so they never
    appear in the result. A URL whose path is empty or ends in ``/`` gives
    an empty string.
    """
    path = urlparse.urlsplit(url).path
    _directory, filename = os.path.split(path)
    return filename

def download(url, re_=''):
    """Download the images on the page at ``url`` whose ``src`` matches ``re_``.

    Each image is written to the current working directory under the name
    returned by ``get_basename`` for its URL; an existing file with that name
    is overwritten.

    Args:
        url: Address of the HTML page to scan for <img> tags.
        re_: Regular expression used to filter image sources. The default
            empty pattern matches every source.
    """
    for src in get_src(url, re_):
        response = urllib2.urlopen(src)
        try:
            content = response.read()
        finally:
            # Release the connection even if the read fails part-way.
            response.close()
        print("[+] download: " + src)
        # The with-block closes the file even when the write raises.
        with open(get_basename(src), 'wb') as file_:
            file_.write(content)
        # One-second pause between requests (presumably to go easy on the
        # remote server).
        time.sleep(1)

if __name__ == '__main__':
    # Script entry point: fetch the images from this blog whose src
    # matches the pattern "gif".
    download('http://this-plt-life.tumblr.com', 'gif')

No comments:

Post a Comment