BeautifulSoup 和 urllib2 的簡單練習 : )
然後我發現我還是不太會用 Python 的 re 模組（正規表示式）…… Orz
#!/usr/bin/python
"""Download the images on a web page whose ``src`` matches a regular expression.

A small exercise with BeautifulSoup 3 and urllib2 (Python 2).  Each matching
image is saved into the current working directory under the basename of its
URL path.
"""
import os
import re
import time
import urllib2
import urlparse

import BeautifulSoup


def _fetch(url):
    """Return the raw response body of *url*, always closing the connection."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def find_tags(url):
    """Return every ``<img>`` tag found in the page at *url*."""
    soup = BeautifulSoup.BeautifulSoup(_fetch(url))
    return soup.findAll('img')


def get_src(url, re_):
    """Yield the URL of each image on page *url* whose ``src`` matches *re_*.

    *re_* is a regular expression searched (not anchored) against the raw
    ``src`` attribute.  Tags without a ``src`` are skipped, and relative
    ``src`` values are resolved against *url* so the yielded URL can be
    fetched directly.
    """
    pattern = re.compile(re_)
    for tag in find_tags(url):
        src = tag.get('src')
        if not src:
            # <img> without a usable src attribute: nothing to download.
            continue
        if pattern.search(src):
            # urljoin leaves absolute URLs untouched and resolves relative ones.
            yield urlparse.urljoin(url, src)


def get_basename(url):
    """Return the last path component of *url* (query and fragment excluded)."""
    return os.path.basename(urlparse.urlsplit(url)[2])


def download(url, re_=''):
    """Save every image on page *url* whose ``src`` matches *re_*.

    The default empty pattern matches every image.  Files are written to the
    current directory; an existing file with the same name is overwritten.
    """
    for src in get_src(url, re_):
        filename = get_basename(src)
        if not filename:
            # URL path ends with '/', so there is no file name to save under.
            continue
        content = _fetch(src)
        # Parenthesised single argument prints identically under the
        # Python 2 print statement.
        print("[+] download: " + src)
        with open(filename, 'wb') as file_:
            file_.write(content)
        # Pause between requests to be polite to the server.
        time.sleep(1)


if __name__ == '__main__':
    download(url='http://this-plt-life.tumblr.com', re_='gif')
No comments:
Post a Comment