#!/usr/bin/env python # -*- coding: utf-8 -*- # # fetchvc.py fetch resources from verycd # # author: observer # email: jingchaohu@gmail.com # blog: http://obmem.com # last edit @ 2010.04.04 import urllib,urllib2 import re import download import os,sys import sqlite3 def ensure_dir(f): d = os.path.dirname(f) if not os.path.exists(d): os.makedirs(d) def update(num=10,off=1): # urlbase = 'http://www.verycd.com/sto/~update_all/page' urlbase = 'http://www.verycd.com/sto/~all/page' for i in range(off,num+1): print 'fetching list',i,'...' url = urlbase+str(i) res = download.httpfetch(url,needlogin=False) res2 = re.compile(r'"topic-list"(.*?)"pnav"',re.DOTALL).findall(res) if res2: res2 = res2[0] else: continue icons = re.compile(r'/topics/(\d+).*?',re.DOTALL).findall(res2) for topic,imgurl in icons: print 'fetching',topic,imgurl ico = download.httpfetch(imgurl) cpath = 'iconcache'+'/'+str(topic)[:2]+'/'+str(topic)[2:4]+'/'+str(topic)+'.jpg' ensure_dir(cpath) open(cpath,'wb').write(ico) def killq(cnt=20): conn = sqlite3.connect('/var/www/simplecd.old/verycd.sqlite3.db') conn.text_factory = str c = conn.cursor() c.execute('select verycdid from verycd order by updtime desc limit 0,?',(cnt,)) topics = [ x[0] for x in c.fetchall() ] for topic in topics: cpath = '/var/www/simplecd.old/iconcache/'+str(topic)[:2]+'/'+str(topic)[2:4]+'/'+str(topic)+'.jpg' ensure_dir(cpath) if not (os.path.exists(cpath) and len(open(cpath,'rb').read())>5): print 'patching',topic try: c.execute( 'select content from verycd where verycdid=?',(long(topic),) ) content = c.fetchone()[0] except Exception as what: print what continue imgurl = re.compile(r']*src="(.*?)"',re.DOTALL).findall(content) imgurl.extend( re.compile(r']*src=([^"].*?\.jpg)').findall(content) ) for url in imgurl: imgurl = url if "logo" in imgurl: continue try: req = urllib2.Request(imgurl) req.add_header('Referer',imgurl) img = urllib2.urlopen(req).read() import Image,StringIO im = Image.open( StringIO.StringIO(img) ) im = im.resize( (100,100) ) im = im.convert('RGB') im.save(cpath) break except Exception as what: print what else: print 'ok',topic if __name__ == '__main__': #update(off=0,num=500) import os,sys if len(sys.argv)>1: killq(sys.argv[1]) else: killq(100)