#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# fetchvc.py fetch resources from verycd
#
# author: observer
# email: jingchaohu@gmail.com
# blog: http://obmem.com
# last edit @ 2010.04.04
import urllib,urllib2
import re
import download
import os,sys
import sqlite3
def ensure_dir(f):
    """Make sure the parent directory of file path *f* exists.

    Missing intermediate directories are created as well.  A path without
    a directory component (a bare filename) needs no work and is accepted.

    f -- path of a file that is about to be written.

    Raises OSError if the directory is missing and cannot be created.
    """
    d = os.path.dirname(f)
    if not d:
        # Bare filename: the parent is the current directory, and
        # os.makedirs('') would raise.
        return
    try:
        os.makedirs(d)
    except OSError:
        # An already-existing path is fine (it may also have been created
        # by someone else between a check and the call); re-raise only
        # when the directory is really missing.
        if not os.path.exists(d):
            raise
def update(num=10,off=1):
    """Download the topic icons shown on VeryCD list pages off..num.

    Each list page is fetched with download.httpfetch, the part between
    "topic-list" and "pnav" is scanned for (topic id, image url) pairs,
    and every image is stored under
    iconcache/<id[:2]>/<id[2:4]>/<id>.jpg relative to the current
    directory.  Pages without a topic list section are skipped.

    num -- number of the last list page to fetch (inclusive).
    off -- number of the first list page to fetch.
    """
    # Alternative list URL kept from the original source:
    # urlbase = 'http://www.verycd.com/sto/~update_all/page'
    urlbase = 'http://www.verycd.com/sto/~all/page'
    # Compiled once, outside the page loop.
    list_re = re.compile(r'"topic-list"(.*?)"pnav"', re.DOTALL)
    # NOTE(review): this literal was broken across two lines in the source
    # and had lost its second capture group, although the loop below
    # unpacks (topic, imgurl).  The <img ... src="..."> part is a
    # reconstruction -- confirm against the real page markup.
    icon_re = re.compile(r'/topics/(\d+).*?<img[^>]*src="(.*?)"', re.DOTALL)
    for i in range(off, num + 1):
        print('fetching list %s ...' % (i,))
        res = download.httpfetch(urlbase + str(i), needlogin=False)
        sections = list_re.findall(res)
        if not sections:
            continue
        for topic, imgurl in icon_re.findall(sections[0]):
            print('fetching %s %s' % (topic, imgurl))
            ico = download.httpfetch(imgurl)
            topic = str(topic)
            # Two directory levels taken from the id prefix.
            cpath = 'iconcache' + '/' + topic[:2] + '/' + topic[2:4] + '/' + topic + '.jpg'
            ensure_dir(cpath)
            # "with" closes the file even if the write fails.
            with open(cpath, 'wb') as out:
                out.write(ico)
def _save_icon(imgurl, cpath):
    """Download imgurl and save it at cpath as a 100x100 RGB image."""
    # Imported here, as in the original code, so that a missing imaging
    # library is reported per image by the caller's exception handler.
    import Image,StringIO
    req = urllib2.Request(imgurl)
    # The image URL is sent as its own Referer -- presumably to get past
    # hotlink checks; TODO confirm.
    req.add_header('Referer', imgurl)
    resp = urllib2.urlopen(req)
    try:
        img = resp.read()
    finally:
        resp.close()
    im = Image.open(StringIO.StringIO(img))
    im = im.resize((100, 100))
    im = im.convert('RGB')
    im.save(cpath)


def killq(cnt=20):
    """Create missing icons for the most recently updated topics.

    Reads the ids of the cnt topics with the newest updtime from the
    verycd table.  For every topic whose cached icon file is missing or
    at most 5 bytes long, the stored page content is scanned for <img>
    sources; the first one that downloads and converts successfully is
    saved as a 100x100 RGB image.  URLs containing "logo" are skipped.
    Failures are printed and processing continues with the next
    candidate.

    cnt -- how many topics to check; anything int() accepts, so a
           command line string works too.
    """
    # NOTE(review): both literals were broken across lines in the source;
    # the leading '<img[^>' is reconstructed from the surviving
    # ']*src=...' tails -- confirm against the stored markup.
    quoted_src = re.compile(r'<img[^>]*src="(.*?)"', re.DOTALL)
    bare_jpg_src = re.compile(r'<img[^>]*src=([^"].*?\.jpg)')
    conn = sqlite3.connect('/var/www/simplecd.old/verycd.sqlite3.db')
    try:
        conn.text_factory = str
        c = conn.cursor()
        c.execute('select verycdid from verycd order by updtime desc limit 0,?', (int(cnt),))
        topics = [x[0] for x in c.fetchall()]
        for topic in topics:
            tid = str(topic)
            cpath = '/var/www/simplecd.old/iconcache/' + tid[:2] + '/' + tid[2:4] + '/' + tid + '.jpg'
            # Files of 5 bytes or less are treated like missing ones.
            # getsize avoids reading the whole file and leaking a handle.
            if os.path.exists(cpath) and os.path.getsize(cpath) > 5:
                print('ok %s' % (topic,))
                continue
            ensure_dir(cpath)
            print('patching %s' % (topic,))
            try:
                c.execute('select content from verycd where verycdid=?', (int(topic),))
                content = c.fetchone()[0]
            except Exception as what:
                # Best effort: report and go on with the next topic.
                print(what)
                continue
            candidates = quoted_src.findall(content)
            candidates.extend(bare_jpg_src.findall(content))
            for imgurl in candidates:
                if "logo" in imgurl:
                    continue
                try:
                    _save_icon(imgurl, cpath)
                    break
                except Exception as what:
                    # Try the next candidate image of this topic.
                    print(what)
    finally:
        # Always release the database connection.
        conn.close()
if __name__ == '__main__':
    # Disabled call kept from the original source:
    #update(off=0,num=500)
    # Optional first command line argument: number of most recently
    # updated topics to check (default 100).  It is converted to int here
    # so that a non-numeric value fails before any database work starts.
    if len(sys.argv) > 1:
        killq(int(sys.argv[1]))
    else:
        killq(100)