#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Compute the "related" column of the local ``verycd`` SQLite table.

For every processed row a handful of keywords is extracted from the title,
other rows whose title contains one of them are looked up, and the result is
stored as newline-separated ``<verycdid>`<title>`` pairs in ``related``.

Command line::

    script OFFSET LIMIT          # skip rows that already have a related list
    script OFFSET LIMIT FORCE    # any third argument forces recomputation
"""
import os
import re
import sqlite3
import sys
import time

import MySQLdb
import web

path = os.path.dirname(os.path.realpath(sys.argv[0]))
conn = sqlite3.connect(os.path.join(path, 'verycd.sqlite3.db'))
conn.text_factory = str
dbl = sqlite3.connect(os.path.join(path, 'lock.sqlite3.db'))
dbl.text_factory = str

# Maximum number of candidate rows collected for one entry.
_MAX_RELATED = 9

# Tokens shorter than this are too unspecific to be used in a LIKE search.
# The length is measured on whatever string type the connection returns.
# NOTE(review): with text_factory=str under Python 2 that is presumably the
# UTF-8 byte length, so a single CJK character is dropped - confirm.
_MIN_KEYWORD_LEN = 4

# Punctuation that acts as a word separator inside a title.  The full-width
# space, colon and comma are listed next to their ASCII forms because titles
# contain Chinese text.
_SEPARATORS = (
    '《', '》', '.', '(', ')', '-', '_', '[', ']', '&',
    '　', ':', '：', ',', '，', '+',
)

_ASCII_RUN = re.compile(r'[a-zA-Z0-9_]+')
_OTHER_RUN = re.compile(r'[^a-zA-Z0-9_]+')


def _purge_stale_rows(connection, table):
    """Delete rows of *table* whose ``updtime`` is below 1971 and commit.

    Best effort: a database error (for instance a missing table) is reported
    on stderr and otherwise ignored so that start-up continues.
    """
    cursor = connection.cursor()
    try:
        # *table* is always one of the literals passed below, never user input.
        cursor.execute('delete from %s where updtime<1971' % table)
        # commit() lives on the connection; cursors do not have it.
        connection.commit()
    except sqlite3.Error as exc:
        sys.stderr.write('cleanup of table %s skipped: %s\n' % (table, exc))
    finally:
        cursor.close()


_purge_stale_rows(conn, 'verycd')
_purge_stale_rows(dbl, 'lock')


def makerelated_mysql(id, related, db=None):
    """Store *related* for entry *id* in the ``verycd`` table of a MySQL db.

    This helper is not called anywhere in this file.

    :param id: value of the ``verycdid`` column identifying the row.
    :param related: text to store in the ``related`` column.
    :param db: open DB-API connection; when omitted, a module-level ``db``
        is used if one has been defined.
    :raises RuntimeError: if no connection is available.
    """
    if db is None:
        db = globals().get('db')
    if db is None:
        raise RuntimeError(
            'makerelated_mysql needs a connection: pass db=... or define '
            'a module-level "db"')
    c = db.cursor()
    try:
        # MySQLdb uses the "format" paramstyle; the driver does the quoting,
        # so titles containing quotes no longer break the statement.
        c.execute('update verycd set related=%s where verycdid=%s',
                  (related, id))
        db.commit()
    finally:
        c.close()


def _split_title(title):
    """Return ``(keyword_text, phrase)`` derived from *title*.

    ``keyword_text`` is the title cut before its last ``[``, before the first
    ``)`` and before the first ``》``, with separators replaced by spaces.
    ``phrase`` is the text in front of the last ``(`` when the title has a
    parenthesised part, otherwise the unmodified title.
    """
    text = title
    cut = text.rfind('[')
    # Only cut when a bracket exists and something precedes it; slicing with
    # the "not found" result -1 would silently drop the last character.
    if cut > 0:
        text = text[:cut]

    phrase = title
    cut = text.find(')')
    if cut != -1:
        text = text[:cut]
        phrase = text.rsplit('(', 1)[0]

    cut = text.find('》')
    if cut != -1:
        text = text[:cut]

    for separator in _SEPARATORS:
        text = text.replace(separator, ' ')
    return text, phrase


def _keywords(text):
    """Return the search keywords contained in the whitespace-separated *text*.

    Each token additionally contributes its first ASCII run and its first
    non-ASCII run.  Tokens that are too short, start with '第' or consist only
    of digits are discarded.
    """
    tokens = text.split()
    # Iterate over a snapshot: only the original tokens are scanned, while
    # the runs found in them are appended to the live list.
    for token in list(tokens):
        for pattern in (_ASCII_RUN, _OTHER_RUN):
            found = pattern.search(token)
            if found and found.group() not in tokens:
                tokens.append(found.group())
    # Build a new list instead of removing while iterating, which skipped
    # the element following every removed one.
    return [
        token for token in tokens
        if len(token) >= _MIN_KEYWORD_LEN
        and not token.startswith('第')
        and not token.isdigit()
    ]


def makerelated(id, conn, force=False):
    """Compute and store the related-entries list of one ``verycd`` row.

    Rows whose title matches one of the extracted keywords are collected
    (newest ``updtime`` first, at most nine); if fewer than nine are found the
    list is topped up with the newest rows sharing ``category1`` and
    ``category2`` with the entry.  The entry itself and duplicates are left
    out.  The result is written to the ``related`` column and committed.

    :param id: ``verycdid`` of the row to process (anything ``int()`` takes).
    :param conn: open sqlite3 connection holding the ``verycd`` table.
    :param force: recompute even if the row already has a related list.
    :returns: the related list as a string (``''`` if the lookup failed), or
        ``None`` when *id* is invalid, the row does not exist, or it already
        has a related list longer than 20 characters and *force* is false.
    """
    try:
        item_id = int(id)
    except (TypeError, ValueError):
        return None

    c = conn.cursor()
    try:
        try:
            c.execute('select title,related from verycd where verycdid=?',
                      (item_id,))
            row = c.fetchone()
        except Exception as exc:
            sys.stderr.write('cannot read entry %s: %s\n' % (item_id, exc))
            return None
        if row is None:
            return None
        title, rel = row

        if rel and len(rel) > 20 and not force:
            print('no need...')
            return None

        text, phrase = _split_title(title or '')
        print(text)
        terms = _keywords(text)
        # An empty phrase would turn into LIKE "%%" and match every row.
        if phrase:
            terms.append(phrase)

        related = ''
        try:
            rows = []
            if terms:
                where = ' or '.join(['title like ?'] * len(terms))
                c.execute(
                    'select verycdid,title from verycd where %s '
                    'order by updtime desc limit %d' % (where, _MAX_RELATED),
                    ['%' + term + '%' for term in terms])
                rows = list(c.fetchall())
            if len(rows) < _MAX_RELATED:
                c.execute(
                    'select verycdid,title from verycd '
                    'where category1 = (select category1 from verycd '
                    'where verycdid=?) '
                    'and category2 = (select category2 from verycd '
                    'where verycdid=?) '
                    'order by updtime desc limit ?',
                    (item_id, item_id, _MAX_RELATED - len(rows)))
                rows.extend(c.fetchall())

            # Drop the entry itself and rows returned by both queries.
            seen = set([item_id])
            picked = []
            for candidate in rows:
                candidate_id = int(candidate[0])
                if candidate_id in seen:
                    continue
                seen.add(candidate_id)
                picked.append(candidate)

            related = '\n'.join(
                [str(r[0]) + '`' + str(r[1]) for r in picked])
            print('____making related for id %s %s ____'
                  % (id, ' '.join(terms)))
            print(related)
            c.execute('update verycd set related=? where verycdid=?',
                      (related, item_id))
        except Exception as exc:
            # Best effort: one failing entry must not abort a whole batch,
            # but the failure is no longer hidden.
            sys.stderr.write('making related for %s failed: %s\n'
                             % (item_id, exc))
        conn.commit()
        return related
    finally:
        c.close()


def related(offset=0, limit=30, force=False):
    """Run :func:`makerelated` over a window of the newest ``verycd`` rows.

    :param offset: number of rows (ordered by ``updtime`` descending) to skip.
    :param limit: number of rows to process.
    :param force: passed through to :func:`makerelated`.
    """
    c = conn.cursor()
    try:
        c.execute(
            'select verycdid from verycd order by updtime desc limit ?,?',
            (int(offset), int(limit)))
        ids = [row[0] for row in c.fetchall()]
    finally:
        c.close()

    total = len(ids)
    for index, item_id in enumerate(ids):
        print('::::::::::::::::::: %4.2f%% ::::::::::::::::::::::'
              % (float(index) / float(total) * 100))
        makerelated(item_id, conn, force)


if __name__ == '__main__':
    if len(sys.argv) == 3:
        related(offset=int(sys.argv[1]), limit=int(sys.argv[2]))
    elif len(sys.argv) == 4:
        related(offset=int(sys.argv[1]), limit=int(sys.argv[2]), force=True)