mdserver-web/plugins/simdht/workers/index_worker.py

53 lines
1.7 KiB
Python
Raw Normal View History

2018-12-28 04:50:22 -05:00
#!/usr/bin/env python
2018-12-28 05:11:01 -05:00
# coding: utf8
2018-12-28 04:50:22 -05:00
"""
从MySQL数据库中读取未索引的资源更新到Sphinx的实时索引中
xiaoxia@xiaoxia.org
2015.5 created
"""
import time
import MySQLdb as mdb
import MySQLdb.cursors
# Credentials for the source MySQL server (database ``ssbc``).
SRC_HOST = '127.0.0.1'
SRC_USER = 'root'
SRC_PASS = 'root'

# Credentials for the destination: the Sphinx real-time index, reached over
# the MySQL wire protocol on port 9306.
DST_HOST = '127.0.0.1'
DST_USER = 'root'
DST_PASS = 'root'

# Source connection; rows come back as dicts keyed by column name.
src_conn = mdb.connect(
    host=SRC_HOST,
    user=SRC_USER,
    passwd=SRC_PASS,
    db='ssbc',
    charset='utf8',
    cursorclass=MySQLdb.cursors.DictCursor,
)
src_curr = src_conn.cursor()
src_curr.execute('SET NAMES utf8')

# Destination connection, using the default (tuple) cursor.
dst_conn = mdb.connect(
    host=DST_HOST,
    user=DST_USER,
    passwd=DST_PASS,
    db='rt_main',
    port=9306,
    charset='utf8',
)
dst_curr = dst_conn.cursor()
dst_curr.execute('SET NAMES utf8')
2018-12-28 05:11:01 -05:00
2018-12-28 04:50:22 -05:00
def work():
src_curr.execute('SELECT id, name, CRC32(category) AS category, length, UNIX_TIMESTAMP(create_time) AS create_time, ' +
2018-12-28 05:11:01 -05:00
'UNIX_TIMESTAMP(last_seen) AS last_seen FROM search_hash WHERE tagged=false LIMIT 10000')
2018-12-28 04:50:22 -05:00
total = src_curr.rowcount
print 'fetched', total
for one in src_curr:
ret = dst_curr.execute('insert into rt_main(id,name,category,length,create_time,last_seen) values(%s,%s,%s,%s,%s,%s)',
2018-12-28 05:11:01 -05:00
(one['id'], one['name'], one['category'], one['length'], one['create_time'], one['last_seen']))
2018-12-28 04:50:22 -05:00
if ret:
2018-12-28 05:11:01 -05:00
src_curr.execute(
'UPDATE search_hash SET tagged=True WHERE id=%s', (one['id'],))
2018-12-28 04:50:22 -05:00
print 'Indexed', one['name'].encode('utf8')
print 'Done!'
return total
if __name__ == '__main__':
while True:
if work() == 10000:
print 'Continue...'
continue
print 'Wait 10mins...'
time.sleep(600)