mdserver-web/plugins/sphinx/class/sphinx_make.py

491 lines
13 KiB
Python
Raw Permalink Normal View History

2024-05-19 02:58:14 -04:00
# coding:utf-8
import sys
import io
import os
import time
import subprocess
import re
import json
2025-07-21 07:18:03 -04:00
web_dir = os.getcwd() + "/web"
if os.path.exists(web_dir):
sys.path.append(web_dir)
os.chdir(web_dir)
import core.mw as mw
2024-05-19 02:58:14 -04:00
def getServerDir():
return mw.getServerDir() + '/mysql'
def getPluginDir():
return mw.getPluginDir() + '/mysql'
def getConf():
path = getServerDir() + '/etc/my.cnf'
return path
def getDbPort():
file = getConf()
content = mw.readFile(file)
2025-07-21 07:18:03 -04:00
rep = r'port\s*=\s*(.*)'
2024-05-19 02:58:14 -04:00
tmp = re.search(rep, content)
return tmp.groups()[0].strip()
def getSocketFile():
file = getConf()
content = mw.readFile(file)
2025-07-21 07:18:03 -04:00
rep = r'socket\s*=\s*(.*)'
2024-05-19 02:58:14 -04:00
tmp = re.search(rep, content)
return tmp.groups()[0].strip()
def pSqliteDb(dbname='databases'):
file = getServerDir() + '/mysql.db'
name = 'mysql'
conn = mw.M(dbname).dbPos(getServerDir(), name)
return conn
def pMysqlDb():
# pymysql
db = mw.getMyORM()
db.setPort(getDbPort())
db.setSocket(getSocketFile())
# db.setCharset("utf8")
db.setPwd(pSqliteDb('config').where('id=?', (1,)).getField('mysql_root'))
return db
2024-05-20 02:11:27 -04:00
class sphinxMake():
pdb = None
psdb = None
2024-05-20 08:14:48 -04:00
pkey_name_cache = {}
delta = 'sph_counter'
2024-05-21 07:43:25 -04:00
ver = ''
2024-05-20 08:14:48 -04:00
2024-05-20 02:11:27 -04:00
def __init__(self):
self.pdb = pMysqlDb()
2024-05-20 08:14:48 -04:00
def setDeltaName(self, name):
self.delta = name
return True
2024-05-21 07:43:25 -04:00
def setVersion(self, ver):
self.ver = ver
2024-05-20 11:18:48 -04:00
def createSql(self, db):
2024-05-20 08:14:48 -04:00
conf = '''
2024-05-20 11:18:48 -04:00
CREATE TABLE IF NOT EXISTS `{$DB_NAME}`.`{$TABLE_NAME}` (
2024-05-21 11:21:20 -04:00
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
`table` varchar(200) NOT NULL,
`max_id` bigint(20) unsigned NOT NULL DEFAULT '0',
PRIMARY KEY (`id`),
UNIQUE KEY `table_uniq` (`table`),
KEY `table` (`table`)
) ENGINE=InnoDB AUTO_INCREMENT=1 CHARSET=utf8mb4;
2024-05-20 08:14:48 -04:00
'''
2024-05-20 11:18:48 -04:00
conf = conf.replace("{$TABLE_NAME}", self.delta)
conf = conf.replace("{$DB_NAME}", db)
2024-05-20 08:14:48 -04:00
return conf
2024-05-21 07:55:36 -04:00
2024-05-21 07:43:25 -04:00
def eqVerField(self, field):
ver = self.ver.replace(".1",'')
2024-05-21 07:55:36 -04:00
if float(ver) >= 3.6:
if field == 'sql_attr_timestamp':
return 'attr_bigint'
if field == 'sql_attr_bigint':
return 'attr_bigint'
2024-05-21 07:59:23 -04:00
if field == 'sql_attr_float':
return 'attr_float'
2024-05-21 07:55:36 -04:00
if field == 'sql_field_string':
return 'field_string'
2024-05-21 07:43:25 -04:00
if float(ver) >= 3.3:
if field == 'sql_attr_timestamp':
return 'sql_attr_bigint'
return field
2024-05-20 02:11:27 -04:00
2024-05-21 07:55:36 -04:00
def pathVerName(self):
ver = self.ver.replace(".1",'')
2024-05-21 08:42:38 -04:00
# if float(ver) >= 3.6:
# return 'datadir'
2024-05-21 07:55:36 -04:00
return 'path'
2024-05-20 08:14:48 -04:00
def getTablePk(self, db, table):
key = db+'_'+table
if key in self.pkey_name_cache:
return self.pkey_name_cache[key]
2024-05-20 02:11:27 -04:00
# SHOW INDEX FROM bbs.bbs_ucenter_vars WHERE Key_name = 'PRIMARY'
pkey_sql = "SHOW INDEX FROM {}.{} WHERE Key_name = 'PRIMARY';".format(db,table,);
pkey_data = self.pdb.query(pkey_sql)
# print(db, table)
# print(pkey_data)
2024-05-20 08:14:48 -04:00
key = ''
2024-05-20 02:11:27 -04:00
if len(pkey_data) == 1:
pkey_name = pkey_data[0]['Column_name']
sql = "select COLUMN_NAME,DATA_TYPE from information_schema.COLUMNS where `TABLE_SCHEMA`='{}' and `TABLE_NAME` = '{}' and `COLUMN_NAME`='{}';"
sql = sql.format(db,table,pkey_name,)
# print(sql)
fields = self.pdb.query(sql)
if len(fields) == 1:
# print(fields[0]['DATA_TYPE'])
if mw.inArray(['bigint','smallint','tinyint','int','mediumint'], fields[0]['DATA_TYPE']):
2024-05-20 08:14:48 -04:00
key = pkey_name
return key
2024-05-20 02:11:27 -04:00
def getTableFieldStr(self, db, table):
sql = "select COLUMN_NAME,DATA_TYPE from information_schema.COLUMNS where `TABLE_SCHEMA`='{}' and `TABLE_NAME` = '{}';"
sql = sql.format(db,table,)
fields = self.pdb.query(sql)
field_str = ''
for x in range(len(fields)):
field_str += '`'+fields[x]['COLUMN_NAME']+'`,'
field_str = field_str.strip(',')
return field_str
def makeSphinxHeader(self):
conf = '''
2024-05-19 04:24:33 -04:00
indexer
{
mem_limit = 128M
}
2024-05-19 06:10:54 -04:00
searchd
2024-05-19 04:24:33 -04:00
{
listen = 9312
listen = 9306:mysql41
log = {$server_dir}/sphinx/index/searchd.log
query_log = {$server_dir}/sphinx/index/query.log
read_timeout = 5
max_children = 0
pid_file = {$server_dir}/sphinx/index/searchd.pid
seamless_rotate = 1
preopen_indexes = 1
unlink_old = 1
#workers = threads # for RT to work
binlog_path = {$server_dir}/sphinx/index/binlog
}
2024-05-20 02:11:27 -04:00
'''
conf = conf.replace("{$server_dir}", mw.getServerDir())
return conf
2024-05-20 08:14:48 -04:00
def makeSphinxDbSourceRangeSql(self, db, table):
pkey_name = self.getTablePk(db,table)
2024-05-20 02:11:27 -04:00
sql = "SELECT min("+pkey_name+"), max("+pkey_name+") FROM "+table
return sql
2024-05-20 08:14:48 -04:00
def makeSphinxDbSourceQuerySql(self, db, table):
pkey_name = self.getTablePk(db,table)
2024-05-20 02:11:27 -04:00
field_str = self.getTableFieldStr(db,table)
# print(field_str)
if pkey_name == 'id':
sql = "SELECT " + field_str + " FROM " + table + " where id >= $start AND id <= $end"
else:
sql = "SELECT `"+pkey_name+'` as `id`,' + field_str + " FROM " + table + " where "+pkey_name+" >= $start AND "+pkey_name+" <= $end"
return sql
2024-05-20 08:14:48 -04:00
def makeSphinxDbSourceDeltaRange(self, db, table):
pkey_name = self.getTablePk(db,table)
conf = "SELECT (SELECT max_id FROM `{$SPH_TABLE}` where `table`='{$TABLE_NAME}') as min, (SELECT max({$PK_NAME}) FROM {$TABLE_NAME}) as max"
conf = conf.replace("{$DB_NAME}", db)
conf = conf.replace("{$TABLE_NAME}", table)
conf = conf.replace("{$SPH_TABLE}", self.delta)
conf = conf.replace("{$PK_NAME}", pkey_name)
return conf
2024-05-21 11:39:04 -04:00
def makeSphinxDbSourcePost(self, db, table):
2024-05-20 08:14:48 -04:00
pkey_name = self.getTablePk(db,table)
2024-05-21 11:39:04 -04:00
conf = "sql_query_post = UPDATE {$SPH_TABLE} SET max_id=(SELECT MAX({$PK_NAME}) FROM {$TABLE_NAME}) where `table`='{$TABLE_NAME}'"
2024-05-21 11:23:33 -04:00
# conf = "REPLACE INTO {$SPH_TABLE} (`table`,`max_id`) VALUES ('{$TABLE_NAME}',(SELECT MAX({$PK_NAME}) FROM {$TABLE_NAME}))"
2024-05-20 08:14:48 -04:00
conf = conf.replace("{$DB_NAME}", db)
conf = conf.replace("{$TABLE_NAME}", table)
conf = conf.replace("{$SPH_TABLE}", self.delta)
conf = conf.replace("{$PK_NAME}", pkey_name)
return conf
def makeSphinxDbSourceDelta(self, db, table):
conf = '''
source {$DB_NAME}_{$TABLE_NAME}_delta:{$DB_NAME}_{$TABLE_NAME}
{
sql_query_pre = SET NAMES utf8
sql_query_range = {$DELTA_RANGE}
sql_query = {$DELTA_QUERY}
2024-05-21 11:39:04 -04:00
{$DELTA_UPDATE}
2024-05-20 11:46:23 -04:00
{$SPH_FIELD}
2024-05-20 08:14:48 -04:00
}
index {$DB_NAME}_{$TABLE_NAME}_delta:{$DB_NAME}_{$TABLE_NAME}
{
source = {$DB_NAME}_{$TABLE_NAME}_delta
2024-05-21 07:55:36 -04:00
{$PATH_NAME} = {$server_dir}/sphinx/index/db/{$DB_NAME}.{$TABLE_NAME}/delta
2024-05-20 08:14:48 -04:00
html_strip = 1
ngram_len = 1
ngram_chars = U+3000..U+2FA1F
2024-05-21 08:27:55 -04:00
{$SPH_FIELD_INDEX}
2024-05-20 08:14:48 -04:00
}
''';
conf = conf.replace("{$server_dir}", mw.getServerDir())
2024-05-21 07:55:36 -04:00
conf = conf.replace("{$PATH_NAME}", self.pathVerName())
2024-05-20 08:14:48 -04:00
conf = conf.replace("{$DB_NAME}", db)
conf = conf.replace("{$TABLE_NAME}", table)
delta_range = self.makeSphinxDbSourceDeltaRange(db, table)
conf = conf.replace("{$DELTA_RANGE}", delta_range)
delta_query = self.makeSphinxDbSourceQuerySql(db, table)
conf = conf.replace("{$DELTA_QUERY}", delta_query)
2024-05-21 11:39:04 -04:00
delta_update = self.makeSphinxDbSourcePost(db, table)
2024-05-20 08:14:48 -04:00
conf = conf.replace("{$DELTA_UPDATE}", delta_update)
2024-05-20 11:46:23 -04:00
2024-05-21 08:27:55 -04:00
2024-05-20 11:46:23 -04:00
sph_field = self.makeSqlToSphinxTable(db, table)
2024-05-21 08:56:38 -04:00
conf = self.makeSphinxDbFieldRepalce(conf, sph_field)
2024-05-20 08:14:48 -04:00
return conf;
2024-05-21 05:12:39 -04:00
def makeSphinxDbSource(self, db, table, create_sphinx_table = False):
2024-05-20 02:11:27 -04:00
db_info = pSqliteDb('databases').field('username,password').where('name=?', (db,)).find()
port = getDbPort()
conf = '''
2024-05-19 04:24:33 -04:00
source {$DB_NAME}_{$TABLE_NAME}
{
type = mysql
sql_host = 127.0.0.1
sql_user = {$DB_USER}
sql_pass = {$DB_PASS}
sql_db = {$DB_NAME}
sql_port = {$DB_PORT}
2024-05-21 08:35:50 -04:00
sql_query_pre = SET NAMES utf8
2024-05-21 11:39:19 -04:00
2024-05-21 11:39:04 -04:00
{$UPDATE}
2024-05-21 08:40:15 -04:00
2024-05-19 04:24:33 -04:00
sql_query_range = {$DB_RANGE_SQL}
sql_range_step = 1000
sql_query = {$DB_QUERY_SQL}
{$SPH_FIELD}
}
index {$DB_NAME}_{$TABLE_NAME}
{
source = {$DB_NAME}_{$TABLE_NAME}
2024-05-21 07:55:36 -04:00
{$PATH_NAME} = {$server_dir}/sphinx/index/db/{$DB_NAME}.{$TABLE_NAME}/index
2024-05-19 04:24:33 -04:00
ngram_len = 1
ngram_chars = U+3000..U+2FA1F
2024-05-21 08:27:55 -04:00
{$SPH_FIELD_INDEX}
2024-05-19 04:24:33 -04:00
}
2024-05-20 02:11:27 -04:00
'''
conf = conf.replace("{$server_dir}", mw.getServerDir())
2024-05-21 07:55:36 -04:00
conf = conf.replace("{$PATH_NAME}", self.pathVerName())
2024-05-21 07:58:43 -04:00
2024-05-20 02:11:27 -04:00
conf = conf.replace("{$DB_NAME}", db)
conf = conf.replace("{$TABLE_NAME}", table)
conf = conf.replace("{$DB_USER}", db_info['username'])
conf = conf.replace("{$DB_PASS}", db_info['password'])
conf = conf.replace("{$DB_PORT}", port)
2024-05-20 08:14:48 -04:00
range_sql = self.makeSphinxDbSourceRangeSql(db, table)
2024-05-20 02:11:27 -04:00
conf = conf.replace("{$DB_RANGE_SQL}", range_sql)
2024-05-19 04:24:33 -04:00
2024-05-20 08:14:48 -04:00
query_sql = self.makeSphinxDbSourceQuerySql(db, table)
2024-05-20 02:11:27 -04:00
conf = conf.replace("{$DB_QUERY_SQL}", query_sql)
2024-05-19 04:24:33 -04:00
2024-05-20 08:14:48 -04:00
sph_field = self.makeSqlToSphinxTable(db, table)
2024-05-21 08:27:55 -04:00
# conf = conf.replace("{$SPH_FIELD}", sph_field)
2024-05-21 08:56:38 -04:00
conf = self.makeSphinxDbFieldRepalce(conf, sph_field)
2024-05-19 02:58:14 -04:00
2024-05-20 11:18:48 -04:00
if create_sphinx_table:
2024-05-21 11:39:04 -04:00
update = self.makeSphinxDbSourcePost(db, table)
conf = conf.replace("{$UPDATE}", update)
2024-05-21 23:37:47 -04:00
else:
conf = conf.replace("{$UPDATE}", '')
2024-05-21 11:39:04 -04:00
if create_sphinx_table:
2024-05-20 11:18:48 -04:00
sph_sql = self.createSql(db)
2024-05-21 11:21:20 -04:00
self.pdb.query(sph_sql)
sql_find = "select * from {}.{} where `table`='{}'".format(db,self.delta,table)
find_data = self.pdb.query(sql_find)
if len(find_data) == 0:
2024-05-21 11:28:55 -04:00
insert_sql = "insert into `{}`.`{}`(`table`,`max_id`) values ('{}',{}) ".format(db,self.delta,table,0)
2024-05-21 11:21:20 -04:00
# print(insert_sql)
self.pdb.execute(insert_sql)
2024-05-20 11:18:48 -04:00
conf += self.makeSphinxDbSourceDelta(db,table)
2024-05-20 08:14:48 -04:00
2024-05-21 08:40:15 -04:00
# print(ver)
# print(conf)
2024-05-20 02:11:27 -04:00
return conf
2024-05-19 04:24:33 -04:00
2024-05-21 08:56:38 -04:00
def makeSphinxDbFieldRepalce(self, content, sph_field):
ver = self.ver.replace(".1",'')
2024-05-21 08:58:22 -04:00
ver = float(ver)
2024-05-21 10:29:32 -04:00
if ver >= 3.6:
2024-05-21 08:56:38 -04:00
content = content.replace("{$SPH_FIELD}", '')
content = content.replace("{$SPH_FIELD_INDEX}", '')
else:
content = content.replace("{$SPH_FIELD}", sph_field)
content = content.replace("{$SPH_FIELD_INDEX}", '')
return content
2024-05-21 05:12:39 -04:00
def makeSqlToSphinxDb(self, db, table = [], is_delta = False):
2024-05-20 02:11:27 -04:00
conf = ''
2024-05-21 04:57:05 -04:00
2024-05-20 08:14:48 -04:00
for tn in table:
2024-05-21 04:57:05 -04:00
pkey_name = self.getTablePk(db,tn)
2024-05-20 02:11:27 -04:00
if pkey_name == '':
continue
2024-05-21 05:12:39 -04:00
conf += self.makeSphinxDbSource(db, tn,is_delta)
2024-05-20 02:11:27 -04:00
if len(table) == 0:
tables = self.pdb.query("show tables in "+ db)
for x in range(len(tables)):
key = 'Tables_in_'+db
table_name = tables[x][key]
2024-05-21 05:12:39 -04:00
pkey_name = self.getTablePk(db, table_name, is_delta)
2024-05-20 02:11:27 -04:00
if pkey_name == '':
continue
if self.makeSqlToSphinxTableIsHaveFulltext(db, table_name):
2024-05-20 08:14:48 -04:00
conf += self.makeSphinxDbSource(db, table_name)
2024-05-20 02:11:27 -04:00
return conf
def makeSqlToSphinxTableIsHaveFulltext(self, db, table):
sql = "select COLUMN_NAME,DATA_TYPE from information_schema.COLUMNS where `TABLE_SCHEMA`='{}' and `TABLE_NAME` = '{}';"
sql = sql.format(db,table,)
cols = self.pdb.query(sql)
cols_len = len(cols)
for x in range(cols_len):
data_type = cols[x]['DATA_TYPE']
column_name = cols[x]['COLUMN_NAME']
if mw.inArray(['varchar'], data_type):
return True
if mw.inArray(['text','mediumtext','tinytext','longtext'], data_type):
return True
return False
2024-05-20 08:14:48 -04:00
def makeSqlToSphinxTable(self,db,table):
pkey_name = self.getTablePk(db,table)
2024-05-20 02:11:27 -04:00
sql = "select COLUMN_NAME,DATA_TYPE from information_schema.COLUMNS where `TABLE_SCHEMA`='{}' and `TABLE_NAME` = '{}';"
sql = sql.format(db,table,)
cols = self.pdb.query(sql)
cols_len = len(cols)
conf = ''
run_pos = 0
for x in range(cols_len):
data_type = cols[x]['DATA_TYPE']
column_name = cols[x]['COLUMN_NAME']
# print(column_name+":"+data_type)
# if mw.inArray(['tinyint'], data_type):
# conf += 'sql_attr_bool = '+ column_name + "\n"
if pkey_name == column_name:
# run_pos += 1
# conf += '\tsql_attr_bigint = '+column_name+"\n"
continue
2024-05-19 04:24:33 -04:00
2024-05-20 02:11:27 -04:00
if mw.inArray(['enum'], data_type):
run_pos += 1
2024-05-21 07:58:43 -04:00
conf += '\t'+self.eqVerField('sql_attr_string')+' = '+ column_name + "\n"
2024-05-20 02:11:27 -04:00
continue
2024-05-19 04:24:33 -04:00
2024-05-20 02:11:27 -04:00
if mw.inArray(['decimal'], data_type):
run_pos += 1
2024-05-21 07:58:43 -04:00
conf += '\t'+self.eqVerField('sql_attr_float')+' = '+ column_name + "\n"
continue
2024-05-19 07:49:15 -04:00
2024-05-20 02:11:27 -04:00
if mw.inArray(['bigint','smallint','tinyint','int','mediumint'], data_type):
run_pos += 1
2024-05-21 07:58:43 -04:00
conf += '\t'+self.eqVerField('sql_attr_bigint')+' = '+ column_name + "\n"
2024-05-20 02:11:27 -04:00
continue
2024-05-19 07:49:15 -04:00
2024-05-20 02:11:27 -04:00
if mw.inArray(['float'], data_type):
run_pos += 1
2024-05-21 07:58:43 -04:00
conf += '\t'+self.eqVerField('sql_attr_float')+' = '+ column_name + "\n"
2024-05-20 02:11:27 -04:00
continue
2024-05-19 07:49:15 -04:00
2024-05-20 02:11:27 -04:00
if mw.inArray(['char'], data_type):
2024-05-21 07:58:43 -04:00
conf += '\t'+self.eqVerField('sql_attr_string')+' = '+ column_name + "\n"
2024-05-20 02:11:27 -04:00
continue
2024-05-19 04:24:33 -04:00
2024-05-20 02:11:27 -04:00
if mw.inArray(['varchar'], data_type):
run_pos += 1
2024-05-21 07:58:43 -04:00
conf += '\t'+self.eqVerField('sql_field_string')+' = '+ column_name + "\n"
2024-05-20 02:11:27 -04:00
continue
2024-05-19 04:24:33 -04:00
2024-05-20 02:11:27 -04:00
if mw.inArray(['text','mediumtext','tinytext','longtext'], data_type):
run_pos += 1
2024-05-21 07:58:43 -04:00
conf += '\t'+self.eqVerField('sql_field_string')+' = '+ column_name + "\n"
2024-05-20 02:11:27 -04:00
continue
2024-05-19 04:24:33 -04:00
2024-05-20 02:11:27 -04:00
if mw.inArray(['datetime','date'], data_type):
run_pos += 1
2024-05-21 07:43:25 -04:00
conf += '\t'+self.eqVerField('sql_attr_timestamp')+' = '+ column_name + "\n"
2024-05-20 02:11:27 -04:00
continue
2024-05-19 05:57:27 -04:00
2024-05-20 02:11:27 -04:00
return conf
2024-05-19 04:24:33 -04:00
2024-05-21 04:57:05 -04:00
def checkDbName(self, db):
filter_db = ['information_schema','performance_schema','sys','mysql']
if db in filter_db:
return False
return True
2024-05-21 05:12:39 -04:00
def makeSqlToSphinx(self, db, tables = [], is_delta = False):
2024-05-21 04:57:05 -04:00
conf = ''
conf += self.makeSphinxHeader()
2024-05-21 05:12:39 -04:00
conf += self.makeSqlToSphinxDb(db, tables, is_delta)
2024-05-21 04:57:05 -04:00
return conf
2024-05-19 04:24:33 -04:00
2024-05-20 02:11:27 -04:00
def makeSqlToSphinxAll(self):
filter_db = ['information_schema','performance_schema','sys','mysql']
2024-05-19 04:24:33 -04:00
2024-05-20 02:11:27 -04:00
dblist = self.pdb.query('show databases')
2024-05-19 04:24:33 -04:00
2024-05-20 02:11:27 -04:00
conf = ''
conf += self.makeSphinxHeader()
2024-05-19 05:57:27 -04:00
2024-05-20 02:11:27 -04:00
# conf += makeSqlToSphinxDb(pdb, 'bbs')
for x in range(len(dblist)):
dbname = dblist[x]['Database']
if mw.inArray(filter_db, dbname):
continue
conf += self.makeSqlToSphinxDb(dbname)
return conf
2024-05-19 05:57:27 -04:00