数据准备:
--汉字和拼音以及拼音首字母的对照表create table pinyin (hz varchar(1),py varchar(6),zm varchar(1));--索引以及唯一约束,视情况怎么加create index IDx_pinyin_hz on pinyin(hz);--create unique index IDx_pinyin_hz_py on pinyin(hz,py);--create unique index IDx_pinyin_hz_zm on pinyin(hz,zm);--未收录汉字插入以下表create table new_discover (hz varchar(1) primary key,zm varchar(1));
函数准备:
--创建输出type和函数,函数返回是数组CREATE TYPE t_py_zm as (c1 text[],c2 text[]);CREATE OR REPLACE FUNCTION get_py_zm(i_hz text) RETURNS SetoF t_py_zm AS$BODY$DECLAREv_hz text;i int;v_sql1 text;v_sql2 text;v_sql3 text;v_sql4 text;v_sql text;v_max_ID int;v_ID int;BEGIN--创建临时表用来存储每个汉字和字母set clIEnt_min_messages = warning;CREATE TEMPORARY table IF NOT EXISTS tmp_get_py_zm (ID int,zm varchar(1)) ON COMMIT DELETE ROWS;truncate table tmp_get_py_zm;i := 0;--拆分输入参数为每个字符并插入到临时表里for v_hz in select regexp_split_to_table(i_hz,'') loop if ascii(v_hz) > 255 then insert into tmp_get_py_zm select i,py,zm from pinyin where hz=v_hz; else insert into tmp_get_py_zm values(i,v_hz,v_hz); end if; if not found then perform 1 from new_discover where hz = v_hz; if not found then insert into new_discover(hz) values(v_hz); end if; insert into tmp_get_py_zm values(i,'?','?'); end if; i := i+1;end loop;select max(ID) into v_max_ID from tmp_get_py_zm;if v_max_ID > 0 then v_sql1 := ''; v_sql3 := ''; v_sql4 := ''; v_ID := 0; for v_ID in select generate_serIEs(0,v_max_ID) loop if v_ID <> v_max_ID then v_sql1 := v_sql1||'(select py,zm from tmp_get_py_zm where ID='||v_ID||') as t'||v_ID||','; v_sql3 := v_sql3||'t'||v_ID||'.py::text||'; v_sql4 := v_sql4||'t'||v_ID||'.zm::text||'; else v_sql1 := v_sql1||'(select py,zm from tmp_get_py_zm where ID='||v_ID||') as t'||v_ID; v_sql3 := v_sql3||'t'||v_ID||'.py::text'; v_sql4 := v_sql4||'t'||v_ID||'.zm::text'; v_sql := 'select array_agg('||v_sql3||'),array_agg('||v_sql4||') from '||v_sql1; end if; end loop;else v_sql := 'select array_agg(py::text),array_agg(zm::text) from tmp_get_py_zm';end if;return query execute v_sql;return;END;$BODY$ LANGUAGE plpgsql VolATILE COST 100 ROWS 1000;ALTER FUNCTION get_py_zm(text) OWNER TO postgres;--上面这个函数比我预期的功能要强大,除了输出汉字的拼音缩写外还提供了全拼,效果如下postgres=# select * from get_py_zm('我爱你'); c1 | c2 ----------+------- {woaini} | {wan}(1 row)
我的需求只需要输出简写,故外面再套一层循环更新函数并转换数组为字符类型,省得去更新原来的函数
CREATE OR REPLACE FUNCTION f_update_pinyin() RETURNS VOID AS $BODY$declarev_value text;i_hn text;rec record;begin--where后面的条件是筛选出不全为拼音的数据,也可以不用该条件全量更新,tmp_kenyon是我要更新的表for rec in select num,name from tmp_kenyon where pym !~ E'[A-Z][A-Z][A-Z]' and pym !~ E'[A-Z][A-Z]' loop i_hn:=rec.name; select c2[1]::text into v_value from get_py_zm(i_hn); update tmp_kenyon set pym = v_value where num = rec.num; end loop;return;end;$BODY$ LANGUAGE plpgsql VolATILE COST 100;ALTER FUNCTION f_update_pin() OWNER TO postgres; --调用该函数时直接使用select f_update_pin();--如果遇到词库没有的汉字需要在new_discovery里面更新并最终补充到pinyin词库表里去pinyin词库表整理在了下面的云盘地址里,目前收录了将近7000个常用汉字,地址在: http://pan.baIDu.com/s/1pJ6spSn 导入方式
[postgres@db1 ~]$ psql
psql (9.2.4)
Type "help" for help.
postgres=# \i /home/postgres/py.sql
参考: http://blog.163.com/digoal@126/blog/static/163877040201241452827379/ 总结
以上是内存溢出为你收集整理的PostgreSQL的汉字转拼音全部内容,希望文章能够帮你解决PostgreSQL的汉字转拼音所遇到的程序开发问题。
如果觉得内存溢出网站内容还不错,欢迎将内存溢出网站推荐给程序员好友。
欢迎分享,转载请注明来源:内存溢出
评论列表(0条)