379 lines
1.0 MiB
JavaScript
379 lines
1.0 MiB
JavaScript
|
/*
 * 基于 https://github.com/sxei/pinyinjs 修改
 * 调用接口从 window.pinyinUtil 改为 window.pinyinUtilx
 * 输出格式从字符串转为数组
 */
|
|||
|
var pinyin_dict_polyphone = {"阿Q":"ā ","阿爸":"ā ","阿鼻":"ā ","阿呆":"ā ","阿弟":"ā dì","阿爹":"ā ","阿斗":"ā dǒu","阿飞":"ā ","阿哥":"ā ","阿公":"ā ","阿訇":"ā ","阿胶":"ē ","阿姐":"ā ","阿拉":"ā lā","阿妈":"ā ","阿妹":"ā ","阿曼":"ā ","阿门":"ā ","阿蒙":"ā méng","阿片":"ā piàn","阿婆":"ā ","哎呀":" yā","哎哟":" yō","哀号":" háo","哀家":" jiā","哀矜":" jīn","哀乐":" yuè","哀思":" sī","嗳气":"ài ","癌症":" zhèng","嗳酸":"ǎi ","矮个":" gè","爱称":" chēng","爱好":" hào","隘巷":" xiàng","碍难":" nán","安度":" dù","安打":" dǎ","安分":" fèn","安好":" hǎo","安家":" jiā","安拉":" lā","安乐":" lè","安排":" pái","安宁":" níng","安上":" shàng","安详":" xiáng","俺家":" jiā","按脉":" mài","按摩":" mó","按期":" qī","按说":" shuō","按压":" yā","按语":" yǔ","暗藏":" cáng","暗处":" chù","暗场":" chǎng","暗地":" dì","暗号":" hào","暗合":" hé","暗语":" yǔ","暗中":" zhōng","昂藏":" cáng","暗转":" zhuǎn","凹版":"āo ","凹洞":"āo ","凹度":"āo dù","凹镜":"āo ","凹凸":"āo ","凹洼":"āo ","凹陷":"āo ","熬熬":"áo áo","熬心":"áo ","熬煎":"áo ","熬夜":"áo ","鳌头":" tóu","拗断":"ǎo ","拗口":"ào ","傲骨":" gǔ","奥博":"ào ","奥兰":"ào ","奥秘":"ào mì","奥义":"ào ","奥妙":"ào ","奥援":"ào ","懊丧":" sàng","奥旨":"ào ","八风":" fēng","八节":" jié","八角":" jiǎo","八邪":" xié","巴结":" jì","扒车":"bā chē","扒糕":"pá ","扒灰":"pá ","扒开":"bā ","扒拉":"bā lā","扒皮":"bā ","扒窃":"pá ","扒手":"pá ","吧嗒":"bā dā","吧唧":"bā ","吧女":"bā nǚ","吧台":"bā tái","笆斗":" dǒu","拔地":" dì","拔毒":" dú","拔节":" jié","把柄":"bǎ ","把持":"bǎ ","把舵":"bǎ ","把风":"bǎ fēng","把关":"bǎ ","把家":"bǎ jiā","把酒":"bǎ ","把揽":"bǎ ","把牢":"bǎ ","把门":"bǎ ","把脉":"bǎ mài","把式":"bǎ ","把势":"bǎ ","把手":"bǎ ","把守":"bǎ ","把头":"bǎ tóu","把玩":"bǎ ","把稳":"bǎ ","把握":"bǎ ","把晤":"bǎ ","把盏":"bǎ ","把戏":"bǎ xì","把住":"bǎ ","把子":"bà ","靶场":" chǎng","靶台":" tái","坝地":" dì","白地":" dì","白发":" fà","白丁":" dīng","白干":" qián","白骨":" gǔ","白净":" jìng","白卷":" juàn","白露":" lù","白票":" piào","白区":" qū","白食":" shí","白蛇":" shé","白术":" zhú","白苏":" sū","白汤":" tāng","白体":" tǐ","白头":" tóu","白条":" tiáo","白相":" xiàng","白血":" xuè","白种":" zhǒng","百分":" fēn","百度":" dù","百合":" hé","百会":" huì","百万":" wàn","百戏":" xì","百叶":" yè","柏林":"bó ","柏乡":"bǎi ","柏油":"bǎi ","摆弄":" nòng","摆正":" 
zhèng","败北":" běi","败毒":" dú","败坏":" huài","败家":" jiā","败将":" jiàng","败露":" lù","败落":" luò","败兴":" xìng","败血":" xuè","败亡":" wáng","拜别":" bié","拜读":" dú","拜倒":" dǎo","拜佛":" fó","拜服":" fú","拜见":" jiàn","拜祭":" jì","拜会":" huì","拜节":" jié","扳倒":" dǎo","班禅":" chán","班车":" chē","班长":" zhǎng","班期":" qī","斑纹":" wén","搬家":" jiā","板车":" chē","搬弄":" nòng","板结":" jié","板刷":" shuā","板正":" zhèng","板铺":" pù","办差":" chāi","版筑":" zhù","办好":" hǎo","半大":" dà","半价":" jià","半空":" kōng","半票":" piào","半数":" shù","半拉":" lǎ","半休":" xiū","半载":" zǎi","伴读":" dú","伴同":" tóng","伴郎":" láng","拌和":"bàn huò","扮戏":" xì","扮相":" xiàng","拌蒜":"bàn ","拌匀":"bàn ","绊倒":" dǎo","拌嘴":"bàn ","拌种":"bàn zhǒng","帮会":" huì","帮教":" jiào","帮同":" tóng","帮佣":" yōng","帮助":" zhù","绑票":" piào","绑扎":" zhā","榜额":"bǎng ","榜首":"bǎng ","榜书":"bǎng ","榜文":"bǎng ","榜眼":"bǎng ","榜样":"bǎng ","膀臂":" bì","棒喝":" hè","棒头":" tóu","磅礴":"páng ","<EFBFBD><EFBFBD>
|
|||
|
var pinyin_dict_withtone = "yī,dīng zhēng,kǎo qiǎo yú,qī,shàng,xià,hǎn,wàn mò,zhàng,sān,shàng shǎng,xià,qí jī,bù fǒu,yǔ yù yú,miǎn,gài,chǒu,chǒu,zhuān,qiě jū,pī,shì,shì,qiū,bǐng,yè,cóng,dōng,sī,chéng,diū,qiū,liǎng,diū,yǒu,liǎng,yán,bìng,sāng sàng,gǔn,jiū,gè gě,yā,pán,zhōng zhòng,jǐ,jiè,fēng,guàn kuàng,chuàn,chǎn,lín,zhuó,zhǔ,bā,wán,dān,wéi wèi,zhǔ,jǐng,lì lí,jǔ,piě,fú,yí jí,yì,nǎi,wǔ,jiǔ,jiǔ,tuō zhé,me yāo mó ma,yì,yī,zhī,wū,zhà,hū,fá,lè yuè,yín,pīng,pāng,qiáo,hǔ,guāi,chéng shèng,chéng shèng,yǐ,háo yǐ,yǐ,miē niè,jiǔ,qǐ,yě,xí,xiāng,gài,jiǔ,xià,hù,shū,dǒu,shǐ,jī,náng,jiā,none,shí,none,hū,mǎi,luàn,none,rǔ,xué,yǎn,fǔ,shā,nǎ,qián,suǒ,yú,zhù,zhě,qián gān,zhì luàn,guī,qián,luàn,lǐn lìn,yì,jué,le liǎo,gè mā,yú yǔ,zhēng,shì,shì,èr,chù,yú,kuī,yú,yún,hù,qí,wǔ,jǐng,sì,suì,gèn,gèn,yà,xiē suò,yà,qí zhāi,yā yà,jí qì,tóu,wáng wú,kàng,dà,jiāo,hài,yì,chǎn,hēng pēng,mǔ,ye,xiǎng,jīng,tíng,liàng,xiǎng,jīng,yè,qīn qìng,bó,yòu,xiè,dǎn dàn,lián,duǒ,wěi mén,rén,rén,jí,jí,wáng,yì,shén shí,rén,lè,dīng,zè,jǐn jìn,pū pú,chóu qiú,bā,zhǎng,jīn,jiè,bīng,réng,cóng zòng,fó,jīn sǎn,lún,bīng,cāng,zī zǐ zǎi,shì,tā,zhàng,fù,xiān,xiān,tuō chà duó,hóng,tóng,rèn,qiān,gǎn hàn,yì gē,bó,dài,líng lǐng lìng,yǐ,chào,cháng zhǎng,sā,cháng,yí,mù,mén,rèn,fǎn,chào miǎo,yǎng áng,qián,zhòng,pǐ pí,wò,wǔ,jiàn,jià jiè jie,yǎo fó,fēng,cāng,rèn rén,wáng,fèn bīn,dī,fǎng,zhōng,qǐ,pèi,yú,diào,dùn,wěn,yì,xǐn,kàng,yī,jí,ài,wǔ,jì qí,fú,fá,xiū xǔ,jìn yín,pī,dǎn,fū,tǎng,zhòng,yōu,huǒ,huì kuài,yǔ,cuì,yún,sǎn,wěi,chuán zhuàn,chē jū,yá,qiàn,shāng,chāng,lún,cāng chen,xùn,xìn,wěi,zhù,chǐ,xián xuán,nú nǔ,bó bǎi bà,gū gù,nǐ,nǐ nì,xiè,bàn,xù,líng,zhòu,shēn,qū,sì cì,bēng,sì shì,qié jiā gā,pī,yì,sì,yǐ chì,zhēng,diàn tián,hān gàn,mài,dàn,zhù,bù,qū,bǐ,zhāo shào,cǐ,wèi,dī,zhù,zuǒ,yòu,yǎng,tǐ tī bèn,zhàn diān,hé hē hè,bì,tuó,shé,yú,yì dié,fó fú bì bó,zuò,gōu kòu,nìng,tóng,nǐ,xiān,qú,yōng yòng,wǎ,qiān,yòu,kǎ,bāo,pèi,huí huái,gé,lǎo,xiáng,gé,yáng,bǎi,fǎ,mǐng,jiā,èr 
nài,bìng,jí,hěn,huó,guǐ,quán,tiāo,jiǎo,cì,yì,shǐ,xíng,shēn,tuō,kǎn,zhí,gāi,lái,yí,chǐ,kuǎ,gōng,lì,yīn,shì,mǐ,zhū,xù,yòu,ān,lù,móu,ér,lún,dòng tóng tǒng,chà,chì,xùn,gōng gòng,zhōu,yī,rú,cún jiàn,xiá,sì,dài,lǚ,ta,jiǎo yáo,zhēn,cè zè zhāi,qiáo,kuài,chái,nìng,nóng,jǐn,wǔ,hóu hòu,jiǒng,chěng tǐng,zhèn zhēn,zuò,hào,qīn,lǚ,jú,shù dōu,tǐng,shèn,tuó tuì,bó,nán,xiāo,biàn pián,tuǐ,yǔ,xì,cù,é,qiú,xú,guàng,kù,wù,jùn,yì,fǔ,liáng,zǔ,qiào xiào,lì,yǒng,hùn,jìng,qiàn,sàn,pěi,sú,fú,xī,lǐ,fǔ,pīng,bǎo,yú yù shù,sì qí,xiá,xìn shēn,xiū,yǔ,dì,chē jū,chóu,zhì,yǎn,liǎ,lì,lái,sī,jiǎn,xiū,fǔ,huò,jù,xiào,pái,jiàn,biào,chù tì,fèi,fèng,yà,ǎn,bèi,yù,xīn,bǐ,hǔ chí,chāng,zhī,bìng,jiù,yáo,cuì zú,liǎ,wǎn,lái,cāng,zǒng,gè gě,guān,bèi,tiǎn,shū,shū,mén,dǎo dào,tán tàn,jué juè,chuí,xìng,péng,tǎng cháng,hòu,yǐ,qī,tì,gàn,liàng jìng,jiè,suī,chàng chāng,jié,fǎng,zhí,kōng kǒng,juàn,zōng,jù,qiàn,ní,lún,zhuō,wō wēi,luǒ,sōng,lèng,hùn,dōng,zì,bèn,wǔ,jù,nǎi,cǎi,jiǎn,zhài,yē,zhí,shà,qīng,nìng,yīng,chēng chèn,qián,yǎn,ruǎn,zhòng tóng,chǔn,jiǎ jià,jì jié,wěi,yú,bǐng bìng,ruò,tí,wēi,piān,yàn,fēng,tǎng dàng,wò,è,xié,chě,shěng,kǎn,dì,zuò,chā,tíng,bèi,xiè,huáng,yǎo,zhàn,chǒu qiào,ān,yóu,jiàn,xū,zhā,cī,fù,bī,zhì,zǒng,miǎn,jí,yǐ,xiè,xún,cāi sī,duān,cè zè zhāi,zhēn,ǒu,tōu,tōu,bèi,zá zǎ,lǚ lóu,jié,wěi,fèn,cháng,kuǐ guī,sǒu,zhì sī,sù,xiā,fù,yuàn yuán,rǒng,lì,nù,yùn,jiǎng gòu,mà,bàng,diān,táng,hào,jié,xī xì,shān,qiàn j
|
|||
|
|
|||
|
/**
 * 汉字与拼音互转工具,根据导入的字典文件的不同支持不同的功能
 * 对于多音字目前只是将所有可能的组合输出,准确识别多音字需要完善的词库,而词库文件往往比字库还要大,所以不太适合web环境。
 * @start 2016-09-26
 * @last 2016-09-29
 */
|
|||
|
;(function(global, factory) {
|
|||
|
if (typeof module === "object" && typeof module.exports === "object") {
|
|||
|
module.exports = factory(global);
|
|||
|
} else {
|
|||
|
factory(global);
|
|||
|
}
|
|||
|
})(typeof window !== "undefined" ? window : this, function(window) {
|
|||
|
|
|||
|
// Maps every tone-marked pinyin letter to "<plain letter><tone number>".
// "v" stands in for "ü" (so "ü" without a tone mark is "v0").
// removeTone() uses the first character of each value; getTone() uses the
// inverse of this table.
var toneMap = {
    "ā": "a1", "á": "a2", "ǎ": "a3", "à": "a4",
    "ō": "o1", "ó": "o2", "ǒ": "o3", "ò": "o4",
    "ē": "e1", "é": "e2", "ě": "e3", "è": "e4",
    "ī": "i1", "í": "i2", "ǐ": "i3", "ì": "i4",
    "ū": "u1", "ú": "u2", "ǔ": "u3", "ù": "u4",
    "ü": "v0", "ǖ": "v1", "ǘ": "v2", "ǚ": "v3", "ǜ": "v4",
    "ń": "n2", "ň": "n3",
    // This key was an empty string, which can never match a letter and made
    // the inverse lookup for "m2" yield '' (treated as "not found").
    // U+1E3F is LATIN SMALL LETTER M WITH ACUTE.
    "\u1E3F": "m2"
};

var dict = {}; // holds every parsed dictionary table
|
|||
|
var pinyinUtilx = {
    /**
     * Builds the lookup tables stored in `dict` from the dictionary globals
     * found on `window`. The dictionary scripts must be loaded before this runs.
     *
     * Depending on which dictionaries are present it fills:
     *   dict.firstletter - the first-letter dictionary, stored as-is
     *   dict.notone      - hanzi -> toneless pinyin (first syllable seen wins)
     *   dict.withtone    - hanzi -> toned pinyin, alternatives separated by spaces
     *   dict.py2hz       - toneless pinyin -> hanzi
     */
    parseDict: function() {
        // Read the dictionaries through `window`, the same object the presence
        // checks use; bare global names throw a ReferenceError when the
        // factory's `window` argument is not the real global object.
        var firstLetterDict = window.pinyin_dict_firstletter;
        var noToneDict = window.pinyin_dict_notone;
        var withToneDict = window.pinyin_dict_withtone;
        var i, j, len;

        if (firstLetterDict) {
            dict.firstletter = firstLetterDict;
        }

        if (noToneDict) {
            dict.notone = {};
            dict.py2hz = noToneDict; // pinyin -> hanzi
            for (var syllable in noToneDict) {
                if (!Object.prototype.hasOwnProperty.call(noToneDict, syllable)) continue;
                var chars = noToneDict[syllable];
                for (j = 0, len = chars.length; j < len; j++) {
                    // Polyphones are ignored: the first syllable listing a character wins.
                    if (!dict.notone[chars[j]]) dict.notone[chars[j]] = syllable;
                }
            }
        }

        if (!withToneDict) return;

        dict.withtone = {};
        var toned = withToneDict.split(',');
        for (i = 0, len = toned.length; i < len; i++) {
            // Entry i belongs to char code 19968 + i. The per-character
            // split(' ') is deferred on purpose to keep start-up cheap.
            dict.withtone[String.fromCharCode(i + 19968)] = toned[i];
        }

        if (noToneDict) {
            // Prefer the toneless dictionary for pinyin -> hanzi; per the
            // original author's note it omits rare characters and is ordered
            // by usage frequency.
            dict.py2hz = noToneDict;
            return;
        }

        // Strip the tones from the whole dictionary string once, then split
        // (the original author measured this as several times faster than
        // stripping entry by entry).
        var plain = pinyinUtilx.removeTone(withToneDict).split(',');
        var py2hz = {};
        for (i = 0, len = plain.length; i < len; i++) {
            var hz = String.fromCharCode(i + 19968);
            var syllables = plain[i].split(' ');
            for (j = 0; j < syllables.length; j++) {
                py2hz[syllables[j]] = (py2hz[syllables[j]] || '') + hz;
            }
        }
        dict.py2hz = py2hz;
    },

    /**
     * Converts text to pinyin.
     *
     * Without polyphone support the result is an array with one entry per
     * character that has a dictionary reading and one entry per run of other
     * characters; spaces only separate runs and are not emitted.
     *
     * @param chinese   text to convert
     * @param splitter  join string used when polyphone combinations are built, default ' '
     * @param withtone  keep tone marks, default true
     * @param polyphone resolve/expand polyphonic characters, default false
     * @returns {Array} [] for empty or all-space input; otherwise the array
     *          described above, or whatever parsePolyphone / handlePolyphone
     *          returns when `polyphone` is truthy
     * @throws {string} when neither a toned nor a toneless dictionary was parsed
     */
    getPinyin: function(chinese, splitter, withtone, polyphone) {
        if (!chinese || /^ +$/.test(chinese)) return [];
        splitter = splitter == undefined ? ' ' : splitter;
        withtone = withtone == undefined ? true : withtone;
        polyphone = polyphone == undefined ? false : polyphone;

        var hasTones = !!dict.withtone; // the toned dictionary takes precedence
        var table = dict.withtone || dict.notone;
        if (!table) {
            throw '抱歉,未找到合适的拼音字典文件!';
        }
        if (!hasTones) {
            if (withtone) console.warn('pinyin_dict_notone 字典文件不支持声调!');
            if (polyphone) console.warn('pinyin_dict_notone 字典文件不支持多音字!');
        }

        var result = [];
        var pending = ''; // current run of characters that have no reading
        for (var i = 0, len = chinese.length; i < len; i++) {
            var ch = chinese[i];
            var pinyin = table[ch];
            if (pinyin) {
                if (hasTones) {
                    // Without polyphone support only the first reading is kept,
                    // so the dictionary must list the most common reading first.
                    if (!polyphone) pinyin = pinyin.replace(/ .*$/g, '');
                    if (!withtone) pinyin = this.removeTone(pinyin);
                }
                if (pending) {
                    result.push(pending);
                    pending = '';
                }
                result.push(pinyin);
            } else if (!ch || /^ +$/.test(ch)) {
                // A space ends the current run and is itself dropped.
                if (pending) {
                    result.push(pending);
                    pending = '';
                }
            } else {
                pending += ch;
            }
        }
        if (pending) {
            result.push(pending);
        }

        if (!polyphone) return result;
        if (window.pinyin_dict_polyphone) return parsePolyphone(chinese, result, splitter, withtone);
        return handlePolyphone(result, ' ', splitter);
    },

    /**
     * Returns the upper-case pinyin initials of a string.
     *
     * @param str       text to convert
     * @param polyphone default false; when true an array of candidate
     *                  initial strings is returned instead of one string
     * @returns {string|Array} '' for empty or all-space input
     */
    getFirstLetter: function(str, polyphone) {
        polyphone = polyphone == undefined ? false : polyphone;
        if (!str || /^ +$/.test(str)) return '';

        var result = [];
        var i;

        if (dict.firstletter) {
            for (i = 0; i < str.length; i++) {
                var unicode = str.charCodeAt(i);
                var ch = str.charAt(i);
                if (unicode >= 19968 && unicode <= 40869) {
                    ch = dict.firstletter.all.charAt(unicode - 19968);
                    if (polyphone) ch = dict.firstletter.polyphone[unicode] || ch;
                }
                result.push(ch);
            }
            if (!polyphone) return result.join('');
            // result looks like ['D', 'ZC', 'F'] here; expand every combination.
            return handlePolyphone(result, '', '');
        }

        // No first-letter dictionary: derive the initials from the pinyin.
        var py = this.getPinyin(str, ' ', false, polyphone);
        py = Array.isArray(py) ? py : [py];
        for (i = 0; i < py.length; i++) {
            result.push(py[i].replace(/(^| )(\w)\w*/g, function(m, $1, $2) {
                return $2.toUpperCase();
            }));
        }

        // getPinyin yields one entry per character here, so the initials have
        // to be concatenated; returning only result[0] dropped every
        // character after the first.
        if (!polyphone) return result.join('');

        // With a phrase dictionary getPinyin also returns per-character
        // entries (see parsePolyphone), i.e. a single candidate.
        if (window.pinyin_dict_polyphone) return [result.join('')];

        // Otherwise every entry is already one complete combination.
        return simpleUnique(result);
    },

    /**
     * Looks up the hanzi for the pinyin of a single character.
     *
     * @param pinyin pinyin of one character, tone marks allowed
     * @returns the dictionary entry for the toneless pinyin, or '' if none
     * @throws {string} when no pinyin -> hanzi table has been parsed
     */
    getHanzi: function(pinyin) {
        if (!dict.py2hz) {
            throw '抱歉,未找到合适的拼音字典文件!';
        }
        return dict.py2hz[this.removeTone(pinyin)] || '';
    },

    /**
     * Returns the characters that share the (toneless) pinyin of `hz`.
     * The original author marked this method as incomplete.
     *
     * @param hz       a single hanzi
     * @param sameTone accepted for compatibility; currently not used
     * @returns the matching hanzi, or '' when `hz` has no reading
     */
    getSameVoiceWord: function(hz, sameTone) {
        sameTone = sameTone || false;
        // getPinyin returns an array; getHanzi needs a single pinyin string.
        var readings = this.getPinyin(hz, ' ', false);
        var first = readings[0];
        return first ? this.getHanzi(first) : '';
    },

    /**
     * Strips tone marks, e.g. "xiǎo míng tóng xué" -> "xiao ming tong xue".
     * "ü" (with or without tone) becomes "v", as defined by toneMap.
     *
     * @param pinyin pinyin string to convert
     */
    removeTone: function(pinyin) {
        return pinyin.replace(/[āáǎàōóǒòēéěèīíǐìūúǔùüǖǘǚǜńň]/g, function(m) {
            return toneMap[m][0];
        });
    },

    /**
     * Converts numbered pinyin such as "xu2e" into pinyin with tone marks.
     * Letter/digit pairs that are not in toneMap are left unchanged.
     *
     * @param pinyinWithoutTone numbered pinyin; null/undefined is treated as ''
     */
    getTone: function(pinyinWithoutTone) {
        var newToneMap = {};
        for (var i in toneMap) newToneMap[toneMap[i]] = i;
        return (pinyinWithoutTone || '').replace(/[a-z]\d/g, function(m) {
            return newToneMap[m] || m;
        });
    }
};
|
|||
|
|
|||
|
|
|||
|
/**
 * Expands per-character alternatives into every full combination, e.g.
 * ['D', 'ZC', 'F'] (splitter '') becomes ['DZF', 'DCF'] and
 * ['chang zhang', 'cheng'] (splitter ' ', joinChar ' ') becomes
 * ['chang cheng', 'zhang cheng'].
 *
 * @param array    one string of alternatives per position
 * @param splitter separator between the alternatives inside one entry ('' splits per character)
 * @param joinChar string placed between positions in each combination
 * @returns de-duplicated list of combinations
 */
function handlePolyphone(array, splitter, joinChar) {
    var separator = splitter || '';
    var combos = [''];

    for (var position = 0; position < array.length; position++) {
        var extended = [];
        array[position].split(separator).forEach(function(option) {
            combos.forEach(function(prefix) {
                // No joiner in front of the very first part of a combination.
                extended.push(prefix + (prefix ? joinChar : '') + option);
            });
        });
        combos = extended;
    }

    return simpleUnique(combos);
}
|
|||
|
|
|||
|
/**
 * Chooses the reading of polyphonic characters with the phrase dictionary
 * `window.pinyin_dict_polyphone` (phrase -> space separated readings, where
 * an empty reading means "keep the default").
 *
 * This is a deliberately simple matcher (the original author recommends
 * running a real word segmenter first): the whole text is tried as one
 * phrase, otherwise every start position is tried with phrases of up to 7
 * characters. Whatever is still ambiguous afterwards falls back to its first
 * listed reading.
 *
 * `result` is updated in place and returned.
 *
 * @param chinese  the text that was converted
 * @param result   preliminary pinyin entries for `chinese`; entries may hold
 *                 several space separated readings
 * @param splitter unused, kept for interface compatibility
 * @param withtone whether readings taken from the dictionary keep their tone marks
 */
function parsePolyphone(chinese, result, splitter, withtone) {
    var poly = window.pinyin_dict_polyphone;
    var max = 7; // longest phrase looked up while scanning; longer ones are rare
    var slots = []; // slots[c] = index in `result` of the reading of chinese[c], or -1
    var c = 0, r = 0, i, j;

    // `result` is not aligned with `chinese`: spaces produce no entry and a
    // run of characters without a reading shares one entry. Walk both in
    // parallel to find the entry of each character. A run entry starts with
    // the character itself, a reading never does.
    while (c < chinese.length) {
        var ch = chinese[c];
        var entry = result[r];
        if (ch === ' ' || entry === undefined) {
            slots[c++] = -1;
        } else if (entry.charAt(0) === ch) {
            for (j = 0; j < entry.length; j++) slots[c + j] = -1;
            c += entry.length;
            r++;
        } else {
            slots[c++] = r++;
        }
    }

    // Own-property lookup so that text such as "constructor" cannot hit an
    // inherited member of the dictionary object.
    function lookup(word) {
        return Object.prototype.hasOwnProperty.call(poly, word) ? poly[word] : undefined;
    }

    // Writes the non-empty readings of a phrase starting at chinese[start].
    function apply(start, readings, count) {
        for (var n = 0; n < count; n++) {
            var slot = slots[start + n];
            if (readings[n] && slot >= 0) {
                result[slot] = withtone ? readings[n] : pinyinUtilx.removeTone(readings[n]);
            }
        }
    }

    var whole = lookup(chinese);
    if (whole) {
        // The complete text is a known phrase; no scanning needed.
        whole = whole.split(' ');
        apply(0, whole, whole.length);
    } else {
        for (i = 0; i < chinese.length; i++) {
            var word = '';
            for (j = 0; j < max && (i + j) < chinese.length; j++) {
                // A non-CJK character ends the search from this start position.
                if (!/^[\u2E80-\u9FFF]+$/.test(chinese[i + j])) break;
                word += chinese[i + j];
                var found = lookup(word);
                if (found) {
                    apply(i, found.split(' '), j + 1);
                    break;
                }
            }
        }
    }

    // Anything the dictionary did not decide keeps only its first reading.
    for (i = 0; i < result.length; i++) {
        result[i] = result[i].replace(/ .*$/g, '');
    }
    return result;
}
|
|||
|
|
|||
|
/**
 * Returns a copy of `array` without duplicates, keeping first occurrences in
 * order. Two elements count as equal when they have the same `typeof` and
 * the same string form (so 1 and '1' are both kept).
 */
function simpleUnique(array) {
    var seen = {};
    var unique = [];
    var index, item, tag;

    for (index = 0; index < array.length; index += 1) {
        item = array[index];
        tag = (typeof item) + item;
        if (seen[tag]) continue;
        seen[tag] = true;
        unique.push(item);
    }

    return unique;
}
|
|||
|
|
|||
|
pinyinUtilx.parseDict();
|
|||
|
pinyinUtilx.dict = dict;
|
|||
|
window.pinyinUtilx = pinyinUtilx;
|
|||
|
|
|||
|
});
|