# python调用NLPIR - ICTCLAS2013实现中文分词
#-*- encoding: utf-8 -*-
import NLPIR
import os


class C_NLPIR_ICTCLAS2013:
    """Object wrapper around the NLPIR (ICTCLAS2013) Chinese word segmenter.

    Constructing an instance initialises the NLPIR engine. Every other
    method forwards its arguments unchanged to the matching ``NLPIR.NLPIR_*``
    function of the ``NLPIR`` extension module imported by this file.
    """

    # Accepted ``s_code`` values mapped to the *names* of the encoding
    # constants on the NLPIR module. Names rather than values are stored so
    # that a constant is only looked up for the encoding actually requested.
    _ENCODING_CONSTANTS = {
        'GBK': 'GBK_CODE',
        'UTF-8': 'UTF8_CODE',
        'BIG5': 'BIG5_CODE',
        'GBK_FANTI': 'GBK_FANTI_CODE',
    }

    def __init__(self, s_code='GBK'):
        """Initialise the NLPIR engine for the given text encoding.

        Parameters:
            s_code: one of 'GBK', 'UTF-8', 'BIG5' or 'GBK_FANTI'. Any other
                value skips the engine call and is reported as a failure.

        Side effects:
            Prints a success or failure message and stores the outcome as a
            boolean in ``self.initialized``.
        """
        # The engine is pointed at the directory containing this module.
        # NOTE(review): presumably NLPIR's data folder lives next to this
        # file - confirm against the deployment layout.
        dataurl = os.path.dirname(os.path.abspath(__file__))

        isinit = 0
        constant_name = self._ENCODING_CONSTANTS.get(s_code)
        if constant_name is not None:
            isinit = NLPIR.NLPIR_Init(dataurl, getattr(NLPIR, constant_name))

        # Keep the result so callers do not have to parse stdout.
        self.initialized = bool(isinit)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        if isinit:
            print('NLPIR 初始化成功')
        else:
            print('NLPIR 初始化失败')

    def stringSeg(self, s_string, i_bPOStagged=0):
        """Segment one string.

        Parameters:
            s_string: the string to be analysed.
            i_bPOStagged: whether POS tagging is wanted; 0 for no tags,
                1 for tagging (default 0).

        Returns:
            The result of ``NLPIR.NLPIR_ParagraphProcess`` (described by the
            original notes as the result buffer).
        """
        return NLPIR.NLPIR_ParagraphProcess(s_string, i_bPOStagged)

    def fileSeg(self, s_sourceFile, s_targetFile, i_bPOStagged=0):
        """Segment one text file and save the result into another file.

        Parameters:
            s_sourceFile: name of the source file to be analysed.
            s_targetFile: name of the file that receives the results.
            i_bPOStagged: whether POS tagging is wanted; 0 for no tags,
                1 for tagging (default 0).

        Returns:
            The result of ``NLPIR.NLPIR_FileProcess``; per the original
            notes, the processing speed on success and a false value
            otherwise.
        """
        return NLPIR.NLPIR_FileProcess(s_sourceFile, s_targetFile, i_bPOStagged)

    def importUserDict(self, s_userDictFile):
        """Import a user-defined dictionary from a text file.

        Parameters:
            s_userDictFile: name of the text file holding the user
                dictionary. NOTE(review): the expected format of this file
                is not documented here - check the NLPIR documentation.

        Returns:
            The result of ``NLPIR.NLPIR_ImportUserDict``; per the original
            notes, the number of lexical entries imported successfully.
        """
        return NLPIR.NLPIR_ImportUserDict(s_userDictFile)

    def addUserWord(self, s_word):
        """Add a word to the user dictionary.

        Parameters:
            s_word: the word to add.

        Returns:
            The result of ``NLPIR.NLPIR_AddUserWord``; per the original
            notes, 1 on success and 0 otherwise.
        """
        return NLPIR.NLPIR_AddUserWord(s_word)

    def saveTheUserDict(self):
        """Save the user dictionary to disk.

        NOTE(review): where the dictionary is written is not shown here -
        check the NLPIR documentation.

        Returns:
            The result of ``NLPIR.NLPIR_SaveTheUsrDic``; per the original
            notes, 1 on success and 0 otherwise.
        """
        return NLPIR.NLPIR_SaveTheUsrDic()

    def delUserWord(self, s_word):
        """Delete a word from the user dictionary.

        Parameters:
            s_word: the word to delete.

        Returns:
            The result of ``NLPIR.NLPIR_DelUsrWord``; per the original
            notes, -1 if the word is not in the user dictionary, otherwise
            the handle of the deleted word.
        """
        return NLPIR.NLPIR_DelUsrWord(s_word)

    def exit(self):
        """Shut the NLPIR engine down.

        Returns:
            The result of ``NLPIR.NLPIR_Exit``; per the original notes, true
            on success and false otherwise.
        """
        return NLPIR.NLPIR_Exit()


if __name__ == '__main__':
    # Smoke check: initialise the engine in UTF-8 mode when run directly.
    O_C_NLPIR_ICTCLAS2013 = C_NLPIR_ICTCLAS2013('UTF-8')
# Usage example for the C_NLPIR_ICTCLAS2013 wrapper.
# NOTE(review): this looks like a separate driver script pasted after the
# module; it imports the class from the nlpirpy_ext package rather than using
# a definition from this file - confirm the intended file split.
from nlpirpy_ext.C_NLPIR_ICTCLAS2013 import C_NLPIR_ICTCLAS2013

if __name__ == '__main__':
    # Initialise the segmenter in UTF-8 mode.
    o_C_NLPIR_ICTCLAS2013 = C_NLPIR_ICTCLAS2013('UTF-8')
    # Pause until the user presses Enter.
    raw_input('\n~!')
    s_test = '1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议'
    result = o_C_NLPIR_ICTCLAS2013.stringSeg(s_test)
    # The segmentation result is shown as the prompt text, and the script
    # then waits for Enter before exiting.
    raw_input(result)
# [2] NLPIR (ICTCLAS2013) Python版
