ICTCLAS.ORG
2008
中科院计算所ICTCLAS2008接口文档...
1
目录...
3
1.C++接口...
4
1.1
ICTCLAS_Init.
4
1.2
ICTCLAS_Exit.
5
1.3
ICTCLAS_ImportUserDict.
6
1.4
ICTCLAS_ParagraphProcess.
8
1.5
ICTCLAS_ParagraphProcessA.
9
1.6 ICTCLAS_FileProcess.
11
1.7 ICTCLAS_GetParagraphProcessAWordCount
12
1.8 ICTCLAS_ParagraphProcessAW...
15
2.JNI接口...
15
2.1
ICTCLAS_Init.
15
2.2
ICTCLAS_Exit.
17
2.3
ICTCLAS_ImportUserDict.
18
2.4
ICTCLAS_ParagraphProcess.
20
2.5
ICTCLAS_FileProcess.
21
2.6
ICTCLAS_IsWord.
22
2.7
ICTCLAS_GetUniProb.
24
1.C++接口
Initialize the analyzer and load the data ICTCLAS needs,
according to the configuration file.
bool
ICTCLAS_Init(const char * sInitDirPath=0);
Routine
|
Required Header
|
ICTCLAS_Init
|
<ICTCLAS30.h>
|
Return Value
Returns true if initialization succeeds; otherwise returns false.
Parameters
sInitDirPath: the initial directory path, i.e. the directory where the file
Configure.xml and the Data directory are stored.
The default value is 0, which means the initial
directory is the current working directory.
Remarks
The ICTCLAS_Init function must be invoked before
any other ICTCLAS operation. The whole system needs to call it only once,
before starting ICTCLAS. When the system is stopping and no further operations will be made,
ICTCLAS_Exit should be invoked to destroy all working buffers. Every operation
will fail if initialization did not succeed.
ICTCLAS_Init fails
mainly for one of two reasons: 1) required data is incompatible or missing; 2)
the configuration file is missing or contains invalid parameters. For more detail, see
the log file ictclas.log in the default directory.
Example
#include "ICTCLAS30.h"
#include <stdio.h>
#include <string.h>
// Example for ICTCLAS_Init: initialise the analyzer, segment every
// whitespace-delimited token read from stdin, and release the analyzer when
// the user enters "q" or the input ends.
// Returns 0 on normal completion, -1 if ICTCLAS_Init fails.
int main(int argc, char* argv[])
{
    //Sample1: Sentence or paragraph lexical analysis with only one result
    char sSentence[2000];
    const char * sResult;

    if(!ICTCLAS_Init())
    {
        printf("Init fails\n");
        return -1;
    }

    printf("Input sentence now('q' to quit)!\n");
    // The field width keeps scanf inside sSentence (1999 chars + terminator);
    // a result other than 1 means end of input, so the loop cannot spin on a
    // stale buffer.
    while(scanf("%1999s",sSentence)==1 && _stricmp(sSentence,"q")!=0)
    {
        // bPOStagged = 0: segment without POS tags.
        sResult = ICTCLAS_ParagraphProcess(sSentence,0);
        printf("%s\nInput string now('q' to quit)!\n", sResult);
    }

    ICTCLAS_Exit();
    return 0;
}
Output
Exit the program and free all resources and destroy
all working buffer used in ICTCLAS.
bool
ICTCLAS_Exit();
Routine
|
Required Header
|
ICTCLAS_Exit
|
<ICTCLAS30.h>
|
Return Value
Returns true on success; otherwise returns false.
Parameters
none
Remarks
The ICTCLAS_Exit function must be invoked when
the system is stopping and no further operations will be made. To restart ICTCLAS afterwards, call the
ICTCLAS_Init function again.
Example
#include "ICTCLAS30.h"
#include <stdio.h>
#include <string.h>
// Example for ICTCLAS_Exit: initialise the analyzer, segment and POS-tag every
// whitespace-delimited token read from stdin, then call ICTCLAS_Exit once the
// user enters "q" or the input ends.
// Returns 0 on normal completion, -1 if ICTCLAS_Init fails.
int main(int argc, char* argv[])
{
    //Sample1: Sentence or paragraph lexical analysis with only one result
    char sSentence[2000];
    const char * sResult;

    if(!ICTCLAS_Init())
    {
        printf("Init fails\n");
        return -1;
    }

    printf("Input sentence now('q' to quit)!\n");
    // The field width keeps scanf inside sSentence (1999 chars + terminator);
    // a result other than 1 means end of input, which also ends the loop.
    while(scanf("%1999s",sSentence)==1 && _stricmp(sSentence,"q")!=0)
    {
        // bPOStagged = 1: segment with POS tags.
        sResult = ICTCLAS_ParagraphProcess(sSentence,1);
        printf("%s\nInput string now('q' to quit)!\n", sResult);
    }

    // Release the analyzer before leaving the program.
    ICTCLAS_Exit();
    return 0;
}
Output
Import user-defined dictionary from a text file.
unsigned
int ICTCLAS_ImportUserDict(const char *sFilename);
Routine
|
Required Header
|
ICTCLAS_ImportUserDict
|
<ICTCLAS30.h>
|
Return Value
The number of lexical entries imported successfully.
Parameters
sFilename: Text filename for user dictionary
Remarks
The ICTCLAS_ImportUserDict function works properly
only if ICTCLAS_Init succeeds.
For the text dictionary file format, see User-defined Lexicon.
You only need to invoke this function when you use the lexicon for the first time or
have changed your customized lexicon. Once the lexicon has been
imported and is not changed again, ICTCLAS loads it
automatically if UserDict is set to "on" in the configuration file. When
UserDict is set to "off", the user-defined lexicon is not applied.
Example
#include <string.h>
int main(int argc, char* argv[])
{
//Sample1: Sentence or paragraph lexical analysis with only one result
char sSentence[2000]="张华平于
1978年3月9日
出生于江西省波阳县。";
const char * sResult;
if(!ICTCLAS_Init())
{
printf("Init fails\n");
return -1;
}
//Sample4: User-defined dictionary
sResult=ICTCLAS_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议",1);
printf("Before Adding User-defined lexicon, the result
is:\n%s\n",sResult);
unsigned int
nItems=ICTCLAS_ImportUserDict("userdict.txt");//Import user
dictionary
printf("%d user-defined lexical entries added!\n",nItems);
sResult=ICTCLAS_ParagraphProcess("1989年春夏之交的政治风波1989年政治风波24小时降雪量24小时降雨量863计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I系统CCITT建议",1);
printf("After Adding User-defined lexicon, the result
is:\n%s\n",sResult);
ICTCLAS_Exit();
return 0;
}
Output
Before Adding User-defined lexicon, the result is:
1989年/t 春/tg 夏/tg 之/uzhi 交/ng 的/ude1 政治/n 风波/n 1989年/t 政治/n 风波/n
24/m 小时/n 降雪/vn 量/n 24/m 小时/q 降雨量/n
863/m 计划ABC防护训练APEC会议BB机BP机C2系统C3I系统C3系统C4ISR系统C4I/nt 系统/n CCITT/x 建议/n
14321 user-defined lexical entries added!
After Adding User-defined lexicon, the result is:
1989年春夏之交的政治风波/n 1989年政治风波/n
24小时降雪量/n 24小时降雨量/n 863计划/n ABC防护训练/vn APEC会议/nz BB机/n BP机/n C2系统/n C3I系统/n C3系统/n C4ISR系统/n C4I系统/n
CCITT建议/t
Process a paragraph, and return the result buffer pointer
const char *
ICTCLAS_ParagraphProcess(const char *sParagraph,int bPOStagged=1);
Routine
|
Required Header
|
ICTCLAS_ParagraphProcess
|
<ICTCLAS30.h>
|
Return Value
Return the pointer of result buffer.
Parameters
sParagraph: The source paragraph
bPOStagged: whether POS tagging is performed: 0 for no
tags, 1 for tagging; default: 1.
Remarks
The ICTCLAS_ParagraphProcess function works
properly only if ICTCLAS_Init succeeds.
Example
#include "ICTCLAS30.h"
#include <stdio.h>
#include <string.h>
// Example for ICTCLAS_ParagraphProcess: segment and POS-tag every
// whitespace-delimited token read from stdin until the user enters "q" or the
// input ends.
// Returns 0 on normal completion, -1 if ICTCLAS_Init fails.
int main(int argc, char* argv[])
{
    //Sample1: Sentence or paragraph lexical analysis with only one result
    char sSentence[2000];
    const char *sResult;

    if(!ICTCLAS_Init())
    {
        printf("Init fails\n");
        return -1;
    }

    printf("Input sentence now('q' to quit)!\n");
    // The field width keeps scanf inside sSentence (1999 chars + terminator);
    // a result other than 1 means end of input, which also ends the loop.
    while(scanf("%1999s",sSentence)==1 && _stricmp(sSentence,"q")!=0)
    {
        // bPOStagged = 1: segment with POS tags.
        sResult=ICTCLAS_ParagraphProcess(sSentence,1);
        printf("%s\nInput string now('q' to quit)!\n",sResult);
    }

    ICTCLAS_Exit();
    return 0;
}
Output
result_t *
ICTCLAS_ParagraphProcessA(const char *sParagraph,int *pResultCount);
Routine
|
Required Header
|
ICTCLAS_ParagraphProcessA
|
<ICTCLAS30.h>
|
Return Value
A pointer to the result vector. The vector is managed by the system;
the caller must not allocate or free it.
// One entry of the result vector returned by ICTCLAS_ParagraphProcessA;
// each entry describes a single word found in the input paragraph.
struct result_t{
int start; //start position: where the word begins in the input sentence
int length; //length of the word
unsigned char POS_id;//word type: part-of-speech ID value, allows quick lookup in the part-of-speech table
int word_ID; //set to 0 or -1 if the word is an unregistered (out-of-vocabulary) word
};
Parameters
sParagraph: The source paragraph
pResultCount: pointer to result vector size
Remarks
The ICTCLAS_ParagraphProcessA function works
properly only if ICTCLAS_Init succeeds.
Example
#include "ICTCLAS30.h"
#include <stdio.h>
#include <string.h>
// Example for ICTCLAS_ParagraphProcessA: for every whitespace-delimited token
// read from stdin, print the start, length, word ID and POS ID of each word in
// the returned result vector. Stops when the user enters "q" or the input ends.
// Returns 0 on normal completion, -1 if ICTCLAS_Init fails.
int main(int argc, char* argv[])
{
    //Sample1: Sentence or paragraph lexical analysis with only one result
    char sSentence[2000];
    const result_t *pVecResult;
    int nCount;

    if(!ICTCLAS_Init())
    {
        printf("Init fails\n");
        return -1;
    }

    printf("Input sentence now!\n");
    // Reading inside the loop condition fetches a fresh token on every pass,
    // so the loop can terminate; the field width keeps scanf inside sSentence.
    while(scanf("%1999s",sSentence)==1 && _stricmp(sSentence,"q")!=0)
    {
        // Start from zero so a call that leaves the count untouched prints nothing.
        nCount=0;
        // The result vector is managed by the library: do not free it.
        pVecResult=ICTCLAS_ParagraphProcessA(sSentence,&nCount);
        for (int i=0;i<nCount;i++)
        {
            printf("Start=%d Length=%d Word_ID=%d POS_ID=%d\n",
                   pVecResult[i].start,
                   pVecResult[i].length,
                   pVecResult[i].word_ID,
                   pVecResult[i].POS_id);
        }
    }

    ICTCLAS_Exit();
    return 0;
}
Output
Process a text file
bool
ICTCLAS_FileProcess(const char *sSourceFilename,const char *sResultFilename,int
bPOStagged=1);
Routine
|
Required Header
|
ICTCLAS_FileProcess
|
<ICTCLAS30.h>
|
Return Value
Returns true if processing succeeds; otherwise returns
false.
Parameters
sSourceFilename: The name of the source file to be
analyzed;
sResultFilename: The result file name to store the results.
bPOStagged:
Whether POS tagging is performed: 0 for no tags, 1 for tagging; default: 1.
Remarks
The ICTCLAS_FileProcess function works properly
only if ICTCLAS_Init succeeds.
The output format is customized in ICTCLAS configure.
Example
#include "ICTCLAS30.h"
#include <stdio.h>
// Example for ICTCLAS_FileProcess: analyse "Test.txt" with POS tagging and
// write the result to "Test_result.txt".
// Returns 0 on success, -1 if initialisation or file processing fails.
int main(int argc, char* argv[])
{
    //Sample2: File text lexical analysis
    if(!ICTCLAS_Init())
    {
        printf("Init fails\n");
        return -1;
    }

    // No interactive input is read here; the file names are fixed.
    // ICTCLAS_FileProcess reports failure through its return value, so
    // surface it instead of silently exiting with success.
    const bool bProcessed=ICTCLAS_FileProcess("Test.txt","Test_result.txt",1);
    if(!bProcessed)
    {
        printf("File process fails\n");
    }

    // Always release the analyzer, whether or not processing succeeded.
    ICTCLAS_Exit();
    return bProcessed ? 0 : -1;
}
Output
Get ProcessAWordCount, API for C#
int
ICTCLAS_GetParagraphProcessAWordCount(const char *sParagraph);
Routine
|
Required Header
|
ICTCLAS_GetParagraphProcessAWordCount
|
<ICTCLAS30.h>
|
Return Value
Return the paragraph word count.
Parameters
sParagraph:
The source paragraph
Remarks
The ICTCLAS_GetParagraphProcessAWordCount function works properly
only if ICTCLAS_Init succeeds.
The output format is customized in ICTCLAS configure.
Example
using System;
using System.IO;
using System.Runtime.InteropServices;
namespace win_csharp
{
/// <summary>
/// Managed mirror of the native result_t structure: one entry per word in the
/// result vector produced by ICTCLAS_ParagraphProcessA.
/// </summary>
[StructLayout(LayoutKind.Explicit)]
public struct result_t
{
    /// <summary>Start position of the word in the input sentence.</summary>
    [FieldOffset(0)]
    public int start;

    /// <summary>Length of the word.</summary>
    [FieldOffset(4)]
    public int length;

    /// <summary>
    /// Part-of-speech ID. The native field is an unsigned char, so it is read
    /// as a single byte; reading four bytes here would also pick up whatever
    /// follows it in memory. NOTE(review): assumes the bytes at offsets 9-11
    /// are alignment padding in the native build - confirm against the DLL.
    /// </summary>
    [FieldOffset(8)]
    public byte POS_id;

    /// <summary>Word ID; 0 or -1 for an unregistered word.</summary>
    [FieldOffset(12)]
    public int word_ID;
}
///
<summary>
///
Class1 的摘要说明。
///
</summary>
class
Class1
{
const
string path = @"ICTCLAS30.dll";
[DllImport(path,CharSet=CharSet.Ansi,EntryPoint="ICTCLAS_Init")]
public
static extern bool ICTCLAS_Init(String sInitDirPath);
[DllImport(path,CharSet=CharSet.Ansi,EntryPoint="ICTCLAS_ParagraphProcess")]
public
static extern String ICTCLAS_ParagraphProcess(String sParagraph,int
bPOStagged);
[DllImport(path,CharSet=CharSet.Ansi,EntryPoint="ICTCLAS_Exit")]
public
static extern bool ICTCLAS_Exit();
[DllImport(path,CharSet=CharSet.Ansi,EntryPoint="ICTCLAS_ImportUserDict")]
public
static extern int ICTCLAS_ImportUserDict(String sFilename);