The cjknormalization.c program builds upon basic.c and shows how you can perform normalization of Chinese, Japanese, and Korean input text before you process it in an eduction session.
#include <stdlib.h> #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <malloc.h> #include <edk.h> #include <string.h> #ifdef _WIN32 #define stat _stat #define off_t _off_t #endif // _WIN32 #define BUFLEN 5120 // Helper function void displayusageinfo() { EDK_VERSION_INFO versionInfo; EdkGetVersion(&versionInfo); if (versionInfo.vChangeSet) printf("INFO: Eduction SDK Sample for SDK version v%s.%i\n", versionInfo.versionString, versionInfo.vChangeSet); else printf("INFO: Eduction SDK Sample for SDK version v%s\n", versionInfo.versionString); printf("INFO: SDK Built: %s\n", versionInfo.buildTime); printf("INFO: Copyright %s\n", versionInfo.copyright); printf("INFO: Usage: cjknormalization.exe <grammarpath> <documentpath> <licensepath>\n"); printf("INFO: Parameters:\n"); printf("INFO: <grammarpath> Path to the grammar file to be used.\n"); printf("INFO: <documentpath> Path to the document to be parsed.\n"); printf("INFO: <licensepath> Path to the license file to be used.\n"); } // Helper function int fileExists(const char * const szFileName) { struct stat buf; int exists; if (!szFileName) return 0; exists = stat(szFileName, &buf) == 0; return exists; } // Helper function int checkargs(const int argc, char **argv) { if (argc != 4) { printf("FAIL: Program requires four arguments.\n"); displayusageinfo(); return 0; } printf("INFO: Grammar Path: %s\n", argv[1]); printf("INFO: Document Path: %s\n", argv[2]); printf("INFO: License Path: %s\n", argv[3]); if (!fileExists(argv[1])) { printf("FAIL: Grammar path does not exist.\n"); return 0; } if (!fileExists(argv[2])) { printf("FAIL: Document path does not exist.\n"); return 0; } if (!fileExists(argv[3])) { printf("FAIL: License path does not exist.\n"); return 0; } return 1; } // Helper function char *readFile(const char * const fn) { struct stat fnInfo; off_t len; FILE *f; char *buf; size_t itemsRead; if (stat(fn, &fnInfo)) { printf("FAIL: Unable to get file size for \"%s\".\n", fn); return NULL; } len = fnInfo.st_size; if (!len) { printf("FAIL: Zero byte file size for \"%s\".\n", fn); return NULL; } f = fopen(fn, "rb"); if (!f) { printf("FAIL: Unable to open file \"%s\".\n", fn); return NULL; } buf = (char*)malloc(len+1); itemsRead = fread(buf, 1, len, f); *(buf+len) = '\0'; fclose(f); if (itemsRead < (size_t)len) { free(buf); printf("FAIL: Unable to read \"%s\".\n", fn); return NULL; } return buf; } // Main function int main(int argc, char ** argv) { int32_t nErrCode; EdkEngineHandle pEngine; char *license; const char* szErrorMsg; EdkSessionHandle pSession; char* buf = NULL; off_t fileSize; const char *szEntityName, *szEntityText, *szOrigText; size_t textSize, textLength, origSize, origLength, origOffset, offsetLength; double score; const char* szCJKNormalizedText = NULL; const char* szCJKNormalizationOptions = "HWNum,HWAlpha"; printf("INFO: Program loaded.\n"); if (!checkargs(argc, argv)) return -1; printf("INFO: Parameters valid.\n"); if (( nErrCode = EdkEngineCreate( &pEngine )) != EdkSuccess) { printf("Unable to create the EDK Engine. Error code: %d\n" , nErrCode); return -1; } printf("INFO: Engine created.\n"); license = readFile(argv[3]); if (!license) { EdkEngineDestroy( pEngine ); return -1; } printf("INFO: License read.\n"); // Set the license to the eduction engine if ((EdkSetLicenseKey( pEngine, license) != EdkSuccess)) { nErrCode = EdkGetLastEngineError(pEngine, &szErrorMsg); printf("FAIL: %s (%d)\n" , szErrorMsg, nErrCode); free(license); EdkEngineDestroy(pEngine); return -1; } free(license); printf("INFO: License validated.\n"); // Load resource file // Call this function repeatedly until all required resource files are loaded if ((EdkLoadResourceFile(pEngine, argv[1]) != EdkSuccess)) { nErrCode = EdkGetLastEngineError(pEngine, &szErrorMsg); printf("FAIL: %s (%d)\n" , szErrorMsg, nErrCode); EdkEngineDestroy(pEngine); return -1; } printf("INFO: Resource file loaded.\n"); // Add an entity to match to against // Call this function repeatedly to add all desired entities // The entities to be added must be defined in the resource files added above if ((EdkAddTargetEntity(pEngine, "e/e") != EdkSuccess)) { nErrCode = EdkGetLastEngineError(pEngine, &szErrorMsg); printf("FAIL: %s (%d)\n", szErrorMsg, nErrCode); EdkEngineDestroy(pEngine); return -1; } printf("INFO: Grammar(s) initialized.\n"); // Create an eduction session assosicated with this eduction engine // Multiple sessions can be created and concurrent processing in multithreaded applications if (EdkSessionCreate(pEngine, &pSession) != EdkSuccess) { nErrCode = EdkGetLastEngineError(pEngine, &szErrorMsg); printf("FAIL: %s (%d)\n" , szErrorMsg, nErrCode); EdkEngineDestroy(pEngine); return -1; } printf("INFO: Session created.\n"); buf = readFile(argv[2]); if (!buf) { EdkSessionDestroy(pSession); EdkEngineDestroy(pEngine); return -1; } printf("INFO: Data file opened and %d byte block reads initiated.\n", BUFLEN); if (EdkCJKNormalizeText(pEngine, buf, &szCJKNormalizedText, szCJKNormalizationOptions) != EdkSuccess) { nErrCode = EdkGetLastEngineError(pEngine, &szErrorMsg); printf("FAIL: %s (%d)\n" , szErrorMsg, nErrCode); EdkSessionDestroy(pSession); EdkEngineDestroy(pEngine); return -1; } printf("INFO: Input buffer normalized.\n"); // Add input data // EdkAddInputText is called repeatedly for as many times as needed until all the input has been exhausted // The input data must be UTF-8 encoded. // Note: An alternative method of adding input data is to create a data input stream printf("INFO: Adding data block to engine.\n"); if ((EdkAddInputText( pSession, szCJKNormalizedText, strlen(szCJKNormalizedText), true)) != EdkSuccess) { nErrCode = EdkGetLastSessionError(pSession, &szErrorMsg); printf("FAIL: %s (%d)\n" , szErrorMsg, nErrCode); EdkSessionDestroy(pSession); EdkEngineDestroy(pEngine); return -1; } printf("INFO: Data block added.\n"); // Get a match // This is called repeatedly to get all matches while (EdkGetNextMatch(pSession) == EdkSuccess) { // While we have a match, obtain all required information about the match EdkGetMatchEntityName(pSession, &szEntityName); EdkGetMatchOrigOffset(pSession, &origOffset); EdkGetMatchOrigOffsetLength(pSession, &offsetLength); EdkGetMatchScore(pSession, &score); EdkGetMatchTextSize(pSession, &textSize); EdkGetMatchTextLength(pSession, &textLength); EdkGetMatchOrigSize(pSession, &origSize); EdkGetMatchOrigLength(pSession, &origLength); EdkGetMatchOrigText(pSession, &szOrigText); EdkGetMatchText(pSession, &szEntityText); printf("INFO: EntityName=\"%s\" Offset=\"%u\" OffsetLength=\"%u\"\n", szEntityName, origOffset, offsetLength); printf("INFO: Score=\"%04.2f\" NormalizedTextSize=\"%u\" NormalizedTextLength=\"%u\"\n", score, textSize, textLength); printf("INFO: OriginalTextSize=\"%u\" OriginalTextLength=\"%u\"\n", origSize, origLength); printf("INFO: Original Text=\"%s\"\n", szOrigText); printf("INFO: Normalized Text=\"%s\"\n", szEntityText); } printf("INFO: Matching on block complete.\n"); nErrCode = EdkGetLastSessionError(pSession, &szErrorMsg); if ((nErrCode != EdkNoMatch)) { printf("FAIL: %s (%d)\n" , szErrorMsg, nErrCode); EdkSessionDestroy(pSession); EdkEngineDestroy(pEngine); return -1; } // Destroy the session handle and release the resource EdkSessionDestroy(pSession); // Ensure that all session handles have been destroyed before calling this EdkEngineDestroy(pEngine); printf("PASS: Program completed without an error.\n"); return 0; }
|