/*
 * The redaction.c program demonstrates the use of the EdkGetRedactedText
 * function. It shows how to set up an eduction engine with the specified
 * grammars and entities, and how you can use that engine to produce redacted
 * output when given an input buffer.
 */
#include <stdlib.h> #include <stdio.h> #include <sys/types.h> #include <sys/stat.h> #include <malloc.h> #include <edk.h> #include <string.h> #ifdef _WIN32 #define stat _stat #define off_t _off_t #endif // _WIN32 #define BUFLEN 5120 // Helper function void displayusageinfo() { EDK_VERSION_INFO versionInfo; EdkGetVersion(&versionInfo); if (versionInfo.vChangeSet) printf("INFO: Eduction SDK Sample for SDK version v%s.%i\n", versionInfo.versionString, versionInfo.vChangeSet); else printf("INFO: Eduction SDK Sample for SDK version v%s\n", versionInfo.versionString); printf("INFO: SDK Built: %s\n", versionInfo.buildTime); printf("INFO: Copyright %s\n", versionInfo.copyright); printf("INFO: Usage: sample1 <grammarpath> <entity> <documentpath> <licensepath>\n"); printf("INFO: Parameters:\n"); printf("INFO: <grammarpath> Path to the grammar file that defines matchable\n"); printf("INFO: entities. The grammar file can be in uncompiled (XML)\n"); printf("INFO: or compiled (ECR) format.\n"); printf("INFO: <entities> Comma deliminted list of entities in the grammar file\n"); printf("INFO: to be used for matching.\n"); printf("INFO: <documentpath> Path to the document to be parsed.\n"); printf("INFO: <licensepath> Path to the license file to be used.\n"); } // Helper function int fileExists(const char * const szFileName) { struct stat buf; int exists; if (!szFileName) return 0; exists = stat(szFileName, &buf) == 0; return exists; } // Helper function int checkargs(const int argc, char **argv) { if (argc != 5) { printf("FAIL: Program requires four arguments.\n"); displayusageinfo(); return 0; } printf("INFO: Grammar Path: %s\n", argv[1]); printf("INFO: Entities: %s\n", argv[2]); printf("INFO: Document Path: %s\n", argv[3]); printf("INFO: License Path: %s\n", argv[4]); if (!fileExists(argv[1])) { printf("FAIL: Grammar path does not exist.\n"); return 0; } if (!fileExists(argv[3])) { printf("FAIL: Document path does not exist.\n"); return 0; } if (!fileExists(argv[4])) { 
printf("FAIL: License path does not exist.\n"); return 0; } return 1; } // Helper function char *readFile(const char * const fn) { struct stat fnInfo; off_t len; FILE *f; char *buf; size_t itemsRead; if (stat(fn, &fnInfo)) { printf("FAIL: Unable to get file size for \"%s\".\n", fn); return NULL; } len = fnInfo.st_size; if (!len) { printf("FAIL: Zero byte file size for \"%s\".\n", fn); return NULL; } f = fopen(fn, "rb"); if (!f) { printf("FAIL: Unable to open file \"%s\".\n", fn); return NULL; } buf = (char*)malloc(len+1); itemsRead = fread(buf, 1, len, f); *(buf+len) = '\0'; fclose(f); if (itemsRead < (size_t)len) { free(buf); printf("FAIL: Unable to read \"%s\".\n", fn); return NULL; } return buf; } // Helper function int readFirst(const char * const fn, FILE **f, char * const buf, const size_t bufLen, off_t * const fileSize, size_t * const bytesRead) { struct stat fnInfo; size_t itemsToRead, itemsRead; if (stat(fn, &fnInfo)) { printf("FAIL: Unable to get file size for \"%s\".\n", fn); return 0; } *fileSize = fnInfo.st_size; if (!*fileSize) { printf("FAIL: Zero byte file size for \"%s\".\n", fn); return 0; } *f = fopen(fn, "rb"); if (!*f) { printf("FAIL: Unable to open file \"%s\".\n", fn); return 0; } itemsToRead = (off_t)bufLen < *fileSize ? bufLen : (size_t)*fileSize; itemsRead = fread(buf, 1, itemsToRead, *f); if (itemsRead < itemsToRead) { fclose(*f); *f = NULL; printf("FAIL: Unable to read \"%s\".\n", fn); return 0; } *bytesRead = itemsRead; return 1; } // Helper function int readNext(const char * const fn, FILE *f, char * const buf, const size_t bufLen, const size_t bytesRemaining, size_t * const bytesRead) { size_t itemsToRead = bufLen < bytesRemaining ? 
bufLen : bytesRemaining; size_t itemsRead; if (!itemsToRead) { *bytesRead = 0; return 1; } itemsRead = fread(buf, 1, itemsToRead, f); if (itemsRead < itemsToRead) { printf("FAIL: Unable to continue reading \"%s\".\n", fn); return 0; } *bytesRead = itemsRead; return 1; } // Main function int main(int argc, char ** argv) { int32_t nErrCode; EdkEngineHandle pEngine; char *license; const char* szErrorMsg; EdkSessionHandle pSession; size_t bytesRead, bytesRemaining; FILE *f; char buf[BUFLEN], componentText[128]; off_t fileSize; const char *szEntityName, *szEntityText, *szOrigText; size_t textSize, textLength, origSize, origLength, origOffset, offsetLength; double score; size_t nComponents, nComponent; const char* output; printf("INFO: Program loaded.\n"); if (!checkargs(argc, argv)) return -1; printf("INFO: Parameters valid.\n"); if (( nErrCode = EdkEngineCreate( &pEngine )) != EdkSuccess) { printf("Unable to create the EDK Engine. Error code: %d\n" , nErrCode); return -1; } printf("INFO: Engine created.\n"); license = readFile(argv[4]); if (!license) { EdkEngineDestroy( pEngine ); return -1; } printf("INFO: License read.\n"); // Set the license to the eduction engine if ((EdkSetLicenseKey( pEngine, license) != EdkSuccess)) { nErrCode = EdkGetLastEngineError(pEngine, &szErrorMsg); printf("FAIL: %s (%d)\n" , szErrorMsg, nErrCode); free(license); EdkEngineDestroy(pEngine); return -1; } free(license); printf("INFO: License validated.\n"); // Configure the eduction engine // Settings include: // EnableComponents // EnableUniqueMatches // MaxMatchLength // MaxMatchesPerDoc // MatchWholeWord // TokenWithPunctuation // AllowOverlaps // AllowMultipleResults // MatchCases // Locale EdkSetEnableComponents(pEngine, true); // Load resource file // Call this function repeatedly until all required resource files are loaded if ((EdkLoadResourceFile(pEngine, argv[1]) != EdkSuccess)) { nErrCode = EdkGetLastEngineError(pEngine, &szErrorMsg); printf("FAIL: %s (%d)\n" , szErrorMsg, 
nErrCode); EdkEngineDestroy(pEngine); return -1; } printf("INFO: Resource file loaded.\n"); // Add an entity to match to against // Call this function repeatedly to add all desired entities // The entities to be added must be defined in the resource files added above if ((EdkAddTargetEntity(pEngine, argv[2]) != EdkSuccess)) { nErrCode = EdkGetLastEngineError(pEngine, &szErrorMsg); printf("FAIL: %s (%d)\n", szErrorMsg, nErrCode); EdkEngineDestroy(pEngine); return -1; } printf("INFO: Grammar(s) initialized.\n"); if (!readFirst(argv[3], &f, buf, BUFLEN, &fileSize, &bytesRead)) { EdkSessionDestroy(pSession); EdkEngineDestroy(pEngine); return -1; } bytesRemaining = (size_t)fileSize - bytesRead; printf("INFO: Data file opened and %d byte block reads initiated.\n", BUFLEN); if(EdkGetRedactedText(pEngine, buf, &output) != EdkSuccess) { nErrCode = EdkGetLastEngineError(pEngine, &szErrorMsg); printf("FAIL: %s (%d)\n", szErrorMsg, nErrCode); EdkEngineDestroy(pEngine); return -1; } fclose(f); printf("INFO: Redacted text: %s\n", output); // Ensure that all session handles have been destroyed before calling this EdkEngineDestroy(pEngine); printf("PASS: Program completed without an error.\n"); return 0; }
|