/* The dumbest approach, but it works. */
#define _POSIX_C_SOURCE 200809L /* asks POSIX libcs to declare strdup() under strict -std=c11 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Capacity limits and the marker line that ends the dictionary section. */
#define NUM_SENTENCE 1024 /* number of text lines a struct context can hold */
#define MAX_WORD_LEN 32   /* word buffer size; dictionary words must be shorter than this */
#define MAX_WORD_NUM 1024 /* number of dictionary words kept per word length */
#define DICT_DEF_OVER "DICTIONARY_DEFINE_OVER"
/*
 * State shared by the parsing and correction passes.
 */
struct context {
    /* dicts[n] is a table of dictionary words that are n letters long */
    char **dicts[MAX_WORD_LEN];
    /* nwords[n] is the number of entries currently stored in dicts[n] */
    int nwords[MAX_WORD_LEN];
    /* text lines as read from the input file */
    char *origin[NUM_SENTENCE];
    /* copies of the origin lines after dictionary correction */
    char *corrected[NUM_SENTENCE];
};
/*
 * Locate the first word (a maximal run of alphabetic characters) in str and
 * copy it into word.
 *
 * str   NUL-terminated text to scan.
 * word  destination buffer of at least len bytes.
 * len   size of word in bytes, including room for the terminating NUL.
 *
 * At most len - 1 characters are copied and word is always NUL-terminated, so
 * an over-long run is truncated rather than leaving the buffer unterminated.
 * When no word is found, word is set to the empty string.
 *
 * Returns a pointer to the first character of the word inside str, or NULL
 * when str holds no alphabetic character, when str or word is NULL, or when
 * len is not positive.
 */
const char * get_one_word(const char * str, char * word, int len)
{
    const char *start = NULL;
    int n = 0;

    if (str == NULL || word == NULL || len <= 0)
        return NULL;

    for (; *str != '\0'; str++) {
        /* <ctype.h> classifiers take an unsigned char value; plain char may be negative. */
        if (!isalpha((unsigned char)*str)) {
            if (start != NULL)
                break;      /* first non-letter after the word ends it */
            continue;       /* still skipping leading separators */
        }
        if (start == NULL)
            start = str;
        if (n < len - 1)    /* keep one byte for the terminator */
            word[n++] = *str;
    }
    word[n] = '\0';
    return start;
}
/*
 * Load the dictionary and the text to correct from one file.
 *
 * The file has two sections.  Every line before the one that begins with
 * DICT_DEF_OVER contributes dictionary words; every line after it is stored
 * verbatim (newline included) in c->origin, up to NUM_SENTENCE lines.
 * Dictionary words are grouped by length in c->dicts[length].  Duplicates are
 * dropped silently; words of MAX_WORD_LEN letters or more, and words beyond
 * MAX_WORD_NUM per length, are skipped with a message on stdout.
 *
 * file  path of the input file.
 * c     context to fill; unused slots must be zeroed by the caller.  The
 *       tables and strings stored in it are heap allocated.
 *
 * Returns 0 on success, -1 when an argument is NULL, the file cannot be
 * opened, or an allocation fails.
 */
int parse_file(const char * file, struct context * c)
{
    char buf[1024];
    char word[MAX_WORD_LEN];
    const size_t marker_len = strlen(DICT_DEF_OVER);
    FILE *pf;
    int rc = -1;
    int n = 0;

    if (file == NULL || c == NULL)
        return -1;
    pf = fopen(file, "r");
    if (pf == NULL)
        return -1;

    /* read dicts; looping on fgets() itself avoids processing a phantom line at EOF */
    while (fgets(buf, sizeof buf, pf) != NULL) {
        const char *p = buf;

        if (strncmp(buf, DICT_DEF_OVER, marker_len) == 0)
            break;

        for (;;) {
            size_t run = 0;
            int len;
            int i;
            int found = 0;

            /* Clear on every iteration (including after a skipped word) so the
             * copied word is NUL-terminated and carries no stale tail. */
            memset(word, 0, sizeof word);
            p = get_one_word(p, word, MAX_WORD_LEN);
            if (p == NULL)
                break;

            /* Measure the word in the line itself so the length never depends
             * on how much of it fitted into the word buffer. */
            while (isalpha((unsigned char)p[run]))
                run++;
            if (run == 0) {
                p++;    /* defensive: always make progress */
                continue;
            }
            if (run >= MAX_WORD_LEN) {
                printf("omit word: %.*s\n", (int)run, p);
                p += run;
                continue;
            }
            len = (int)run;
            p += len;

            if (c->dicts[len] == NULL) {
                c->dicts[len] = malloc(sizeof *c->dicts[len] * MAX_WORD_NUM);
                if (c->dicts[len] == NULL)
                    goto out;
            }
            if (c->nwords[len] >= MAX_WORD_NUM) {
                printf("too much words, omit: %s\n", word);
                continue;
            }

            /* check if it's existed */
            for (i = 0; i < c->nwords[len]; i++) {
                if (strcmp(c->dicts[len][i], word) == 0) {
                    found = 1;
                    break;
                }
            }
            if (!found) {
                char *copy = strdup(word);

                if (copy == NULL)
                    goto out;
                c->dicts[len][c->nwords[len]++] = copy;
            }
        }
    }

    /* read origin text */
    while (n < NUM_SENTENCE && fgets(buf, sizeof buf, pf) != NULL) {
        char *line = strdup(buf);

        if (line == NULL)
            goto out;
        c->origin[n++] = line;
    }
    rc = 0;

out:
    fclose(pf);
    return rc;
}
void correct_words(struct context * c)
{
int i,j,k;
char word[MAX_WORD_LEN] = {0};
const char * p;
int len;
for (i=0; i
if (c->origin[i]) {
c->corrected[i] = strdup(c->origin[i]);
p = c->corrected[i];
while ((p = get_one_word(p, word, MAX_WORD_LEN)) != NULL) {
len = strlen(word);
for (j=0; j
nwords[len]; j++) {
if (word[0] == c->dicts[len][j][0] word[len-1] == c->dicts[len][j][len-1]) {
char * p2 = (char*) p;
for (k=1; k
p2[k] = c->dicts[len][j][k];
break;
}
}
p+= len;
memset(word, 0, MAX_WORD_LEN);
}
}
}
}
int main(int argc, char ** argv)
{
if (argc < 2) return printf("ERROR: need a filenmaen");
struct context c;
memset(&c, 0, sizeof(c));
if (parse_file(argv[1], c) != 0)
return -1;
correct_words(&c);
/* print result */
int i;
for (i=0; i
if ( c.corrected[i] ) {
printf("%s", c.corrected[i]);
}
}
/* release resouces, did not implement anyway */
}