这是我对c/c++论坛上的一道面试题给出的答案。可以支持两个连续的'?'通配符,也分别支持'?'放在模式匹配字符串首尾的情况。 对模式匹配字符串和输入字符串的处理都只需要一次遍历。 程序的设计原理就是实现了一个确定性有穷自动机(DFA),这个DFA由一个state数组来具体实现。每个state数组元素对该一个通配符被匹配的情况。如果通配符之后的字符串被匹配,就转移到下一个状态。反之,则只是将匹配的字符串数加1。在输入字符串结束后,如果DFA的状态不是接收状态,则认为字符串不匹配。 程序的总体结构比较简单,便于扩充和维护。 关于这个主题的讨论请见这里:http://bbs.chinaunix.net/forum/viewtopic.php?t=598235
题目要求见注释:
/* * Design and write, using C (without C++ or language extentions), an * implementation for the following function prototype: * * bool matchAndTranslate(const char* inputString, const char* translationEntry, * char* outputString); * * where * inputString is a null-terminated string with a maximum length of 64 * characters in the character set of * * translationEntry is a null-terminated string with a maximum length of 195 * characters in the form of /matchingPattern/translationPattern/ * * matchingPattern is a character string with a maximum length of 64 characters * in the character set of * * '*' is a wildcard in the matchingPattern for 0 or more characters in the * set * * '?' is a wildcard in the matchingPattern for any single character in the * set * * the maximum allowable number of wildcards in matchingPattern is 8 * * translationPattern is a character string with a maximum length of 128 * characters in the character set of * * %% in the translationPattern designates a translation to % * * %1,%2,..,%8 in the translationPattern designates a translation to the first * to eighth corresponding wildcard match in matchingPattern if exists or to a * null string '' if the wildcard match does not exist. There can be * multiple %1, %2, etc. in translationPattern. * * outputString is a buffer of sufficient length provided by caller for output. * * * Function description: * * This function matches the entire inputString to the matchingPattern. * * If a match if found, the inputString will be translated according to the * translationPattern to form the outputString and a true (i.e. 1) will be * returned. * * If a match is not found, a false (i.e. 0) will be returned. * * This function assumes the caller will ensure the inputString, the * translationEntry, and the size of the outputString conform to its * specifications and therefore will not verify. * * This function prefers as few characters as possible in '*' wildcard match. * * * Sample input: * * AAB1CDAB12CD081112AB3CD_OUT * AAB23CD03022002AB3CD3081112_OUT * 2003A02A08AB3CD_OUT * 2003A02A08AB34CD88AB3CD99_ERR * 2003A02A08AB37CD88AB3C992_ERR * * * Sample translationTable: * * / *A*AB?CD*_* /MATCH_[%1]_[%2]_[%3]_[%4]_[%5]/ * * * Corresponding output and outputString: * * true, MATCH_[]_[]_[1]_[AB12CD081112AB3CD]_[OUT] * true, MATCH_[]_[AB23CD03022002]_[3]_[3081112]_[OUT] * true, MATCH_[2003]_[02A08]_[3]_[]_[OUT] * true, MATCH_[2003]_[02A08AB34CD88]_[3]_[99]_[ERR] * false, */
#define _GNU_SOURCE #include #include #include
#define TRUE 1 #define FALSE 0
/* * word: point to the first matched character in inputString * n_word: the length of string word * match_str: point to the first character in translationEntry * n_match_str: the length of string match_str * question_mark_flag: the number of characters that the matched state for '?' * should accept * op: function for corresponding state */ struct state { int n_word; const char *word; int n_match_str; const char *match_str; int question_mark_flag; void (*op)(struct state *this, int *current_state, const char **current_char); };
int accepting_state = 0; char *output_fmt;
// start state void op_start(struct state *this, int *current_state, const char **current_char) { if((NULL == this->match_str) && (*current_state != accepting_state)) { *current_state += 1; *current_char -= 1; return; }
if(!strncmp(*current_char, this->match_str, this->n_match_str) && (*current_state != accepting_state)) { *current_char += this->n_match_str - 1; *current_state += 1; } }
// state for '*' void op_asterisk(struct state *this, int *current_state, const char **current_char) { if (NULL == this->word) this->word = *current_char;
if(!strncmp(*current_char, this->match_str, this->n_match_str) && (*current_state != accepting_state)) { *current_char += this->n_match_str - 1; *current_state += 1; } else { this->n_word++; } }
// state for '?' void op_qusetion_mark(struct state *this, int *current_state, const char **current_char) { struct state *pre;
if (NULL == this->word) this->word = *current_char;
if(!strncmp(*current_char, this->match_str, this->n_match_str) && (this->question_mark_flag == this->n_word) && (*current_state != accepting_state)) { *current_char += this->n_match_str - 1; *current_state += 1; } else if(this->question_mark_flag != this->n_word) { this->n_word++; } else { // back to previous state *current_state -= 1; pre = this - 1; pre->n_word += pre->n_match_str + this->n_word + 1;
// reset current state this->word = NULL; this->n_word = 0; } }
void init_state(struct state *st, int *idx, int *n_str, const char *str_pos) { st[*idx].match_str = (0 == *n_str) ? NULL : str_pos; st[*idx].n_match_str = *n_str; (*idx)++; *n_str = 0; }
void init_dfa(struct state *st, const char *translationEntry) { const char *ch = translationEntry; int idx = 0; // state array index int i = 0; int counter = 0;
ch++; // pass by the first character '/' while('/' != *ch) { if('*' == *ch) { init_state(st, &idx, &i, ch - i); st[idx].op = op_asterisk; } else if('?' == *ch) { counter++; } else { if(counter > 0) { init_state(st, &idx, &i, ch - i - counter); st[idx].op = op_qusetion_mark; st[idx].question_mark_flag = counter; counter = 0; } i++; } ch++; }
if(counter > 0) { init_state(st, &idx, &i, ch - i - counter); st[idx].op = op_qusetion_mark; st[idx].question_mark_flag = counter; }
// accepting_state is the last state accepting_state = idx; // st[0] is the start state st[0].op = op_start; // match pattern string for the accepting_state st[accepting_state].match_str = (0 == i) ? NULL : ch - i; st[accepting_state].n_match_str = i; // output format string output_fmt = strndup(ch + 1, strlen(translationEntry) - (ch - translationEntry + 1) - 1); }
void output_result(struct state *st, char *outputString) { int i = 0, j; const char *cp = output_fmt; int is_matched = FALSE;
while('' != *cp) { if(TRUE == is_matched) { j = atoi(cp); strncpy(outputString + i, st[j].word, st[j].n_word); i += st[j].n_word; is_matched = FALSE; } else if('%' == *cp) { is_matched = TRUE; } else *(outputString + i++) = *cp;
cp++; } }
int matchAndTranslate(const char* inputString, const char* translationEntry, char* outputString) { struct state st[8]; const char *cp = inputString; int current_state = 0;
bzero(st, sizeof(st)); init_dfa(st, translationEntry);
while(*cp != '') { st[current_state].op(&st[current_state], ¤t_state, &cp); cp++; }
if (current_state != accepting_state) { free(output_fmt); return FALSE; } else { output_result(st, outputString); free(output_fmt); return TRUE; } }
int main() { char *in_str[] = { "AAB1CDAB12CD081112AB3CD_OUT", "AAB23CD03022002AB3CD3081112_OUT", "2003A02A08AB3CD_OUT", "2003A02A08AB34CD88AB3CD99_ERR", "2003A02A08AB37CD88AB3C992_ERR" };
char *pattern[] = { "/*A*AB?CD*_OU?/MATCH_[%1]_[%2]_[%3]_[%4]_[%5]/", "/*A*AB?CD*_*/MATCH_[%1]_[%2]_[%3]_[%4]_[%5]/", }; char out_str[1024]; int i,j; int ret;
for (j = 0; j < 2; j++) { printf("pattern: %s\n", pattern[j]); for(i = 0; i < 5; i++) { bzero(out_str, sizeof(out_str)); ret = matchAndTranslate(in_str[i], pattern[j], out_str); if(FALSE == ret) printf("false, \n"); else printf("true, %s\n", out_str); } printf("\n"); } }
|