BM算法

发布时间:2014-10-23 23:25:18
来源:分享查询网

比KMP更优秀的字符串匹配算法: void getBadCharacter(char* pattern_string, int pattern_string_length, int* bad_characters, int bad_characters_length) { for (int index = 0; index < bad_characters_length; ++index) { bad_characters[index] = pattern_string_length; } for(int index = 0; index < pattern_string_length; ++index) { bad_characters[ pattern_string[index] ] = pattern_string_length - 1 - index; } } void getSuffixes(char* pattern_string, int* suffixes, int length) { int left = length - 1; int right = 0; suffixes[length - 1] = length; for (int index = length - 2; index >= 0; --index) { if (index > left && suffixes[length - 1 - right + index] < index - left) { suffixes[index] = suffixes[length - 1 - right + index]; } else { if (left > index) left = index; right = index; while (left >= 0 && pattern_string[left] == pattern_string[length - 1 - right + left]) { --left; } suffixes[index] = right - left; } } } void getGoodSuffixes(char* pattern_string, int* good_suffixes, int length) { int* suffixes = new int[length]; getSuffixes(pattern_string, suffixes, length); for (int index = 0; index < length; ++index) { good_suffixes[index] = length; } int matched_index = 0; for(int index = length - 1; index >= 0; --index) { if (index + 1 == suffixes[index]) { while (matched_index < length - 1 - index) { if (good_suffixes[matched_index] == length) { good_suffixes[matched_index] = length - 1 - index; } ++matched_index; } } } for (int index = 0; index < length - 1; ++index) { good_suffixes[ length - 1 - suffixes[index] ] = length - 1 - index; } delete[] suffixes; } int BMMatch(char* string_to_match, char* pattern_string) { int pattern_string_length = strlen(pattern_string); int bad_characters[128]; getBadCharacter(pattern_string, pattern_string_length, bad_characters, 128); int* good_suffixes = new int[pattern_string_length]; getGoodSuffixes(pattern_string, good_suffixes, pattern_string_length); int target_string_length = strlen(string_to_match); int target_string_index = 0; while (target_string_index <= target_string_length - pattern_string_length) { int pattern_string_index = pattern_string_length - 1; while (pattern_string_index >= 0 && string_to_match[target_string_index + pattern_string_index] == pattern_string[pattern_string_index]) { --pattern_string_index; } if (pattern_string_index < 0) { //target_string_index = good_suffixes[0]; return target_string_index; } else { int bad_characters_shift = bad_characters[target_string_index + pattern_string_index] - target_string_length + 1 + pattern_string_index; int good_suffixes_shitf = good_suffixes[pattern_string_index]; target_string_index = bad_characters_shift > good_suffixes_shitf ? bad_characters_shift : good_suffixes_shitf; } } delete[] good_suffixes; return -1; }注释什么的都没完成啊,待扩展...

返回顶部
查看电脑版