基础查找算法
朴素查找算法
这是最直观的查找方法,也是最容易理解的算法:
#include <stdio.h> #include <string.h> // 朴素查找算法的完整实现 char* naive_strstr(const char *haystack, const char *needle) { // 输入验证 if (haystack == NULL || needle == NULL) { return NULL; } // 空字符串处理 if (*needle == '\0') { return (char*)haystack; // 空串是任何字符串的子串 } // 主查找逻辑 const char *h = haystack; // haystack指针 const char *n = needle; // needle指针 const char *current_haystack_start = haystack; // 外层循环:遍历haystack的每个可能起始位置 while (*current_haystack_start != '\0') { h = current_haystack_start; n = needle; // 内层循环:比较当前起始位置的字符串 while (*h != '\0' && *n != '\0' && *h == *n) { h++; n++; } // 检查是否找到完整匹配 if (*n == '\0') { // needle的所有字符都匹配了 return (char*)current_haystack_start; } // 如果haystack剩余长度小于needle长度,提前结束 size_t remaining_len = strlen(current_haystack_start); size_t needle_len = strlen(needle); if (remaining_len < needle_len) { break; } current_haystack_start++; // 移动到下一个起始位置 } return NULL; // 未找到 } // 带详细调试信息的朴素查找 char* naive_strstr_debug(const char *haystack, const char *needle, int *comparisons, int *attempts) { *comparisons = 0; *attempts = 0; if (haystack == NULL || needle == NULL) return NULL; if (*needle == '\0') return (char*)haystack; size_t haystack_len = strlen(haystack); size_t needle_len = strlen(needle); printf("查找开始:\n"); printf(" Haystack: \"%s\" (长度: %zu)\n", haystack, haystack_len); printf(" Needle: \"%s\" (长度: %zu)\n", needle, needle_len); printf(" 最大比较次数: %zu\n\n", haystack_len - needle_len + 1); for (size_t i = 0; i <= haystack_len - needle_len; i++) { (*attempts)++; printf("尝试 %d: 起始位置 %zu\n", *attempts, i); size_t j; for (j = 0; j < needle_len; j++) { (*comparisons)++; printf(" 比较 haystack[%zu]='%c' 和 needle[%zu]='%c'", i + j, haystack[i + j], j, needle[j]); if (haystack[i + j] != needle[j]) { printf(" -> 不匹配\n"); break; } printf(" -> 匹配\n"); } if (j == needle_len) { printf(" 找到匹配!位置: %zu\n", i); return (char*)(haystack + i); } } printf("未找到匹配\n"); return NULL; } // 性能分析函数 void analyze_naive_search() { printf("=== 朴素查找算法性能分析 ===\n\n"); // 测试用例 struct { const char *haystack; const char *needle; const char *description; } test_cases[] = { {"ABCDABDABCDABCDABD", "ABCDABD", "最佳情况:快速找到"}, {"AAAAAAAAAAAAAB", "AAAB", "最坏情况:大量回退"}, {"Hello World", "World", "正常情况"}, {"abc", "abcd", "needle比haystack长"}, {"", "test", "空haystack"}, {"test", "", "空needle"} }; for (int i = 0; i < sizeof(test_cases)/sizeof(test_cases[0]); i++) { printf("测试 %d: %s\n", i+1, test_cases[i].description); printf("Haystack: \"%s\"\n", test_cases[i].haystack); printf("Needle: \"%s\"\n", test_cases[i].needle); int comparisons, attempts; char *result = naive_strstr_debug(test_cases[i].haystack, test_cases[i].needle, &comparisons, &attempts); printf("结果: %s\n", result ? "找到" : "未找到"); printf("总尝试次数: %d, 总比较次数: %d\n\n", attempts, comparisons); // 复杂度分析 size_t n = strlen(test_cases[i].haystack); size_t m = strlen(test_cases[i].needle); printf("理论复杂度:\n"); printf(" 最佳情况: O(n+m)\n"); printf(" 最坏情况: O(n*m)\n"); printf(" 平均情况: O(n+m)\n\n"); } }朴素算法的可视化演示
void visualize_search_process() { printf("=== 朴素查找算法可视化演示 ===\n\n"); const char *haystack = "ABABDABACDABABCABAB"; const char *needle = "ABABCABAB"; printf("Haystack: %s\n", haystack); printf("Needle: %s\n\n", needle); size_t h_len = strlen(haystack); size_t n_len = strlen(needle); printf("查找过程:\n"); printf("索引: "); for (size_t i = 0; i < h_len; i++) { printf("%2zu", i); } printf("\nHaystack: "); for (size_t i = 0; i < h_len; i++) { printf("%2c", haystack[i]); } printf("\n\n"); for (size_t i = 0; i <= h_len - n_len; i++) { printf("尝试从位置 %zu 开始:\n", i); printf("Haystack: "); // 显示当前对齐 for (size_t j = 0; j < h_len; j++) { if (j == i) printf("["); printf("%c", haystack[j]); if (j == i + n_len - 1) printf("]"); } printf("\n"); printf("Needle: "); for (size_t j = 0; j < i; j++) printf(" "); for (size_t j = 0; j < n_len; j++) { printf("%c", needle[j]); } printf("\n"); // 进行比较 int match = 1; for (size_t j = 0; j < n_len; j++) { if (haystack[i + j] != needle[j]) { match = 0; printf("在位置 %zu 不匹配: '%c' != '%c'\n\n", j, haystack[i + j], needle[j]); break; } } if (match) { printf("完全匹配!找到位置: %zu\n\n", i); break; } } }4.1.2 KMP算法
KMP算法通过利用已匹配的部分信息,避免不必要的回溯,显著提高了查找效率。
4.1.2.1 前缀函数
c
#include <stdlib.h> #include <string.h> // 计算部分匹配表(LPS:最长前缀后缀) int* compute_lps(const char *pattern, int *lps_length) { int pattern_len = strlen(pattern); int *lps = (int*)malloc(pattern_len * sizeof(int)); if (lps == NULL) return NULL; // lps[0] 总是 0 lps[0] = 0; // length 表示当前最长前缀后缀的长度 int length = 0; int i = 1; while (i < pattern_len) { if (pattern[i] == pattern[length]) { // 字符匹配,长度加1 length++; lps[i] = length; i++; } else { if (length != 0) { // 回退到前一个最长前缀后缀的长度 // 这个步骤是关键,避免了从头开始匹配 length = lps[length - 1]; } else { // 没有匹配的前缀,设置lps[i]为0 lps[i] = 0; i++; } } } *lps_length = pattern_len; return lps; } // 可视化LPS表计算过程 void visualize_lps_computation(const char *pattern) { printf("=== LPS表计算过程可视化 ===\n\n"); printf("模式串: %s\n", pattern); printf("长度: %zu\n\n", strlen(pattern)); int len = strlen(pattern); int *lps = (int*)malloc(len * sizeof(int)); lps[0] = 0; printf("初始化: lps[0] = 0\n\n"); int length = 0; for (int i = 1; i < len; i++) { printf("计算 lps[%d]:\n", i); printf(" 当前字符: pattern[%d] = '%c'\n", i, pattern[i]); printf(" 当前最长前缀后缀长度: length = %d\n", length); printf(" 比较: pattern[%d]('%c') vs pattern[%d]('%c')", i, pattern[i], length, pattern[length]); if (pattern[i] == pattern[length]) { length++; lps[i] = length; printf(" -> 匹配,length++ = %d\n", length); } else { if (length != 0) { printf(" -> 不匹配,回退: length = lps[%d-1] = %d\n", length, lps[length - 1]); length = lps[length - 1]; i--; // 重新尝试当前i } else { lps[i] = 0; printf(" -> 不匹配,lps[%d] = 0\n", i); } } } printf("\n最终LPS表:\n"); printf("索引: "); for (int i = 0; i < len; i++) { printf("%3d", i); } printf("\n字符: "); for (int i = 0; i < len; i++) { printf("%3c", pattern[i]); } printf("\nLPS : "); for (int i = 0; i < len; i++) { printf("%3d", lps[i]); } printf("\n\n"); // 解释LPS表的含义 printf("LPS表解释:\n"); for (int i = 0; i < len; i++) { if (lps[i] > 0) { printf(" lps[%d]=%d: 前缀 \"%.*s\" 也是后缀\n", i, lps[i], lps[i], pattern); } } free(lps); }4.1.2.2 KMP查找算法实现
c // KMP算法主函数 char* kmp_search(const char *text, const char *pattern) { if (text == NULL || pattern == NULL) return NULL; int text_len = strlen(text); int pattern_len = strlen(pattern); // 特殊情况处理 if (pattern_len == 0) return (char*)text; if (pattern_len > text_len) return NULL; // 计算LPS表 int lps_len; int *lps = compute_lps(pattern, &lps_len); if (lps == NULL) return NULL; printf("KMP查找开始:\n"); printf("文本: \"%s\" (长度: %d)\n", text, text_len); printf("模式: \"%s\" (长度: %d)\n", pattern, pattern_len); printf("LPS表已计算完成\n\n"); int i = 0; // text的索引 int j = 0; // pattern的索引 int comparisons = 0; while (i < text_len) { comparisons++; printf("比较: text[%d]='%c' vs pattern[%d]='%c'", i, text[i], j, pattern[j]); if (pattern[j] == text[i]) { printf(" -> 匹配\n"); i++; j++; if (j == pattern_len) { printf("\n找到匹配!位置: %d\n", i - j); printf("总比较次数: %d\n", comparisons); free(lps); return (char*)(text + (i - j)); } } else { printf(" -> 不匹配\n"); if (j != 0) { // 利用LPS表跳转,避免回溯 printf(" 使用LPS表跳转: j = lps[%d-1] = %d\n", j, lps[j - 1]); j = lps[j - 1]; } else { i++; } } } printf("\n未找到匹配\n"); printf("总比较次数: %d\n", comparisons); free(lps); return NULL; } // 优化版本的KMP(减少打印输出) char* kmp_search_optimized(const char *text, const char *pattern) { if (text == NULL || pattern == NULL) return NULL; int n = strlen(text); int m = strlen(pattern); if (m == 0) return (char*)text; if (m > n) return NULL; // 计算LPS int *lps = (int*)malloc(m * sizeof(int)); if (lps == NULL) return NULL; // 计算LPS(内联版本) lps[0] = 0; int len = 0; int i = 1; while (i < m) { if (pattern[i] == pattern[len]) { len++; lps[i] = len; i++; } else { if (len != 0) { len = lps[len - 1]; } else { lps[i] = 0; i++; } } } // KMP搜索 i = 0; // text索引 int j = 0; // pattern索引 while (i < n) { if (pattern[j] == text[i]) { i++; j++; if (j == m) { free(lps); return (char*)(text + (i - j)); } } else { if (j != 0) { j = lps[j - 1]; } else { i++; } } } free(lps); return NULL; }