-
Notifications
You must be signed in to change notification settings - Fork 35
/
BoyerMoore.h
127 lines (103 loc) · 2.51 KB
/
BoyerMoore.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/* adapted from http://en.wikipedia.org/wiki/Boyer–Moore_string_search_algorithm */
#ifndef BOYER_MOORE_H
#define BOYER_MOORE_H
#include <stdint.h>
#include <limits.h>
#include <string.h>
#define ALPHABET_SIZE (1 << CHAR_BIT)
static void compute_prefix(const uint8_t* str, size_t size, int result[]) {
size_t q;
int k;
result[0] = 0;
k = 0;
for (q = 1; q < size; q++) {
while (k > 0 && str[k] != str[q])
k = result[k-1];
if (str[k] == str[q])
k++;
result[q] = k;
}
}
static void prepare_badcharacter_heuristic(const uint8_t *str, size_t size, int result[ALPHABET_SIZE]) {
size_t i;
for (i = 0; i < ALPHABET_SIZE; i++)
result[i] = -1;
for (i = 0; i < size; i++)
result[str[i]] = i;
}
void prepare_goodsuffix_heuristic(const uint8_t *normal, const size_t size, int result[]) {
const uint8_t *left = normal;
const uint8_t *right = left + size;
uint8_t * reversed = new uint8_t[size+1];
uint8_t *tmp = reversed + size;
size_t i;
/* reverse string */
*tmp = 0;
while (left < right)
*(--tmp) = *(left++);
int * prefix_normal = new int[size];
int * prefix_reversed = new int[size];
compute_prefix(normal, size, prefix_normal);
compute_prefix(reversed, size, prefix_reversed);
for (i = 0; i <= size; i++) {
result[i] = size - prefix_normal[size-1];
}
for (i = 0; i < size; i++) {
const int j = size - prefix_reversed[i];
const int k = i - prefix_reversed[i]+1;
if (result[j] > k)
result[j] = k;
}
delete[] reversed;
delete[] prefix_normal;
delete[] prefix_reversed;
}
/*
* Boyer-Moore search algorithm
*/
const uint8_t *boyermoore_search(const uint8_t *haystack, size_t haystack_len, const uint8_t *needle, size_t needle_len) {
/*
* Simple checks
*/
if(haystack_len == 0)
return NULL;
if(needle_len == 0)
return NULL;
if(needle_len > haystack_len)
return NULL;
/*
* Initialize heuristics
*/
int badcharacter[ALPHABET_SIZE];
int * goodsuffix = new int[needle_len+1];
prepare_badcharacter_heuristic(needle, needle_len, badcharacter);
prepare_goodsuffix_heuristic(needle, needle_len, goodsuffix);
/*
* Boyer-Moore search
*/
size_t s = 0;
while(s <= (haystack_len - needle_len))
{
size_t j = needle_len;
while(j > 0 && needle[j-1] == haystack[s+j-1])
j--;
if(j > 0)
{
int k = badcharacter[haystack[s+j-1]];
int m;
if(k < (int)j && (m = j-k-1) > goodsuffix[j])
s+= m;
else
s+= goodsuffix[j];
}
else
{
delete[] goodsuffix;
return haystack + s;
}
}
delete[] goodsuffix;
/* not found */
return NULL;
}
#endif /* BoyerMoore.h */