Rabin-Karp算法(也可以叫Karp-Rabin算法),由Richard M. Karp和Michael O. Rabin在1987年发表,它也是用来解决多模式串匹配问题的。它的实现方式有点与众不同,首先是计算两个字符串的哈希值,然后通过比较这两个哈希值的大小来判断是否出现匹配。
哈希算法可以有很多种不同的形式,它可能包含ASCII码字符以便对数字进行转化,但也可能是别的形式。我们唯一需要的就是将一个字符串(模式串)转化成为能够快速进行比较的哈希值。以"hello world"为例,假设它的哈希值hash('hello world')=12345。那么如果hash('he')=1,那么我们就可以说模式串"he"包含在文本"hello world"中。由此,我们可以每次从文本中取出长度为m(m为模式串的长度)的子串,然后将该子串进行哈希,并将其哈希值与模式串的哈希值进行比较。
示例代码:
package main
import (
"fmt"
)
const base = 16777619
func Search(txt string, patterns []string) []string {
in := indices(txt, patterns)
matches := make([]string, len(in))
i := 0
for j, p := range patterns {
if _, ok := in[j]; ok {
matches[i] = p
i++
}
}
return matches
}
func indices(txt string, patterns []string) map[int]int {
n, m := len(txt), minLen(patterns)
matches := make(map[int]int)
if n < m || len(patterns) == 0 {
return matches
}
var mult uint32 = 1 // mult = base^(m-1)
for i := 0; i < m-1; i++ {
mult = (mult * base)
}
hp := hashPatterns(patterns, m)
h := hash(txt[:m])
for i := 0; i < n-m+1 && len(hp) > 0; i++ {
if i > 0 {
h = h - mult*uint32(txt[i-1])
h = h*base + uint32(txt[i+m-1])
}
if mps, ok := hp[h]; ok {
for _, pi := range mps {
pat := patterns[pi]
e := i + len(pat)
if _, ok := matches[pi]; !ok && e <= n && pat == txt[i:e] {
matches[pi] = i
}
}
}
}
return matches
}
func hash(s string) uint32 {
var h uint32
for i := 0; i < len(s); i++ {
h = (h*base + uint32(s[i]))
}
return h
}
func hashPatterns(patterns []string, l int) map[uint32][]int {
m := make(map[uint32][]int)
for i, t := range patterns {
h := hash(t[:l])
if _, ok := m[h]; ok {
m[h] = append(m[h], i)
} else {
m[h] = []int{i}
}
}
return m
}
func minLen(patterns []string) int {
if len(patterns) == 0 {
return 0
}
m := len(patterns[0])
for i := range patterns {
if m > len(patterns[i]) {
m = len(patterns[i])
}
}
return m
}
func main() {
txt := "Australia is a country and continent surrounded by the Indian and Pacific oceans."
patterns := []string{"and", "the", "surround", "Pacific", "Germany"}
matches := Search(txt, patterns)
fmt.Println(matches)
}
输出:
[and the surround Pacific]