16番会議室「玉石混淆みんなで作るSample蔵」に寄せられたサンプル
"EBMユニット(implementation)"
この発言は #01367 ぜえた さんの高速(?)複数文字列検索(拡張BM法) に対するコメントです
// 拡張 Boyer-Moore法を使った複数文字列検索。(implementation)
implementation
constructor TEBMSearch.Create;
begin
inherited Create;
FSearch := NonAnsiSearch;
CmpFlags := 0;
FStep := 4096;
FPatterns := TStringList.Create;
TStringList(FPatterns).OnChange := PatternsChanged;
end;
destructor TEBMSearch.Destroy; //override;
begin
FreeMem(FAccept);
FreeMem(FSkip);
FreeMem(FNext);
FPatterns.Free;
inherited Destroy;
end;
procedure TEBMSearch.SearchInit(S, E: PChar);
begin
if not FTableValid then CreateTable;
FStartPos := S;
FEndPos := E;
if FMinLen <> MaxInt then
FCurPos := S + FMinLen - 1
else
FCurPos := E;
FStepPos := FCurPos + FStep;
if FStepPos > FEndPos then
FStepPos := FEndPos;
FCurState := 0;
FFoundPatternIndex := -1;
end;
function TEBMSearch.NonAnsiSearch: Boolean;
label 0;
var
s: Integer;
p, q: PChar;
begin
s := FCurState;
p := FCurPos;
q := FFoundPos;
if s > 0 then goto 0;
while True do begin
repeat
Dec(p, s);
if p >= FStepPos then begin
if FStepPos >= FEndPos then begin
FFoundPatternIndex := -1;
Result := False;
Exit;
end;
if not DoProgress then begin
Result := False;
Exit;
end;
Inc(FStepPos, FStep);
if FStepPos > FEndPos then FStepPos := FEndPos;
end;
s := FNext[0, p^];
until s > 0;
q := p;
repeat
if FAccept[s] >= 0 then begin
FFoundPatternIndex := FAccept[s];
FCurState := s;
FCurPos := p;
FFoundPos := q;
Result := True;
Exit;
end;
0:
Dec(q);
if q < FStartPos then begin
s := FSkip[s];
Break;
end;
s := FNext[s, q^];
until s < 0;
end;
end;
function TEBMSearch.AnsiSearch: Boolean;
var
substr: string;
L2: Cardinal;
ByteType : TMbcsByteType;
begin
while True do begin
Result := NonAnsiSearch;
if not Result then Exit;
substr := FPatterns[FFoundPatternIndex];
L2 := Length(substr);
ByteType := StrByteType(FStartPos, Integer(FFoundPos-FStartPos));
if (ByteType <> mbTrailByte) and
(CompareString(LOCALE_USER_DEFAULT, CmpFlags,
FFoundPos, L2, Pointer(substr), L2) = 2) then Exit;
end;
end;
procedure FillInt(var X; Count: Integer; Value: Integer);
//X->eax; Count->edx; Value->ecx
asm
push edi
mov edi, eax
mov eax, ecx
mov ecx, edx
rep stosd
pop edi
end;
procedure TEBMSearch.CreateTable;
type
TNode = record
index: Integer;
state: Integer;
end;
PQueue = ^TQueue;
TQueue = array[0..0] of TNode;
TAttr = record
depth: Integer;
fail: Integer;
skip: Integer;
end;
PStateAttr = ^TStateAttr;
TStateAttr = array[0..0] of TAttr;
var
count: Integer;
d: Integer; //depth of trie
q: PQueue; //queue
i: Integer; //index of queue
j: Integer; //index of queue
s: Integer; //state
f: Integer; //free state
g: PStateAttr;//state attribute
k: Integer;
str: string;
c: Char;
sum: Integer; //sum of length of strings
min: Integer; //min of length of strings
len: Integer;
skip: Integer;
begin
FTableValid := False;
count := FPatterns.Count;
sum := 0;
min := High(Integer);
for i := 0 to count - 1 do begin
len := Length(FPatterns[i]);
if len = 0 then Continue;
Inc(sum, len);
if len < min then min := len;
end;
FMinLen := min;
if count = 0 then Exit;
ReallocMem(FNext, SizeOf(FNext[-1]) * (sum + 2));
FillInt(FNext[-1], SizeOf(FNext[0]) div SizeOf(Integer), 0);
FillInt(FNext[0], SizeOf(FNext[0]) div SizeOf(Integer) * (sum + 1), -min);
ReallocMem(FAccept, SizeOf(Integer) * (sum + 1));
FillInt(FAccept^, sum + 1, -1);
GetMem(q, SizeOf(q[0]) * (count + 1));
try
j := 0;
for i := 0 to count - 1 do begin
if FPatterns[i] = '' then Continue;
q[j].index := i;
q[j].state := 0;
Inc(j);
end;
q[j].index := -1;
GetMem(g, SizeOf(g[0]) * (sum + 1));
try
FillInt(g^, SizeOf(g[0]) div SizeOf(Integer) * (sum + 1), 0);
for i := 0 to sum do g[i].skip := -min;
g[0].fail := -1;
f := 1;
d := 0;
repeat
i := 0;
j := 0;
repeat
str := FPatterns[q[i].index];
if soIgnoreCase in FOptions then str := UpperCase(str);
len := Length(str);
c := str[len - d];
s := q[i].state;
g[s].depth := d;
k := g[s].fail;
if FNext[s,c] < 0 then begin
FNext[s,c] := f;
while FNext[k,c] < 0 do begin
if g[k].depth - d > FNext[k,c] then
FNext[k,c] := g[k].depth - d;
k := g[k].fail;
end;
g[f].fail := FNext[k,c];
Inc(f);
end;
if d < len - 1 then begin
q[j].index := q[i].index;
q[j].state := FNext[s,c];
Inc(j);
end else begin
k := FNext[s,c];
FAccept[k] := q[i].index;
repeat
k := g[k].fail;
if g[k].depth - d - 1 > g[k].skip then
g[k].skip := g[k].depth - d - 1;
until k = 0;
end;
Inc(i);
until q[i].index = -1;
q[j].index := -1;
Inc(d);
until j = 0;
ReallocMem(FSkip, SizeOf(FSkip[0]) * f);
for s := 0 to f - 1 do begin
skip := g[s].skip;
FSkip^[s] := skip;
for c := #0 to #255 do begin
k := FNext[s,c];
if k > 0 then begin
if skip > g[k].skip then g[k].skip := skip;
end else begin
if k < skip then FNext[s,c] := skip;
end;
end;
end;
if soIgnoreCase in FOptions then
for s := 0 to f - 1 do
for c := 'a' to 'z' do
FNext[s, c] := FNext[s, Chr(Ord(c) - Ord('a') + Ord('A'))];
finally
FreeMem(g);
end;
finally
FreeMem(q);
end;
ReallocMem(FNext, SizeOf(FNext[0]) * (f + 1));
FTableValid := True;
end;
procedure TEBMSearch.SetOptions(Value: TSearchOptions);
const
CmpFlagsList: array[Boolean] of DWORD = (0, NORM_IGNORECASE);
begin
if FOptions = Value then Exit;
if (soIgnoreCase in FOptions) xor (soIgnoreCase in Value) then
FTableValid := False;
if soAnsi in Value then
FSearch := AnsiSearch
else
FSearch := NonAnsiSearch;
CmpFlags := CmpFlagsList[soIgnoreCase in FOptions];
FOptions := Value;
end;
procedure TEBMSearch.SetPatterns(Value: TStrings);
begin
FPatterns.Assign(Value);
end;
procedure TEBMSearch.PatternsChanged(Sender: TObject);
begin
Update;
end;
procedure TEBMSearch.Update; //virtual;
begin
FTableValid := False;
end;
function TEBMSearch.DoProgress: Boolean; //virtual;
begin
Result := True;
if Assigned(FOnProgress) then FOnProgress(Self, Result);
end;
end.
ぜえた (QZC05100)
Original document by ぜえた 氏 ID:(QZC05100)
ここにあるドキュメントは NIFTY SERVEの Delphi Users' Forum の16番会議室「玉石混淆みんなで作るSample蔵」に投稿されたサンプルです。これらのサンプルはボーランド株式会社がサポートする公式のものではありません。また、必ずしも動作が検証されているものではありません。これらのサンプルを使用したことに起因するいかなる損害も投稿者、およびフォーラムスタッフはその責めを負いません。使用者のリスクの範疇でご使用下さい。
Copyright 1996-2002 Delphi Users' Forum
|