Skip to content

Commit 3b7350f

Browse files
authored
String Search algorithm
This code might be a bit confusing without an explanation. Please visit my homepage for the algorithm explanation: http://www.algoonline.net/String_Search_algorithm/string_search_algorithm.htm Please give it a thumbs up, thanks.
1 parent 71f2c1b commit 3b7350f

File tree

1 file changed

+257
-0
lines changed

1 file changed

+257
-0
lines changed

String_search_3.txt

+257
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
7+
namespace String_Search
8+
{
9+
class Program
10+
{
11+
/// <summary>
/// One entry of a bad-character shift table: a pattern character paired with
/// the shift distance recorded for it.
/// </summary>
struct P_Bad_Character_struct
{
    /// <summary>The pattern character this entry describes.</summary>
    public char P_Char;

    /// <summary>The shift distance stored for <see cref="P_Char"/>.</summary>
    public int Bad_Character_Shift;

    /// <summary>Creates an entry from a character and its shift distance.</summary>
    /// <param name="input1">Value stored in <see cref="P_Char"/>.</param>
    /// <param name="input2">Value stored in <see cref="Bad_Character_Shift"/>.</param>
    public P_Bad_Character_struct(char input1, int input2)
    {
        // A struct constructor must assign every field before it returns.
        this.Bad_Character_Shift = input2;
        this.P_Char = input1;
    }
}
22+
23+
/// <summary>
/// Demo entry point: searches a hard-coded DNA-like text for a hard-coded
/// pattern, prints where the pattern was found (or that it was not), and then
/// waits for console input before exiting.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Other inputs that were tried during development:
    //   texts:    "GTTATAGCTGATCGCGGCGTAGCGGCGATAT" (original),
    //             "GTTATAGCTGATCCCGGCGTAGCGGCGATATCTCCCCC",
    //             "GTTAGAGCTGATCGCGGCGTAGCGGCGATATCGAGCGGCGCCTCATAGTAGATA",
    //             "CGTGCCTACTTACTTACTTACTTACGCGAA",
    //             "GTTATAGCTGATCGCGGCGTAGCGGCGAA"
    //   patterns: "CTTACTTAC", "GTAGCGGCG" (original), "GTTA", "CGTAG", "GCGG",
    //             "CGTA", "GCGA", "TGATCGC", "GCGATAT", "ATAGTAGATA",
    //             "TAGATAAGATA", "AAA", "CGCAATTACA", "CC", "A", "TC", "CCTC",
    //             "CTCCCCC", "ATC", "AAAA"
    string text = "TGCATGTTAGAGTGATGAAGCGATAAAAGGTAGGTAGCGGCGTAGGAAAACCGTGATAGTAGAAAAATATAGATAAGATACGCAATTACA";
    string pattern = "AAAAA";

    int foundAt = string_search(pattern, text);

    // -1 means "not found"; any other value is the start index of the match.
    string report = foundAt == -1
        ? "P string not exist in string T"
        : "String P is found inside string T, between " + foundAt + " to " + (foundAt + pattern.Length - 1);
    Console.WriteLine(report);

    // Keep the console window open until the user provides input.
    Console.Read();
}
61+
62+
/// <summary>
/// Finds the first occurrence of pattern <paramref name="P"/> inside text
/// <paramref name="T"/> using a Boyer-Moore-style search: the pattern is
/// compared right-to-left against the text and, on a mismatch, slid forward by
/// a precomputed amount that depends on how many trailing characters matched.
/// </summary>
/// <remarks>
/// Characters are compared by ordinal value. As a demonstration aid the method
/// writes its shift tables and every shift decision to the console.
/// </remarks>
/// <param name="P">Pattern to look for.</param>
/// <param name="T">Text to search in.</param>
/// <returns>
/// Zero-based index in <paramref name="T"/> where the first match starts, or -1
/// when there is no match, when either string is null or empty, or when
/// <paramref name="P"/> is longer than <paramref name="T"/>.
/// </returns>
private static int string_search(string P, string T)
{
    // Guard first: nothing below may touch P or T before they are known to be usable.
    if (String.IsNullOrEmpty(P) || String.IsNullOrEmpty(T) || T.Length < P.Length)
        return -1;

    int lastIndex = P.Length - 1;
    char lastChar = P[lastIndex];

    // Special case: a one-character pattern is a plain character scan.
    if (P.Length == 1)
        return T.IndexOf(lastChar);

    // Case 1 - only the last pattern character matched: slide so that the
    // previous occurrence of that character lines up with the text. When there
    // is none, LastIndexOf yields -1 and the shift becomes the full pattern length.
    int onlyLastCharacterShift = lastIndex - P.LastIndexOf(lastChar, lastIndex - 1);

    // Case 2 - two or more (but not all) trailing characters matched.
    int[] goodSuffixShift = build_good_suffix_shift_table(P);

    // Case 3 - the last pattern character itself mismatched: for every pattern
    // character other than the last one, remember the distance from its
    // rightmost occurrence to the end of the pattern. Insertion order is kept
    // separately so the diagnostic listing is stable.
    Dictionary<char, int> badCharacterShift = new Dictionary<char, int>();
    List<char> badCharacterOrder = new List<char>();
    for (int i = lastIndex - 1; i >= 0; i--)
    {
        char c = P[i];
        if (c != lastChar && !badCharacterShift.ContainsKey(c))
        {
            badCharacterShift.Add(c, lastIndex - i);
            badCharacterOrder.Add(c);
        }
    }

    // The tables are only listed when the pattern contains more than one distinct character.
    if (badCharacterOrder.Count > 0)
    {
        for (int i = 0; i < badCharacterOrder.Count; i++)
            Console.WriteLine("P_Bad_Character_shift_table.P_Char[" + i + "] = " + badCharacterOrder[i]
                + ", Bad_Character_Shift = " + badCharacterShift[badCharacterOrder[i]]);
        for (int i = 0; i < goodSuffixShift.Length; i++)
            Console.WriteLine("P_Good_Suffix_shift_table[" + i + "] = " + goodSuffixShift[i]);
    }

    // Slide the pattern over the text; start_P_in_T is the text index aligned with P[0].
    int start_P_in_T = 0;
    while (start_P_in_T + lastIndex < T.Length)
    {
        int end_P_in_T = start_P_in_T + lastIndex;

        // Compare from the last pattern character towards the first; stop at the first mismatch.
        int matchedLength = 0;
        while (matchedLength < P.Length && P[lastIndex - matchedLength] == T[end_P_in_T - matchedLength])
            matchedLength++;

        if (matchedLength == P.Length)
            return start_P_in_T;

        int shift;
        if (matchedLength == 0)
        {
            // Bad character rule: a text character that never occurs in the
            // pattern (before its last position) lets the whole pattern jump past it.
            if (!badCharacterShift.TryGetValue(T[end_P_in_T], out shift))
                shift = P.Length;
            Console.Write("Bad Character Rule, ");
        }
        else if (matchedLength == 1)
        {
            shift = onlyLastCharacterShift;
            Console.Write("Only P last character match, ");
        }
        else
        {
            // Good suffix rule: table slot k - 2 holds the shift for a matched suffix of length k.
            shift = goodSuffixShift[matchedLength - 2];
            Console.Write("Good Suffix Rule, ");
        }

        start_P_in_T += shift;
        Console.WriteLine("start_P_in_T = " + start_P_in_T);
    }

    return -1; // pattern does not occur in the text
}

/// <summary>
/// Builds the good-suffix shift table for <paramref name="P"/>. Slot
/// <c>k - 2</c> holds the smallest shift <c>s &gt;= 1</c> for which the pattern
/// moved right by <c>s</c> still agrees with its own last <c>k</c> characters
/// wherever the two overlap, for <c>k = 2 .. P.Length - 1</c>. A smaller shift
/// could not produce a match, so shifting by this amount never skips one.
/// </summary>
/// <param name="P">Pattern of at least two characters.</param>
/// <returns>An array of <c>P.Length - 2</c> shift distances.</returns>
private static int[] build_good_suffix_shift_table(string P)
{
    int length = P.Length;
    int[] table = new int[length - 2];

    // Suffix lengths up to 'covered' already have their minimal shift.
    // Length 1 is handled by a separate rule, so start from there.
    int covered = 1;
    for (int shift = 1; shift <= length && covered < length - 1; shift++)
    {
        // Count trailing characters that agree with the pattern moved by 'shift'.
        int agree = 0;
        int i = length - 1;
        while (i - shift >= 0 && P[i - shift] == P[i])
        {
            agree++;
            i--;
        }

        // Running off the front of the pattern means nothing is left to
        // contradict: this shift is compatible with every suffix length.
        if (i - shift < 0)
            agree = length;

        // Shifts are visited in ascending order, so the first shift that covers
        // a suffix length is the minimal one for that length.
        int upTo = Math.Min(agree, length - 1);
        for (int suffixLength = covered + 1; suffixLength <= upTo; suffixLength++)
            table[suffixLength - 2] = shift;
        if (upTo > covered)
            covered = upTo;
    }

    return table;
}
256+
}
257+
}

0 commit comments

Comments
 (0)