|
| 1 | +using System; |
| 2 | +using System.Collections.Generic; |
| 3 | +using System.Linq; |
| 4 | +using System.Text; |
| 5 | +using System.Threading.Tasks; |
| 6 | + |
namespace String_Search
{
    /// <summary>
    /// Console demo of a Boyer-Moore substring search (bad-character rule
    /// combined with the good-suffix rule).
    /// </summary>
    class Program
    {
        /// <summary>
        /// Searches a sample DNA-like text for a sample pattern, prints where the
        /// pattern was found (or that it was not), then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Original sample pair used by the author:
            //   T = "GTTATAGCTGATCGCGGCGTAGCGGCGATAT", P = "GTAGCGGCG"
            string T = "TGCATGTTAGAGTGATGAAGCGATAAAAGGTAGGTAGCGGCGTAGGAAAACCGTGATAGTAGAAAAATATAGATAAGATACGCAATTACA";
            string P = "AAAAA";

            int P_position_in_T = string_search(P, T);
            if (P_position_in_T == -1)
            {
                Console.WriteLine("P string not exist in string T");
            }
            else
            {
                Console.WriteLine("String P is found inside string T, between " + P_position_in_T + " to " + (P_position_in_T + P.Length - 1));
            }

            // Keep the console window open until the user presses a key.
            Console.Read();
        }

        /// <summary>
        /// Finds the first occurrence of <paramref name="P"/> inside
        /// <paramref name="T"/> using the Boyer-Moore algorithm. Characters are
        /// compared one by one with <c>==</c> (ordinal, case-sensitive).
        /// </summary>
        /// <param name="P">Pattern to look for.</param>
        /// <param name="T">Text to search in.</param>
        /// <returns>
        /// Zero-based index in <paramref name="T"/> where the first occurrence of
        /// <paramref name="P"/> starts, or -1 when there is no occurrence, when
        /// either argument is null or empty, or when the text is shorter than the
        /// pattern.
        /// </returns>
        /// <remarks>
        /// Declared <c>internal</c> (rather than <c>private</c>) so the routine can
        /// be exercised by tests in the same assembly.
        /// </remarks>
        internal static int string_search(string P, string T)
        {
            // Validate before touching P.Length / T.Length so null input yields -1
            // instead of a NullReferenceException.
            if (String.IsNullOrEmpty(P) || String.IsNullOrEmpty(T) || T.Length < P.Length)
            {
                return -1;
            }

            Dictionary<char, int> lastIndexOfChar = BuildBadCharacterTable(P);
            int[] goodSuffixShift = BuildGoodSuffixShifts(P);

            int patternLength = P.Length;
            int lastAlignment = T.Length - patternLength;
            int alignment = 0; // index in T where P[0] is currently aligned

            while (alignment <= lastAlignment)
            {
                // Compare from the last character of P towards the first.
                int patternIndex = patternLength - 1;
                while (patternIndex >= 0 && P[patternIndex] == T[alignment + patternIndex])
                {
                    patternIndex--;
                }

                if (patternIndex < 0)
                {
                    return alignment; // every character matched
                }

                // Bad-character rule: line the mismatching text character up with
                // its right-most occurrence in P (index -1 when it does not occur).
                int lastIndex;
                if (!lastIndexOfChar.TryGetValue(T[alignment + patternIndex], out lastIndex))
                {
                    lastIndex = -1;
                }
                int badCharacterShift = patternIndex - lastIndex;

                // The good-suffix shift is always >= 1, so taking the maximum keeps
                // the search moving forward even when the bad-character shift is
                // zero or negative.
                alignment += Math.Max(goodSuffixShift[patternIndex + 1], badCharacterShift);
            }

            return -1;
        }

        /// <summary>
        /// Maps every character of the pattern to the index of its right-most
        /// occurrence in the pattern.
        /// </summary>
        private static Dictionary<char, int> BuildBadCharacterTable(string pattern)
        {
            Dictionary<char, int> lastIndexOfChar = new Dictionary<char, int>();
            for (int i = 0; i < pattern.Length; i++)
            {
                lastIndexOfChar[pattern[i]] = i; // later indices overwrite earlier ones
            }
            return lastIndexOfChar;
        }

        /// <summary>
        /// Builds the good-suffix shift table. Entry <c>k</c> is the shift to apply
        /// when the suffix <c>pattern[k..]</c> matched the text and the character
        /// at <c>pattern[k - 1]</c> did not; entry <c>pattern.Length</c> covers a
        /// mismatch on the very last character.
        /// </summary>
        private static int[] BuildGoodSuffixShifts(string pattern)
        {
            int length = pattern.Length;
            int[] shift = new int[length + 1];

            // borderStart[i] is the start index of the widest border of pattern[i..].
            int[] borderStart = new int[length + 1];

            // Case 1: the matched suffix occurs again further left in the pattern,
            // preceded by a different character.
            int i = length;
            int j = length + 1;
            borderStart[i] = j;
            while (i > 0)
            {
                while (j <= length && pattern[i - 1] != pattern[j - 1])
                {
                    if (shift[j] == 0)
                    {
                        shift[j] = j - i;
                    }
                    j = borderStart[j];
                }
                i--;
                j--;
                borderStart[i] = j;
            }

            // Case 2: only a prefix of the pattern matches a tail of the matched
            // suffix; fill the remaining entries with the widest usable border.
            j = borderStart[0];
            for (i = 0; i <= length; i++)
            {
                if (shift[i] == 0)
                {
                    shift[i] = j;
                }
                if (i == j)
                {
                    j = borderStart[j];
                }
            }

            return shift;
        }
    }
}
0 commit comments