From 98f5214cd9acde9ab06337fc8b5208fbe2ce1de3 Mon Sep 17 00:00:00 2001 From: Aman-Godara Date: Wed, 23 Jun 2021 20:43:41 +0530 Subject: [PATCH 1/7] implemented count function --- src/stdlib_strings.f90 | 96 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/src/stdlib_strings.f90 b/src/stdlib_strings.f90 index 6bfd8a630..ec02491bc 100644 --- a/src/stdlib_strings.f90 +++ b/src/stdlib_strings.f90 @@ -12,7 +12,7 @@ module stdlib_strings public :: strip, chomp public :: starts_with, ends_with - public :: slice, find, replace_all + public :: slice, find, replace_all, count !> Remove leading and trailing whitespace characters. @@ -93,6 +93,17 @@ module stdlib_strings module procedure :: replace_all_char_char_char end interface replace_all + !> Returns the number of times substring 'pattern' has appeared in the + !> input string 'string' + !> [Specifications](link to the specs - to be completed) + !> Version: experimental + interface count + module procedure :: count_string_string + module procedure :: count_string_char + module procedure :: count_char_string + module procedure :: count_char_char + end interface count + contains @@ -649,4 +660,87 @@ pure function replace_all_char_char_char(string, pattern, replacement) result(re end function replace_all_char_char_char + !> Returns the number of times substring 'pattern' has appeared in the + !> input string 'string' + !> Returns an integer + elemental function count_string_string(string, pattern, consider_overlapping) result(res) + type(string_type), intent(in) :: string + type(string_type), intent(in) :: pattern + logical, intent(in), optional :: consider_overlapping + integer :: res + + res = count(char(string), char(pattern), consider_overlapping) + + end function count_string_string + + !> Returns the number of times substring 'pattern' has appeared in the + !> input string 'string' + !> Returns an integer + elemental function count_string_char(string, pattern, consider_overlapping) result(res) + type(string_type), intent(in) :: string + character(len=*), intent(in) :: pattern + logical, intent(in), optional :: consider_overlapping + integer :: res + + res = count(char(string), pattern, consider_overlapping) + + end function count_string_char + + !> Returns the number of times substring 'pattern' has appeared in the + !> input string 'string' + !> Returns an integer + elemental function count_char_string(string, pattern, consider_overlapping) result(res) + character(len=*), intent(in) :: string + type(string_type), intent(in) :: pattern + logical, intent(in), optional :: consider_overlapping + integer :: res + + res = count(string, char(pattern), consider_overlapping) + + end function count_char_string + + !> Returns the number of times substring 'pattern' has appeared in the + !> input string 'string' + !> Returns an integer + elemental function count_char_char(string, pattern, consider_overlapping) result(res) + character(len=*), intent(in) :: string + character(len=*), intent(in) :: pattern + logical, intent(in), optional :: consider_overlapping + integer :: lps_array(len(pattern)) + integer :: res, s_i, p_i, length_string, length_pattern + logical :: consider_overlapping_ + + consider_overlapping_ = optval(consider_overlapping, .true.) + res = 0 + length_string = len(string) + length_pattern = len(pattern) + + if (length_pattern > 0 .and. length_pattern <= length_string) then + lps_array = compute_lps(pattern) + + s_i = 1 + p_i = 1 + do while(s_i <= length_string) + if (string(s_i:s_i) == pattern(p_i:p_i)) then + if (p_i == length_pattern) then + res = res + 1 + if (consider_overlapping_) then + p_i = lps_array(p_i) + else + p_i = 0 + end if + end if + s_i = s_i + 1 + p_i = p_i + 1 + else if (p_i > 1) then + p_i = lps_array(p_i - 1) + 1 + else + s_i = s_i + 1 + end if + end do + end if + + end function count_char_char + + end module stdlib_strings From 579a95acb8e3ef3eb6c6577deb49a15f19365b56 Mon Sep 17 00:00:00 2001 From: Aman-Godara Date: Sun, 4 Jul 2021 20:11:47 +0530 Subject: [PATCH 2/7] added test cases for count function --- src/stdlib_strings.f90 | 2 +- src/tests/string/test_string_functions.f90 | 46 +++++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/src/stdlib_strings.f90 b/src/stdlib_strings.f90 index ec02491bc..27f2e8c17 100644 --- a/src/stdlib_strings.f90 +++ b/src/stdlib_strings.f90 @@ -95,7 +95,7 @@ module stdlib_strings !> Returns the number of times substring 'pattern' has appeared in the !> input string 'string' - !> [Specifications](link to the specs - to be completed) + !> [Specifications](../page/specs/stdlib_strings.html#count) !> Version: experimental interface count module procedure :: count_string_string diff --git a/src/tests/string/test_string_functions.f90 b/src/tests/string/test_string_functions.f90 index d0afb745a..8948067c7 100644 --- a/src/tests/string/test_string_functions.f90 +++ b/src/tests/string/test_string_functions.f90 @@ -4,7 +4,7 @@ module test_string_functions use stdlib_error, only : check use stdlib_string_type, only : string_type, assignment(=), operator(==), & to_lower, to_upper, to_title, to_sentence, reverse - use stdlib_strings, only: slice, find, replace_all + use stdlib_strings, only: slice, find, replace_all, count use stdlib_optval, only: optval use stdlib_ascii, only : to_string implicit none @@ -378,6 +378,49 @@ subroutine test_replace_all end subroutine test_replace_all + subroutine test_count + type(string_type) :: test_string_1, test_string_2, test_pattern_1, test_pattern_2 + test_string_1 = "DNA sequence: AGAGAGAGTCCTGTCGAGA" + test_string_2 = "DNA sequence: GTCCTGTCCTGTCAGA" + test_pattern_1 = "AGA" + test_pattern_2 = "GTCCTGTC" + + ! all 2 as string_type + call check(all(count([test_string_1, test_string_2], test_pattern_1) == [4, 1]), & + & 'count: all 2 as string_type, test case 1') + call check(all(count(test_string_1, [test_pattern_1, test_pattern_2], .false.) == [3, 1]), & + & 'count: all 2 as string_type, test case 2') + call check(count(test_string_2, test_pattern_1, .false.) == 1, & + & 'count: all 2 as string_type, test case 3') + call check(all(count([test_string_2, test_string_2, test_string_1], [test_pattern_2, & + & test_pattern_2, test_pattern_1], [.true., .false., .false.]) == [2, 1, 3]), & + & 'count: all 2 as string_type, test case 4') + call check(all(count([[test_string_1, test_string_2], [test_string_1, test_string_2]], [[test_pattern_1, & + & test_pattern_2], [test_pattern_2, test_pattern_1]], .true.) == [[4, 2], [1, 1]]), & + & 'count: all 2 as string_type, test case 5') + + ! 1 string_type and 1 character scalar + call check(all(count(test_string_1, ["AGA", "GTC"], [.true., .false.]) == [4, 2]), & + & 'count: 1 string_type and 1 character scalar, test case 1') + call check(all(count([test_string_1, test_string_2], ["CTC", "GTC"], [.true., .false.]) & + & == [0, 3]), 'count: 1 string_type and 1 character scalar, test case 2') + call check(all(count(["AGAGAGAGTCCTGTCGAGA", "AGAGAGAGTCCTGTCGAGA"], test_pattern_1, & + & [.false., .true.]) == [3, 4]), & + & 'count: 1 string_type and 1 character scalar, test case 3') + call check(count(test_string_1, "GAG") == 4, & + & 'count: 1 string_type and 1 character scalar, test case 4') + call check(count("DNA sequence: GTCCTGTCCTGTCAGA", test_pattern_2, .false.) == 1, & + & 'count: 1 string_type and 1 character scalar, test case 5') + + ! all 2 character scalar + call check(all(count("", ["mango", "trees"], .true.) == [0, 0]), & + & 'count: all 2 character scalar, test case 1') + call check(count("", "", .true.) == 0, 'count: all 2 character scalar, test case 2') + call check(all(count(["mango", "trees"], "", .true.) == [0, 0]), & + & 'count: all 2 character scalar, test case 3') + + end subroutine test_count + end module test_string_functions @@ -394,5 +437,6 @@ program tester call test_slice_gen call test_find call test_replace_all + call test_count end program tester From 1300b4309e227a0fe7e4208cd16d9b9453d5c8fc Mon Sep 17 00:00:00 2001 From: Aman-Godara Date: Sun, 4 Jul 2021 20:15:10 +0530 Subject: [PATCH 3/7] documented count function --- doc/specs/stdlib_strings.md | 58 +++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/doc/specs/stdlib_strings.md b/doc/specs/stdlib_strings.md index d5f22519c..b8463c985 100644 --- a/doc/specs/stdlib_strings.md +++ b/doc/specs/stdlib_strings.md @@ -280,7 +280,7 @@ end program demo_slice Returns the starting index of the `occurrence`th occurrence of the substring `pattern` in the input string `string`. Default value of `occurrence` is set to `1`. -If `consider_overlapping` is not provided or is set to `.true.` the function counts two overlapping occurrences of substring as two different occurrences. +If `consider_overlapping` is not provided or is set to `.true.` the function counts two overlapping occurrences of substring `pattern` as two different occurrences. If `occurrence`th occurrence is not found, function returns `0`. #### Syntax @@ -308,7 +308,7 @@ Elemental function #### Result value -The result is a scalar of integer type or integer array of rank equal to the highest rank among all dummy arguments. +The result is a scalar of integer type or an integer array of rank equal to the highest rank among all dummy arguments. #### Example @@ -381,4 +381,56 @@ program demo_replace_all ! string <-- "technology here, technology there, technology everywhere" end program demo_replace_all -``` \ No newline at end of file +``` + + + +### `count` + +#### Description + +Returns the number of times substring `pattern` has occurred in the input string `string`. +If `consider_overlapping` is not provided or is set to `.true.` the function counts two overlapping occurrences of substring `pattern` as two different occurrences. + +#### Syntax + +`string = [[stdlib_strings(module):count(interface)]] (string, pattern [, consider_overlapping])` + +#### Status + +Experimental + +#### Class + +Elemental function + +#### Argument + +- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]]. + This argument is intent(in). +- `pattern`: Character scalar or [[stdlib_string_type(module):string_type(type)]]. + This argument is intent(in). +- `consider_overlapping`: logical. + This argument is intent(in) and optional. + +#### Result value + +The result is a scalar of integer type or an integer array of rank equal to the highest rank among all dummy arguments. + +#### Example + +```fortran +program demo_count + use stdlib_string_type, only: string_type, assignment(=) + use stdlib_strings, only : count + implicit none + type(string_type) :: string + + string = "How much wood would a woodchuck chuck if a woodchuck could chuck wood?" + + print *, count(string, "wood") ! 4 + print *, count(string, ["would", "chuck", "could"]) ! [1, 4, 1] + print *, count("a long queueueueue", "ueu", [.false., .true.]) ! [2, 4] + +end program demo_count +``` From 88f38dd9049edf10b312051b6f06772b8a25bccc Mon Sep 17 00:00:00 2001 From: Aman-Godara Date: Sun, 4 Jul 2021 22:03:38 +0530 Subject: [PATCH 4/7] improved aesthetics around '&' --- src/tests/string/test_string_functions.f90 | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/tests/string/test_string_functions.f90 b/src/tests/string/test_string_functions.f90 index 8948067c7..ca44d956d 100644 --- a/src/tests/string/test_string_functions.f90 +++ b/src/tests/string/test_string_functions.f90 @@ -355,8 +355,8 @@ subroutine test_replace_all call check(replace_all(test_string_1, "TAT", "ATA") == & & "mutate DNA sequence: GTATACGATAGCCGTAATATA", & & "replace_all: 1 string_type & 2 character scalar, test case 1") - call check(replace_all("mutate DNA sequence: AGAGAGCCTAGAGAGAG", test_pattern_2, & - & "GC") == "mutate DNA sequence: GCGAGCCTGCGGCG", & + call check(replace_all("mutate DNA sequence: AGAGAGCCTAGAGAGAG", test_pattern_2, "GC") == & + & "mutate DNA sequence: GCGAGCCTGCGGCG", & & "replace_all: 1 string_type & 2 character scalar, test case 2") call check(replace_all("mutate DNA sequence: GTTATCGTATGCCGTAATTAT", "TA", & & test_replacement_2) == "mutate DNA sequence: GTagaTCGagaTGCCGagaATagaT", & @@ -392,20 +392,20 @@ subroutine test_count & 'count: all 2 as string_type, test case 2') call check(count(test_string_2, test_pattern_1, .false.) == 1, & & 'count: all 2 as string_type, test case 3') - call check(all(count([test_string_2, test_string_2, test_string_1], [test_pattern_2, & - & test_pattern_2, test_pattern_1], [.true., .false., .false.]) == [2, 1, 3]), & - & 'count: all 2 as string_type, test case 4') - call check(all(count([[test_string_1, test_string_2], [test_string_1, test_string_2]], [[test_pattern_1, & - & test_pattern_2], [test_pattern_2, test_pattern_1]], .true.) == [[4, 2], [1, 1]]), & - & 'count: all 2 as string_type, test case 5') + call check(all(count([test_string_2, test_string_2, test_string_1], & + & [test_pattern_2, test_pattern_2, test_pattern_1], [.true., .false., .false.]) == & + & [2, 1, 3]), 'count: all 2 as string_type, test case 4') + call check(all(count([[test_string_1, test_string_2], [test_string_1, test_string_2]], & + & [[test_pattern_1, test_pattern_2], [test_pattern_2, test_pattern_1]], .true.) == & + & [[4, 2], [1, 1]]), 'count: all 2 as string_type, test case 5') ! 1 string_type and 1 character scalar call check(all(count(test_string_1, ["AGA", "GTC"], [.true., .false.]) == [4, 2]), & & 'count: 1 string_type and 1 character scalar, test case 1') - call check(all(count([test_string_1, test_string_2], ["CTC", "GTC"], [.true., .false.]) & - & == [0, 3]), 'count: 1 string_type and 1 character scalar, test case 2') - call check(all(count(["AGAGAGAGTCCTGTCGAGA", "AGAGAGAGTCCTGTCGAGA"], test_pattern_1, & - & [.false., .true.]) == [3, 4]), & + call check(all(count([test_string_1, test_string_2], ["CTC", "GTC"], [.true., .false.]) == & + & [0, 3]), 'count: 1 string_type and 1 character scalar, test case 2') + call check(all(count(["AGAGAGAGTCCTGTCGAGA", "AGAGAGAGTCCTGTCGAGA"], & + & test_pattern_1, [.false., .true.]) == [3, 4]), & & 'count: 1 string_type and 1 character scalar, test case 3') call check(count(test_string_1, "GAG") == 4, & & 'count: 1 string_type and 1 character scalar, test case 4') From bf2674e9afe9deb59fb6a804151a2bce97aceea9 Mon Sep 17 00:00:00 2001 From: Aman Godara <34789087+Aman-Godara@users.noreply.github.com> Date: Sun, 4 Jul 2021 22:05:12 +0530 Subject: [PATCH 5/7] keep a space before opening bracket of while loop construct Co-authored-by: Milan Curcic --- src/stdlib_strings.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stdlib_strings.f90 b/src/stdlib_strings.f90 index 27f2e8c17..d9dc5d75a 100644 --- a/src/stdlib_strings.f90 +++ b/src/stdlib_strings.f90 @@ -720,7 +720,7 @@ elemental function count_char_char(string, pattern, consider_overlapping) result s_i = 1 p_i = 1 - do while(s_i <= length_string) + do while (s_i <= length_string) if (string(s_i:s_i) == pattern(p_i:p_i)) then if (p_i == length_pattern) then res = res + 1 From d326924e733205c644161486f0ddbd7d36bff727 Mon Sep 17 00:00:00 2001 From: Aman-Godara Date: Sun, 4 Jul 2021 22:19:42 +0530 Subject: [PATCH 6/7] removed variable consider_overlapping_ and used its expression directly in the if-else statement --- src/stdlib_strings.f90 | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/stdlib_strings.f90 b/src/stdlib_strings.f90 index 27f2e8c17..334f90a61 100644 --- a/src/stdlib_strings.f90 +++ b/src/stdlib_strings.f90 @@ -454,9 +454,7 @@ elemental function find_char_char(string, pattern, occurrence, consider_overlapp logical, intent(in), optional :: consider_overlapping integer :: lps_array(len(pattern)) integer :: res, s_i, p_i, length_string, length_pattern, occurrence_ - logical :: consider_overlapping_ - consider_overlapping_ = optval(consider_overlapping, .true.) occurrence_ = optval(occurrence, 1) res = 0 length_string = len(string) @@ -475,7 +473,7 @@ elemental function find_char_char(string, pattern, occurrence, consider_overlapp if (occurrence_ == 0) then res = s_i - length_pattern + 1 exit - else if (consider_overlapping_) then + else if (optval(consider_overlapping, .true.)) then p_i = lps_array(p_i) else p_i = 0 @@ -708,9 +706,7 @@ elemental function count_char_char(string, pattern, consider_overlapping) result logical, intent(in), optional :: consider_overlapping integer :: lps_array(len(pattern)) integer :: res, s_i, p_i, length_string, length_pattern - logical :: consider_overlapping_ - consider_overlapping_ = optval(consider_overlapping, .true.) res = 0 length_string = len(string) length_pattern = len(pattern) @@ -724,7 +720,7 @@ elemental function count_char_char(string, pattern, consider_overlapping) result if (string(s_i:s_i) == pattern(p_i:p_i)) then if (p_i == length_pattern) then res = res + 1 - if (consider_overlapping_) then + if (optval(consider_overlapping, .true.)) then p_i = lps_array(p_i) else p_i = 0 From c8fa8a5dd6a712811cb183fd8d8bf3dfd0aaac02 Mon Sep 17 00:00:00 2001 From: Aman Godara <34789087+Aman-Godara@users.noreply.github.com> Date: Sun, 4 Jul 2021 23:09:55 +0530 Subject: [PATCH 7/7] Version meta-data for ford Co-authored-by: Jeremie Vandenplas --- doc/specs/stdlib_strings.md | 2 +- src/stdlib_strings.f90 | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/specs/stdlib_strings.md b/doc/specs/stdlib_strings.md index b8463c985..087589bbc 100644 --- a/doc/specs/stdlib_strings.md +++ b/doc/specs/stdlib_strings.md @@ -389,7 +389,7 @@ end program demo_replace_all #### Description -Returns the number of times substring `pattern` has occurred in the input string `string`. +Returns the number of times the substring `pattern` has occurred in the input string `string`. If `consider_overlapping` is not provided or is set to `.true.` the function counts two overlapping occurrences of substring `pattern` as two different occurrences. #### Syntax diff --git a/src/stdlib_strings.f90 b/src/stdlib_strings.f90 index 59e7a4a79..2554dbfb1 100644 --- a/src/stdlib_strings.f90 +++ b/src/stdlib_strings.f90 @@ -93,10 +93,11 @@ module stdlib_strings module procedure :: replace_all_char_char_char end interface replace_all + !> Version: experimental + !> !> Returns the number of times substring 'pattern' has appeared in the !> input string 'string' !> [Specifications](../page/specs/stdlib_strings.html#count) - !> Version: experimental interface count module procedure :: count_string_string module procedure :: count_string_char