Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame^] | 1 | // |
| 2 | // Copyright 2017 The Abseil Authors. |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | // |
| 16 | |
| 17 | // These routines provide mem versions of standard C string routines, |
| 18 | // such as strpbrk. They function exactly the same as the str versions, |
| 19 | // so if you wonder what they are, replace the word "mem" by |
| 20 | // "str" and check out the man page. I could return void*, as the |
| 21 | // strutil.h mem*() routines tend to do, but I return char* instead |
| 22 | // since this is by far the most common way these functions are called. |
| 23 | // |
| 24 | // The difference between the mem and str versions is the mem version |
| 25 | // takes a pointer and a length, rather than a '\0'-terminated string. |
| 26 | // The memcase* routines defined here assume the locale is "C" |
| 27 | // (they use absl::ascii_tolower instead of tolower). |
| 28 | // |
| 29 | // These routines are based on the BSD library. |
| 30 | // |
| 31 | // Here's a list of routines from string.h, and their mem analogues. |
| 32 | // Functions in lowercase are defined in string.h; those in UPPERCASE |
| 33 | // are defined here: |
| 34 | // |
//    str routine       strn routine    mem analogue
//    ---------------   -------------   ---------------------------
//    strlen                            --
//    strcat            strncat         MEMCAT
//    strcpy            strncpy         memcpy
//    --                                memccpy   (very cool function, btw)
//    --                                memmove
//    --                                memset
//    strcmp            strncmp         memcmp
//    strcasecmp        strncasecmp     MEMCASECMP
//    strchr                            memchr
//    strcoll                           --
//    strxfrm                           --
//    strdup            strndup         MEMDUP
//    strrchr                           MEMRCHR
//    strspn                            MEMSPN
//    strcspn                           MEMCSPN
//    strpbrk                           MEMPBRK
//    strstr                            MEMSTR, MEMMEM
//    (g)strcasestr                     MEMCASESTR, MEMCASEMEM
//    strtok                            --
//    strprefix                         MEMPREFIX      (strprefix is from strutil.h)
//    strcaseprefix                     MEMCASEPREFIX  (strcaseprefix is from strutil.h)
//    strsuffix                         MEMSUFFIX      (strsuffix is from strutil.h)
//    strcasesuffix                     MEMCASESUFFIX  (strcasesuffix is from strutil.h)
//    --                                MEMIS
//    --                                MEMCASEIS
//    strcount                          MEMCOUNT       (strcount is from strutil.h)
| 61 | |
| 62 | #ifndef ABSL_STRINGS_INTERNAL_MEMUTIL_H_ |
| 63 | #define ABSL_STRINGS_INTERNAL_MEMUTIL_H_ |
| 64 | |
| 65 | #include <cstddef> |
| 66 | #include <cstring> |
| 67 | |
| 68 | #include "absl/base/port.h" // disable some warnings on Windows |
| 69 | #include "absl/strings/ascii.h" // for absl::ascii_tolower |
| 70 | |
| 71 | namespace absl { |
| 72 | namespace strings_internal { |
| 73 | |
// Appends the `srclen` bytes at `src` to the buffer `dest`, which already
// holds `destlen` bytes.
//
// No terminator is written and no bounds are checked: the caller must make
// sure `dest` has room for `destlen + srclen` bytes and that the two regions
// do not overlap (the copy is done with memcpy).
//
// Returns a pointer to the first appended byte, i.e. `dest + destlen`. Note
// that this differs from strcat(), which returns `dest`.
inline char* memcat(char* dest, size_t destlen, const char* src,
                    size_t srclen) {
  char* const tail = dest + destlen;  // where the appended bytes begin
  std::memcpy(tail, src, srclen);
  return tail;
}
| 78 | |
// Length-delimited ("mem") analogues of the string.h routines named in the
// table at the top of this file. Only the declarations are visible in this
// header; confirm edge-case behavior against the definitions.

// mem analogue of strcasecmp/strncasecmp: compares the first `len` bytes of
// `s1` and `s2` ignoring case. Per the file comment, the memcase* routines
// assume the "C" locale (they use absl::ascii_tolower rather than tolower).
| 79 | int memcasecmp(const char* s1, const char* s2, size_t len); |
// mem analogue of strdup/strndup for the `slen` bytes at `s`.
// NOTE(review): how the result is allocated and who must release it is not
// visible from this declaration -- confirm against the definition.
| 80 | char* memdup(const char* s, size_t slen); |
// mem analogue of strrchr: searches the `slen` bytes at `s` for `c`.
// Returns a non-const char* even though `s` is const (see the file comment
// on why these routines return char*).
| 81 | char* memrchr(const char* s, int c, size_t slen); |
// mem analogue of strspn over the `slen` bytes at `s`. `accept` carries no
// length, so it is presumably a '\0'-terminated set of bytes -- confirm.
| 82 | size_t memspn(const char* s, size_t slen, const char* accept); |
// mem analogue of strcspn over the `slen` bytes at `s`. `reject` carries no
// length, so it is presumably a '\0'-terminated set of bytes -- confirm.
| 83 | size_t memcspn(const char* s, size_t slen, const char* reject); |
// mem analogue of strpbrk over the `slen` bytes at `s`. `accept` carries no
// length, so it is presumably a '\0'-terminated set of bytes -- confirm.
| 84 | char* mempbrk(const char* s, size_t slen, const char* accept); |
| 85 | |
| 86 | // This is for internal use only. Don't call this directly |
| 87 | template <bool case_sensitive> |
| 88 | const char* int_memmatch(const char* haystack, size_t haylen, |
| 89 | const char* needle, size_t neelen) { |
| 90 | if (0 == neelen) { |
| 91 | return haystack; // even if haylen is 0 |
| 92 | } |
| 93 | const char* hayend = haystack + haylen; |
| 94 | const char* needlestart = needle; |
| 95 | const char* needleend = needlestart + neelen; |
| 96 | |
| 97 | for (; haystack < hayend; ++haystack) { |
| 98 | char hay = case_sensitive |
| 99 | ? *haystack |
| 100 | : absl::ascii_tolower(static_cast<unsigned char>(*haystack)); |
| 101 | char nee = case_sensitive |
| 102 | ? *needle |
| 103 | : absl::ascii_tolower(static_cast<unsigned char>(*needle)); |
| 104 | if (hay == nee) { |
| 105 | if (++needle == needleend) { |
| 106 | return haystack + 1 - neelen; |
| 107 | } |
| 108 | } else if (needle != needlestart) { |
| 109 | // must back up haystack in case a prefix matched (find "aab" in "aaab") |
| 110 | haystack -= needle - needlestart; // for loop will advance one more |
| 111 | needle = needlestart; |
| 112 | } |
| 113 | } |
| 114 | return nullptr; |
| 115 | } |
| 116 | |
| 117 | // These are the guys you can call directly |
| 118 | inline const char* memstr(const char* phaystack, size_t haylen, |
| 119 | const char* pneedle) { |
| 120 | return int_memmatch<true>(phaystack, haylen, pneedle, strlen(pneedle)); |
| 121 | } |
| 122 | |
| 123 | inline const char* memcasestr(const char* phaystack, size_t haylen, |
| 124 | const char* pneedle) { |
| 125 | return int_memmatch<false>(phaystack, haylen, pneedle, strlen(pneedle)); |
| 126 | } |
| 127 | |
| 128 | inline const char* memmem(const char* phaystack, size_t haylen, |
| 129 | const char* pneedle, size_t needlelen) { |
| 130 | return int_memmatch<true>(phaystack, haylen, pneedle, needlelen); |
| 131 | } |
| 132 | |
| 133 | inline const char* memcasemem(const char* phaystack, size_t haylen, |
| 134 | const char* pneedle, size_t needlelen) { |
| 135 | return int_memmatch<false>(phaystack, haylen, pneedle, needlelen); |
| 136 | } |
| 137 | |
| 138 | // This is significantly faster for case-sensitive matches with very |
| 139 | // few possible matches. See unit test for benchmarks. |
// Case-sensitive search for the `neelen` bytes at `pneedle` within the
// `haylen` bytes at `phaystack`. Declared here only; the definition is not in
// this header.
// NOTE(review): presumably returns a pointer to the first match or nullptr,
// like memmem() -- confirm against the definition, including the
// `neelen == 0` case.
| 140 | const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, |
| 141 | size_t neelen); |
| 142 | |
| 143 | } // namespace strings_internal |
| 144 | } // namespace absl |
| 145 | |
| 146 | #endif // ABSL_STRINGS_INTERNAL_MEMUTIL_H_ |