Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame^] | 1 | // Copyright 2018 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #include "absl/strings/internal/memutil.h" |
| 16 | |
| 17 | #include <algorithm> |
| 18 | #include <cstdlib> |
| 19 | |
| 20 | #include "benchmark/benchmark.h" |
| 21 | #include "absl/strings/ascii.h" |
| 22 | |
| 23 | // We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab. |
| 24 | // That gives us: |
| 25 | // - an easy search: 'b' |
| 26 | // - a medium search: 'ab'. That means every letter is a possible match. |
| 27 | // - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haytack) |
| 28 | // We benchmark case-sensitive and case-insensitive versions of |
| 29 | // three memmem implementations: |
| 30 | // - memmem() from memutil.h |
| 31 | // - search() from STL |
| 32 | // - memmatch(), a custom implementation using memchr and memcmp. |
| 33 | // Here are sample results: |
| 34 | // |
| 35 | // Run on (12 X 3800 MHz CPU s) |
| 36 | // CPU Caches: |
| 37 | // L1 Data 32K (x6) |
| 38 | // L1 Instruction 32K (x6) |
| 39 | // L2 Unified 256K (x6) |
| 40 | // L3 Unified 15360K (x1) |
| 41 | // ---------------------------------------------------------------- |
| 42 | // Benchmark Time CPU Iterations |
| 43 | // ---------------------------------------------------------------- |
| 44 | // BM_Memmem 3583 ns 3582 ns 196469 2.59966GB/s |
| 45 | // BM_MemmemMedium 13743 ns 13742 ns 50901 693.986MB/s |
| 46 | // BM_MemmemPathological 13695030 ns 13693977 ns 51 713.133kB/s |
| 47 | // BM_Memcasemem 3299 ns 3299 ns 212942 2.82309GB/s |
| 48 | // BM_MemcasememMedium 16407 ns 16406 ns 42170 581.309MB/s |
| 49 | // BM_MemcasememPathological 17267745 ns 17266030 ns 41 565.598kB/s |
| 50 | // BM_Search 1610 ns 1609 ns 431321 5.78672GB/s |
| 51 | // BM_SearchMedium 11111 ns 11110 ns 63001 858.414MB/s |
| 52 | // BM_SearchPathological 12117390 ns 12116397 ns 58 805.984kB/s |
| 53 | // BM_Searchcase 3081 ns 3081 ns 229949 3.02313GB/s |
| 54 | // BM_SearchcaseMedium 16003 ns 16001 ns 44170 595.998MB/s |
| 55 | // BM_SearchcasePathological 15823413 ns 15821909 ns 44 617.222kB/s |
| 56 | // BM_Memmatch 197 ns 197 ns 3584225 47.2951GB/s |
| 57 | // BM_MemmatchMedium 52333 ns 52329 ns 13280 182.244MB/s |
| 58 | // BM_MemmatchPathological 659799 ns 659727 ns 1058 14.4556MB/s |
| 59 | // BM_Memcasematch 5460 ns 5460 ns 127606 1.70586GB/s |
| 60 | // BM_MemcasematchMedium 32861 ns 32857 ns 21258 290.248MB/s |
| 61 | // BM_MemcasematchPathological 15154243 ns 15153089 ns 46 644.464kB/s |
| 62 | // BM_MemmemStartup 5 ns 5 ns 150821500 |
| 63 | // BM_SearchStartup 5 ns 5 ns 150644203 |
| 64 | // BM_MemmatchStartup 7 ns 7 ns 97068802 |
| 65 | // |
| 66 | // Conclusions: |
| 67 | // |
| 68 | // The following recommendations are based on the sample results above. However, |
| 69 | // we have found that the performance of STL search can vary significantly |
| 70 | // depending on compiler and standard library implementation. We recommend you |
| 71 | // run the benchmarks for yourself on relevant platforms. |
| 72 | // |
| 73 | // If you need case-insensitive, STL search is slightly better than memmem for |
| 74 | // all cases. |
| 75 | // |
| 76 | // Case-sensitive is more subtle: |
| 77 | // Custom memmatch is _very_ fast at scanning, so if you have very few possible |
| 78 | // matches in your haystack, that's the way to go. Performance drops |
| 79 | // significantly with more matches. |
| 80 | // |
| 81 | // STL search is slightly faster than memmem in the medium and pathological |
| 82 | // benchmarks. However, the performance of memmem is currently more dependable |
| 83 | // across platforms and build configurations. |
| 84 | |
| 85 | namespace { |
| 86 | |
| 87 | constexpr int kHaystackSize = 10000; |
| 88 | constexpr int64_t kHaystackSize64 = kHaystackSize; |
| 89 | const char* MakeHaystack() { |
| 90 | char* haystack = new char[kHaystackSize]; |
| 91 | for (int i = 0; i < kHaystackSize - 1; ++i) haystack[i] = 'a'; |
| 92 | haystack[kHaystackSize - 1] = 'b'; |
| 93 | return haystack; |
| 94 | } |
| 95 | const char* const kHaystack = MakeHaystack(); |
| 96 | |
| 97 | void BM_Memmem(benchmark::State& state) { |
| 98 | for (auto _ : state) { |
| 99 | benchmark::DoNotOptimize( |
| 100 | absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1)); |
| 101 | } |
| 102 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 103 | } |
| 104 | BENCHMARK(BM_Memmem); |
| 105 | |
| 106 | void BM_MemmemMedium(benchmark::State& state) { |
| 107 | for (auto _ : state) { |
| 108 | benchmark::DoNotOptimize( |
| 109 | absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2)); |
| 110 | } |
| 111 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 112 | } |
| 113 | BENCHMARK(BM_MemmemMedium); |
| 114 | |
| 115 | void BM_MemmemPathological(benchmark::State& state) { |
| 116 | for (auto _ : state) { |
| 117 | benchmark::DoNotOptimize(absl::strings_internal::memmem( |
| 118 | kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, |
| 119 | kHaystackSize - kHaystackSize / 2)); |
| 120 | } |
| 121 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 122 | } |
| 123 | BENCHMARK(BM_MemmemPathological); |
| 124 | |
| 125 | void BM_Memcasemem(benchmark::State& state) { |
| 126 | for (auto _ : state) { |
| 127 | benchmark::DoNotOptimize( |
| 128 | absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1)); |
| 129 | } |
| 130 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 131 | } |
| 132 | BENCHMARK(BM_Memcasemem); |
| 133 | |
| 134 | void BM_MemcasememMedium(benchmark::State& state) { |
| 135 | for (auto _ : state) { |
| 136 | benchmark::DoNotOptimize( |
| 137 | absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2)); |
| 138 | } |
| 139 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 140 | } |
| 141 | BENCHMARK(BM_MemcasememMedium); |
| 142 | |
| 143 | void BM_MemcasememPathological(benchmark::State& state) { |
| 144 | for (auto _ : state) { |
| 145 | benchmark::DoNotOptimize(absl::strings_internal::memcasemem( |
| 146 | kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, |
| 147 | kHaystackSize - kHaystackSize / 2)); |
| 148 | } |
| 149 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 150 | } |
| 151 | BENCHMARK(BM_MemcasememPathological); |
| 152 | |
| 153 | bool case_eq(const char a, const char b) { |
| 154 | return absl::ascii_tolower(a) == absl::ascii_tolower(b); |
| 155 | } |
| 156 | |
| 157 | void BM_Search(benchmark::State& state) { |
| 158 | for (auto _ : state) { |
| 159 | benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, |
| 160 | kHaystack + kHaystackSize - 1, |
| 161 | kHaystack + kHaystackSize)); |
| 162 | } |
| 163 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 164 | } |
| 165 | BENCHMARK(BM_Search); |
| 166 | |
| 167 | void BM_SearchMedium(benchmark::State& state) { |
| 168 | for (auto _ : state) { |
| 169 | benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, |
| 170 | kHaystack + kHaystackSize - 2, |
| 171 | kHaystack + kHaystackSize)); |
| 172 | } |
| 173 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 174 | } |
| 175 | BENCHMARK(BM_SearchMedium); |
| 176 | |
| 177 | void BM_SearchPathological(benchmark::State& state) { |
| 178 | for (auto _ : state) { |
| 179 | benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, |
| 180 | kHaystack + kHaystackSize / 2, |
| 181 | kHaystack + kHaystackSize)); |
| 182 | } |
| 183 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 184 | } |
| 185 | BENCHMARK(BM_SearchPathological); |
| 186 | |
| 187 | void BM_Searchcase(benchmark::State& state) { |
| 188 | for (auto _ : state) { |
| 189 | benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, |
| 190 | kHaystack + kHaystackSize - 1, |
| 191 | kHaystack + kHaystackSize, case_eq)); |
| 192 | } |
| 193 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 194 | } |
| 195 | BENCHMARK(BM_Searchcase); |
| 196 | |
| 197 | void BM_SearchcaseMedium(benchmark::State& state) { |
| 198 | for (auto _ : state) { |
| 199 | benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, |
| 200 | kHaystack + kHaystackSize - 2, |
| 201 | kHaystack + kHaystackSize, case_eq)); |
| 202 | } |
| 203 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 204 | } |
| 205 | BENCHMARK(BM_SearchcaseMedium); |
| 206 | |
| 207 | void BM_SearchcasePathological(benchmark::State& state) { |
| 208 | for (auto _ : state) { |
| 209 | benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize, |
| 210 | kHaystack + kHaystackSize / 2, |
| 211 | kHaystack + kHaystackSize, case_eq)); |
| 212 | } |
| 213 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 214 | } |
| 215 | BENCHMARK(BM_SearchcasePathological); |
| 216 | |
| 217 | char* memcasechr(const char* s, int c, size_t slen) { |
| 218 | c = absl::ascii_tolower(c); |
| 219 | for (; slen; ++s, --slen) { |
| 220 | if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s); |
| 221 | } |
| 222 | return nullptr; |
| 223 | } |
| 224 | |
| 225 | const char* memcasematch(const char* phaystack, size_t haylen, |
| 226 | const char* pneedle, size_t neelen) { |
| 227 | if (0 == neelen) { |
| 228 | return phaystack; // even if haylen is 0 |
| 229 | } |
| 230 | if (haylen < neelen) return nullptr; |
| 231 | |
| 232 | const char* match; |
| 233 | const char* hayend = phaystack + haylen - neelen + 1; |
| 234 | while ((match = static_cast<char*>( |
| 235 | memcasechr(phaystack, pneedle[0], hayend - phaystack)))) { |
| 236 | if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0) |
| 237 | return match; |
| 238 | else |
| 239 | phaystack = match + 1; |
| 240 | } |
| 241 | return nullptr; |
| 242 | } |
| 243 | |
| 244 | void BM_Memmatch(benchmark::State& state) { |
| 245 | for (auto _ : state) { |
| 246 | benchmark::DoNotOptimize( |
| 247 | absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1)); |
| 248 | } |
| 249 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 250 | } |
| 251 | BENCHMARK(BM_Memmatch); |
| 252 | |
| 253 | void BM_MemmatchMedium(benchmark::State& state) { |
| 254 | for (auto _ : state) { |
| 255 | benchmark::DoNotOptimize( |
| 256 | absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2)); |
| 257 | } |
| 258 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 259 | } |
| 260 | BENCHMARK(BM_MemmatchMedium); |
| 261 | |
| 262 | void BM_MemmatchPathological(benchmark::State& state) { |
| 263 | for (auto _ : state) { |
| 264 | benchmark::DoNotOptimize(absl::strings_internal::memmatch( |
| 265 | kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2, |
| 266 | kHaystackSize - kHaystackSize / 2)); |
| 267 | } |
| 268 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 269 | } |
| 270 | BENCHMARK(BM_MemmatchPathological); |
| 271 | |
| 272 | void BM_Memcasematch(benchmark::State& state) { |
| 273 | for (auto _ : state) { |
| 274 | benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1)); |
| 275 | } |
| 276 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 277 | } |
| 278 | BENCHMARK(BM_Memcasematch); |
| 279 | |
| 280 | void BM_MemcasematchMedium(benchmark::State& state) { |
| 281 | for (auto _ : state) { |
| 282 | benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "ab", 2)); |
| 283 | } |
| 284 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 285 | } |
| 286 | BENCHMARK(BM_MemcasematchMedium); |
| 287 | |
| 288 | void BM_MemcasematchPathological(benchmark::State& state) { |
| 289 | for (auto _ : state) { |
| 290 | benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, |
| 291 | kHaystack + kHaystackSize / 2, |
| 292 | kHaystackSize - kHaystackSize / 2)); |
| 293 | } |
| 294 | state.SetBytesProcessed(kHaystackSize64 * state.iterations()); |
| 295 | } |
| 296 | BENCHMARK(BM_MemcasematchPathological); |
| 297 | |
| 298 | void BM_MemmemStartup(benchmark::State& state) { |
| 299 | for (auto _ : state) { |
| 300 | benchmark::DoNotOptimize(absl::strings_internal::memmem( |
| 301 | kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1)); |
| 302 | } |
| 303 | } |
| 304 | BENCHMARK(BM_MemmemStartup); |
| 305 | |
| 306 | void BM_SearchStartup(benchmark::State& state) { |
| 307 | for (auto _ : state) { |
| 308 | benchmark::DoNotOptimize( |
| 309 | std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize, |
| 310 | kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize)); |
| 311 | } |
| 312 | } |
| 313 | BENCHMARK(BM_SearchStartup); |
| 314 | |
| 315 | void BM_MemmatchStartup(benchmark::State& state) { |
| 316 | for (auto _ : state) { |
| 317 | benchmark::DoNotOptimize(absl::strings_internal::memmatch( |
| 318 | kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1)); |
| 319 | } |
| 320 | } |
| 321 | BENCHMARK(BM_MemmatchStartup); |
| 322 | |
| 323 | } // namespace |