blob: dc95c3e5e55ad459994ef4a3eacd713d15fa2e45 [file] [log] [blame]
Austin Schuh36244a12019-09-21 17:52:38 -07001// Copyright 2018 The Abseil Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "absl/strings/internal/memutil.h"
16
17#include <algorithm>
18#include <cstdlib>
19
20#include "benchmark/benchmark.h"
21#include "absl/strings/ascii.h"
22
23// We fill the haystack with aaaaaaaaaaaaaaaaaa...aaaab.
24// That gives us:
25// - an easy search: 'b'
26// - a medium search: 'ab'. That means every letter is a possible match.
27// - a pathological search: 'aaaaaa.......aaaaab' (half as many a's as haytack)
28// We benchmark case-sensitive and case-insensitive versions of
29// three memmem implementations:
30// - memmem() from memutil.h
31// - search() from STL
32// - memmatch(), a custom implementation using memchr and memcmp.
33// Here are sample results:
34//
35// Run on (12 X 3800 MHz CPU s)
36// CPU Caches:
37// L1 Data 32K (x6)
38// L1 Instruction 32K (x6)
39// L2 Unified 256K (x6)
40// L3 Unified 15360K (x1)
41// ----------------------------------------------------------------
42// Benchmark Time CPU Iterations
43// ----------------------------------------------------------------
44// BM_Memmem 3583 ns 3582 ns 196469 2.59966GB/s
45// BM_MemmemMedium 13743 ns 13742 ns 50901 693.986MB/s
46// BM_MemmemPathological 13695030 ns 13693977 ns 51 713.133kB/s
47// BM_Memcasemem 3299 ns 3299 ns 212942 2.82309GB/s
48// BM_MemcasememMedium 16407 ns 16406 ns 42170 581.309MB/s
49// BM_MemcasememPathological 17267745 ns 17266030 ns 41 565.598kB/s
50// BM_Search 1610 ns 1609 ns 431321 5.78672GB/s
51// BM_SearchMedium 11111 ns 11110 ns 63001 858.414MB/s
52// BM_SearchPathological 12117390 ns 12116397 ns 58 805.984kB/s
53// BM_Searchcase 3081 ns 3081 ns 229949 3.02313GB/s
54// BM_SearchcaseMedium 16003 ns 16001 ns 44170 595.998MB/s
55// BM_SearchcasePathological 15823413 ns 15821909 ns 44 617.222kB/s
56// BM_Memmatch 197 ns 197 ns 3584225 47.2951GB/s
57// BM_MemmatchMedium 52333 ns 52329 ns 13280 182.244MB/s
58// BM_MemmatchPathological 659799 ns 659727 ns 1058 14.4556MB/s
59// BM_Memcasematch 5460 ns 5460 ns 127606 1.70586GB/s
60// BM_MemcasematchMedium 32861 ns 32857 ns 21258 290.248MB/s
61// BM_MemcasematchPathological 15154243 ns 15153089 ns 46 644.464kB/s
62// BM_MemmemStartup 5 ns 5 ns 150821500
63// BM_SearchStartup 5 ns 5 ns 150644203
64// BM_MemmatchStartup 7 ns 7 ns 97068802
65//
66// Conclusions:
67//
68// The following recommendations are based on the sample results above. However,
69// we have found that the performance of STL search can vary significantly
70// depending on compiler and standard library implementation. We recommend you
71// run the benchmarks for yourself on relevant platforms.
72//
73// If you need case-insensitive, STL search is slightly better than memmem for
74// all cases.
75//
76// Case-sensitive is more subtle:
77// Custom memmatch is _very_ fast at scanning, so if you have very few possible
78// matches in your haystack, that's the way to go. Performance drops
79// significantly with more matches.
80//
81// STL search is slightly faster than memmem in the medium and pathological
82// benchmarks. However, the performance of memmem is currently more dependable
83// across platforms and build configurations.
84
85namespace {
86
87constexpr int kHaystackSize = 10000;
88constexpr int64_t kHaystackSize64 = kHaystackSize;
89const char* MakeHaystack() {
90 char* haystack = new char[kHaystackSize];
91 for (int i = 0; i < kHaystackSize - 1; ++i) haystack[i] = 'a';
92 haystack[kHaystackSize - 1] = 'b';
93 return haystack;
94}
95const char* const kHaystack = MakeHaystack();
96
97void BM_Memmem(benchmark::State& state) {
98 for (auto _ : state) {
99 benchmark::DoNotOptimize(
100 absl::strings_internal::memmem(kHaystack, kHaystackSize, "b", 1));
101 }
102 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
103}
104BENCHMARK(BM_Memmem);
105
106void BM_MemmemMedium(benchmark::State& state) {
107 for (auto _ : state) {
108 benchmark::DoNotOptimize(
109 absl::strings_internal::memmem(kHaystack, kHaystackSize, "ab", 2));
110 }
111 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
112}
113BENCHMARK(BM_MemmemMedium);
114
115void BM_MemmemPathological(benchmark::State& state) {
116 for (auto _ : state) {
117 benchmark::DoNotOptimize(absl::strings_internal::memmem(
118 kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
119 kHaystackSize - kHaystackSize / 2));
120 }
121 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
122}
123BENCHMARK(BM_MemmemPathological);
124
125void BM_Memcasemem(benchmark::State& state) {
126 for (auto _ : state) {
127 benchmark::DoNotOptimize(
128 absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "b", 1));
129 }
130 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
131}
132BENCHMARK(BM_Memcasemem);
133
134void BM_MemcasememMedium(benchmark::State& state) {
135 for (auto _ : state) {
136 benchmark::DoNotOptimize(
137 absl::strings_internal::memcasemem(kHaystack, kHaystackSize, "ab", 2));
138 }
139 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
140}
141BENCHMARK(BM_MemcasememMedium);
142
143void BM_MemcasememPathological(benchmark::State& state) {
144 for (auto _ : state) {
145 benchmark::DoNotOptimize(absl::strings_internal::memcasemem(
146 kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
147 kHaystackSize - kHaystackSize / 2));
148 }
149 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
150}
151BENCHMARK(BM_MemcasememPathological);
152
153bool case_eq(const char a, const char b) {
154 return absl::ascii_tolower(a) == absl::ascii_tolower(b);
155}
156
157void BM_Search(benchmark::State& state) {
158 for (auto _ : state) {
159 benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
160 kHaystack + kHaystackSize - 1,
161 kHaystack + kHaystackSize));
162 }
163 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
164}
165BENCHMARK(BM_Search);
166
167void BM_SearchMedium(benchmark::State& state) {
168 for (auto _ : state) {
169 benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
170 kHaystack + kHaystackSize - 2,
171 kHaystack + kHaystackSize));
172 }
173 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
174}
175BENCHMARK(BM_SearchMedium);
176
177void BM_SearchPathological(benchmark::State& state) {
178 for (auto _ : state) {
179 benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
180 kHaystack + kHaystackSize / 2,
181 kHaystack + kHaystackSize));
182 }
183 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
184}
185BENCHMARK(BM_SearchPathological);
186
187void BM_Searchcase(benchmark::State& state) {
188 for (auto _ : state) {
189 benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
190 kHaystack + kHaystackSize - 1,
191 kHaystack + kHaystackSize, case_eq));
192 }
193 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
194}
195BENCHMARK(BM_Searchcase);
196
197void BM_SearchcaseMedium(benchmark::State& state) {
198 for (auto _ : state) {
199 benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
200 kHaystack + kHaystackSize - 2,
201 kHaystack + kHaystackSize, case_eq));
202 }
203 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
204}
205BENCHMARK(BM_SearchcaseMedium);
206
207void BM_SearchcasePathological(benchmark::State& state) {
208 for (auto _ : state) {
209 benchmark::DoNotOptimize(std::search(kHaystack, kHaystack + kHaystackSize,
210 kHaystack + kHaystackSize / 2,
211 kHaystack + kHaystackSize, case_eq));
212 }
213 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
214}
215BENCHMARK(BM_SearchcasePathological);
216
217char* memcasechr(const char* s, int c, size_t slen) {
218 c = absl::ascii_tolower(c);
219 for (; slen; ++s, --slen) {
220 if (absl::ascii_tolower(*s) == c) return const_cast<char*>(s);
221 }
222 return nullptr;
223}
224
225const char* memcasematch(const char* phaystack, size_t haylen,
226 const char* pneedle, size_t neelen) {
227 if (0 == neelen) {
228 return phaystack; // even if haylen is 0
229 }
230 if (haylen < neelen) return nullptr;
231
232 const char* match;
233 const char* hayend = phaystack + haylen - neelen + 1;
234 while ((match = static_cast<char*>(
235 memcasechr(phaystack, pneedle[0], hayend - phaystack)))) {
236 if (absl::strings_internal::memcasecmp(match, pneedle, neelen) == 0)
237 return match;
238 else
239 phaystack = match + 1;
240 }
241 return nullptr;
242}
243
244void BM_Memmatch(benchmark::State& state) {
245 for (auto _ : state) {
246 benchmark::DoNotOptimize(
247 absl::strings_internal::memmatch(kHaystack, kHaystackSize, "b", 1));
248 }
249 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
250}
251BENCHMARK(BM_Memmatch);
252
253void BM_MemmatchMedium(benchmark::State& state) {
254 for (auto _ : state) {
255 benchmark::DoNotOptimize(
256 absl::strings_internal::memmatch(kHaystack, kHaystackSize, "ab", 2));
257 }
258 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
259}
260BENCHMARK(BM_MemmatchMedium);
261
262void BM_MemmatchPathological(benchmark::State& state) {
263 for (auto _ : state) {
264 benchmark::DoNotOptimize(absl::strings_internal::memmatch(
265 kHaystack, kHaystackSize, kHaystack + kHaystackSize / 2,
266 kHaystackSize - kHaystackSize / 2));
267 }
268 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
269}
270BENCHMARK(BM_MemmatchPathological);
271
272void BM_Memcasematch(benchmark::State& state) {
273 for (auto _ : state) {
274 benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "b", 1));
275 }
276 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
277}
278BENCHMARK(BM_Memcasematch);
279
280void BM_MemcasematchMedium(benchmark::State& state) {
281 for (auto _ : state) {
282 benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize, "ab", 2));
283 }
284 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
285}
286BENCHMARK(BM_MemcasematchMedium);
287
288void BM_MemcasematchPathological(benchmark::State& state) {
289 for (auto _ : state) {
290 benchmark::DoNotOptimize(memcasematch(kHaystack, kHaystackSize,
291 kHaystack + kHaystackSize / 2,
292 kHaystackSize - kHaystackSize / 2));
293 }
294 state.SetBytesProcessed(kHaystackSize64 * state.iterations());
295}
296BENCHMARK(BM_MemcasematchPathological);
297
298void BM_MemmemStartup(benchmark::State& state) {
299 for (auto _ : state) {
300 benchmark::DoNotOptimize(absl::strings_internal::memmem(
301 kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
302 }
303}
304BENCHMARK(BM_MemmemStartup);
305
306void BM_SearchStartup(benchmark::State& state) {
307 for (auto _ : state) {
308 benchmark::DoNotOptimize(
309 std::search(kHaystack + kHaystackSize - 10, kHaystack + kHaystackSize,
310 kHaystack + kHaystackSize - 1, kHaystack + kHaystackSize));
311 }
312}
313BENCHMARK(BM_SearchStartup);
314
315void BM_MemmatchStartup(benchmark::State& state) {
316 for (auto _ : state) {
317 benchmark::DoNotOptimize(absl::strings_internal::memmatch(
318 kHaystack + kHaystackSize - 10, 10, kHaystack + kHaystackSize - 1, 1));
319 }
320}
321BENCHMARK(BM_MemmatchStartup);
322
323} // namespace