blob: 440e77b1c01f7ec624dc5841ca1e41935d15bbdf [file] [log] [blame]
Brian Silvermancbe6df22015-09-26 17:32:32 -04001// Copyright (c) 2008, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8// * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14// * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29//
30// ---
31// Author: csilvers@google.com (Craig Silerstein)
32
33#ifndef TEMPLATE_TEMPLATE_STRING_H_
34#define TEMPLATE_TEMPLATE_STRING_H_
35
36#include <string.h> // for memcmp() and size_t
37#include <tr1/unordered_map>
38#include <string>
39#include <vector>
40
41#include <assert.h>
42#if 1
43#include <stdint.h> // one place @ac_cv_unit64@ might live
44#endif
45#if 1
46#include <inttypes.h> // another place @ac_cv_unit64@ might live
47#endif
48#include <sys/types.h> // final place @ac_cv_unit64@ might live
49
50class TemplateStringTest; // needed for friendship declaration
51class StaticTemplateStringTest;
52
53#if 1
54extern char _start[] __attribute__((weak)); // linker emits: start of .text
55extern char data_start[] __attribute__((weak)); // start of .data
56#endif
57
58
59
60namespace ctemplate {
61
62// Most methods of TemplateDictionary take a TemplateString rather than a
63// C++ string. This is for efficiency: it can avoid extra string copies.
64// For any argument that takes a TemplateString, you can pass in any of:
65// * A C++ string
66// * A char*
67// * A StringPiece
68// * TemplateString(char*, length)
69// The last of these is the most efficient, though it requires more work
70// on the call site (you have to create the TemplateString explicitly).
71class TemplateString;
72
73// If you have a string constant (e.g. the string literal "foo") that
74// you need to pass into template routines repeatedly, it is more
75// efficient if you convert it into a TemplateString only once. The
76// way to do this is to use a global StaticTemplateString via STS_INIT
77// (note: do this at global scope *only*!):
78// static const StaticTemplateString kMyVar = STS_INIT(kMyVar, "MY_VALUE");
79struct StaticTemplateString;
80
81#define STS_INIT(name, str) STS_INIT_WITH_HASH(name, str, 0)
82
83// Let's define a convenient hash function for hashing 'normal'
84// strings: char* and string. We'll use MurmurHash, which is probably
85// better than the STL default. We don't include TemplateString or
86// StaticTemplateString here, since they are hashed more efficiently
87// based on their id.
88struct StringHash {
89 inline size_t operator()(const char* s) const {
90 return Hash(s, strlen(s));
91 };
92
93 inline size_t operator()(const std::string& s) const {
94 return Hash(s.data(), s.size());
95 }
96
97 inline bool operator()(const char* a, const char* b) const {
98 return (a != b) && (strcmp(a, b) < 0); // <, for MSVC
99 }
100
101 inline bool operator()(const std::string& a, const std::string& b) const {
102 return a < b;
103 }
104
105 static const size_t bucket_size = 4; // These are required by MSVC
106 static const size_t min_buckets = 8; // 4 and 8 are the defaults
107 private:
108 size_t Hash(const char* s, size_t slen) const;
109};
110
111// ----------------------- THE CLASSES -------------------------------
112
113typedef u_int64_t TemplateId;
114
115const TemplateId kIllegalTemplateId = 0;
116
117struct StaticTemplateString {
118 // Do not define a constructor! We use only brace-initialization,
119 // so the data is constructed at static-initialization time.
120 // Anything you want to put in a constructor, put in
121 // StaticTemplateStringInitializer instead.
122
123 // These members shouldn't be accessed directly, except in the
124 // internals of the template code. They are public because that is
125 // the only way we can brace-initialize them. NOTE: MSVC (at least
126 // up to 8.0) has a bug where it ignores 'mutable' when it's buried
127 // in an internal struct. To fix that, we have to make this whole
128 // internal struct mutable. We only do this on MSVC, so on other
129 // compilers we get the full constness we want.
130#ifdef _MSC_VER
131 mutable
132#endif
133 struct {
134 const char* ptr_;
135 size_t length_;
136 mutable TemplateId id_; // sometimes lazily-initialized.
137 } do_not_use_directly_;
138
139 // This class is a good hash functor to pass in as the third
140 // argument to std::tr1::unordered_map<>, when creating a map whose keys are
141 // StaticTemplateString. NOTE: This class isn't that safe to use,
142 // because it requires that StaticTemplateStringInitializer has done
143 // its job. Unfortunately, even when you use the STS_INIT macro
144 // (which is always, right??), dynamic initialiation does not happen
145 // in a particular order, and objects in different .cc files may
146 // reference a StaticTemplateString before the corresponding
147 // StaticTemplateStringInitializer sets the id.
148 struct Hasher {
149 inline size_t operator()(const StaticTemplateString& sts) const;
150 inline bool operator()(const StaticTemplateString& a, // <, for MSVC
151 const StaticTemplateString& b) const;
152 static const size_t bucket_size = 4; // These are required by MSVC
153 static const size_t min_buckets = 8; // 4 and 8 are the defaults
154 };
155
156 inline bool empty() const {
157 return do_not_use_directly_.length_ == 0;
158 }
159
160 // Allows comparisons of StaticTemplateString objects as if they were
161 // strings. This is useful for STL.
162 inline bool operator==(const StaticTemplateString& x) const;
163};
164
165class TemplateString {
166 public:
167 TemplateString(const char* s)
168 : ptr_(s ? s : ""), length_(strlen(ptr_)),
169 is_immutable_(InTextSegment(ptr_)), id_(kIllegalTemplateId) {
170 }
171 TemplateString(const std::string& s)
172 : ptr_(s.data()), length_(s.size()),
173 is_immutable_(false), id_(kIllegalTemplateId) {
174 }
175 TemplateString(const char* s, size_t slen)
176 : ptr_(s), length_(slen),
177 is_immutable_(InTextSegment(s)), id_(kIllegalTemplateId) {
178 }
179 TemplateString(const StaticTemplateString& s)
180 : ptr_(s.do_not_use_directly_.ptr_),
181 length_(s.do_not_use_directly_.length_),
182 is_immutable_(true), id_(s.do_not_use_directly_.id_) {
183 }
184
185 const char* begin() const {
186 return ptr_;
187 }
188
189 const char* end() const {
190 return ptr_ + length_;
191 }
192
193 const char* data() const {
194 return ptr_;
195 }
196
197 size_t size() const {
198 return length_;
199 }
200
201 inline bool empty() const {
202 return length_ == 0;
203 };
204
205 inline bool is_immutable() const {
206 return is_immutable_;
207 }
208
209 // STL requires this to be public for hash_map, though I'd rather not.
210 inline bool operator==(const TemplateString& x) const {
211 return GetGlobalId() == x.GetGlobalId();
212 }
213
214 private:
215 // Only TemplateDictionaries and template expansion code can read these.
216 friend class TemplateDictionary;
217 friend class TemplateCache; // for GetGlobalId
218 friend class StaticTemplateStringInitializer; // for AddToGlo...
219 friend struct TemplateStringHasher; // for GetGlobalId
220 friend TemplateId GlobalIdForTest(const char* ptr, int len);
221 friend TemplateId GlobalIdForSTS_INIT(const TemplateString& s);
222
223 TemplateString(const char* s, size_t slen, bool is_immutable, TemplateId id)
224 : ptr_(s), length_(slen), is_immutable_(is_immutable), id_(id) {
225 }
226
227 // This returns true if s is in the .text segment of the binary.
228 // (Note this only checks .text of the main executable, not of
229 // shared libraries. So it may not be all that useful.)
230 // This requires the gnu linker (and probably elf), to define
231 // _start and data_start.
232 static bool InTextSegment(const char* s) {
233#if 1
234 return (s >= _start && s < data_start); // in .text
235#else
236 return false; // the conservative choice: assume it's not static memory
237#endif
238 }
239
240 protected:
241 inline void CacheGlobalId() { // used by HashedTemplateString
242 id_ = GetGlobalId();
243 };
244
245 private:
246 // Returns the global id, computing it for the first time if
247 // necessary. Note that since this is a const method, we don't
248 // store the computed value in id_, even if id_ is 0.
249 TemplateId GetGlobalId() const;
250 // Adds this TemplateString to the map from global-id to name.
251 void AddToGlobalIdToNameMap();
252
253 // Use sparingly. Converting to a string loses information about the
254 // id of the template string, making operations require extra hash
255 // computations.
256 std::string ToString() const { return std::string(ptr_, length_); }
257
258 // Does the reverse map from TemplateId to TemplateString contents.
259 // Returns a TemplateString(kStsEmpty) if id isn't found. Note that
260 // the TemplateString returned is not necessarily NUL terminated.
261 static TemplateString IdToString(TemplateId id);
262
263 const char* ptr_;
264 size_t length_;
265 // Do we need to manage memory for this string?
266 bool is_immutable_;
267 // Id for hash lookups. If 0, we don't have one and it should be
268 // computed as-needed.
269 TemplateId id_;
270};
271
272// ----------------------- THE CODE -------------------------------
273
274// Use the low-bit from TemplateId as the "initialized" flag. Note
275// that since all initialized TemplateId have the lower bit set, it's
276// safe to have used 0 for kIllegalTemplateId, as we did above.
277const TemplateId kTemplateStringInitializedFlag = 1;
278
279inline bool IsTemplateIdInitialized(TemplateId id) {
280 return id & kTemplateStringInitializedFlag;
281}
282
283// This is a helper struct used in TemplateString::Hasher/TemplateStringHasher
284struct TemplateIdHasher {
285 size_t operator()(TemplateId id) const {
286 // The shift has two effects: it randomizes the "initialized" flag,
287 // and slightly improves the randomness of the low bits. This is
288 // slightly useful when size_t is 32 bits, or when using a small
289 // hash tables with power-of-2 sizes.
290 return static_cast<size_t>(id ^ (id >> 33));
291 }
292 bool operator()(TemplateId a, TemplateId b) const { // <, for MSVC
293 return a < b;
294 }
295 static const size_t bucket_size = 4; // These are required by MSVC
296 static const size_t min_buckets = 8; // 4 and 8 are the defaults
297};
298
299
300inline size_t StaticTemplateString::Hasher::operator()(
301 const StaticTemplateString& sts) const {
302 TemplateId id = sts.do_not_use_directly_.id_;
303 assert(IsTemplateIdInitialized(id));
304 return TemplateIdHasher()(id);
305}
306
307inline bool StaticTemplateString::Hasher::operator()(
308 const StaticTemplateString& a, const StaticTemplateString& b) const {
309 TemplateId id_a = a.do_not_use_directly_.id_;
310 TemplateId id_b = b.do_not_use_directly_.id_;
311 assert(IsTemplateIdInitialized(id_a));
312 assert(IsTemplateIdInitialized(id_b));
313 return TemplateIdHasher()(id_a, id_b);
314}
315
316inline bool StaticTemplateString::operator==(
317 const StaticTemplateString& x) const {
318 return (do_not_use_directly_.length_ == x.do_not_use_directly_.length_ &&
319 (do_not_use_directly_.ptr_ == x.do_not_use_directly_.ptr_ ||
320 memcmp(do_not_use_directly_.ptr_, x.do_not_use_directly_.ptr_,
321 do_not_use_directly_.length_) == 0));
322}
323
324// We set up as much of StaticTemplateString as we can at
325// static-initialization time (using brace-initialization), but some
326// things can't be set up then. This class is for those things; it
327// runs at dynamic-initialization time. If you add logic here, only
328// do so as an optimization: this may be called rather late (though
329// before main), so other code should not depend on this being called
330// before them.
331class StaticTemplateStringInitializer {
332 public:
333 // This constructor operates on a const StaticTemplateString - we should
334 // only change those things that are mutable.
335 explicit StaticTemplateStringInitializer(const StaticTemplateString* sts);
336};
337
338// Don't use this. This is used only in auto-generated .varnames.h files.
339#define STS_INIT_WITH_HASH(name, str, hash) \
340 { { str, sizeof("" str "")-1, hash } }; \
341 namespace ctemplate_sts_init { \
342 static const ::ctemplate::StaticTemplateStringInitializer name##_init(&name); \
343 }
344
345// We computed this hash value for the empty string online. In debug
346// mode, we verify it's correct during runtime (that is, that we
347// verify the hash function used by make_tpl_varnames_h hasn't changed
348// since we computed this number). Note this struct is logically
349// static, but since it's in a .h file, we don't say 'static' but
350// instead rely on the linker to provide the POD-with-internal-linkage
351// magic.
352const StaticTemplateString kStsEmpty =
353 STS_INIT_WITH_HASH(kStsEmpty, "", 1457976849674613049ULL);
354
355}
356
357#endif // TEMPLATE_TEMPLATE_STRING_H_