// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ---
// Author: csilvers@google.com (Craig Silverstein)
32
33#ifndef TEMPLATE_TEMPLATE_STRING_H_
34#define TEMPLATE_TEMPLATE_STRING_H_
35
36#include <string.h> // for memcmp() and size_t
37#include <hash_map>
38#include <string>
39#include <vector>
40
41#include <assert.h>
42#if 0
43#include <stdint.h> // one place @ac_cv_unit64@ might live
44#endif
45#if 0
46#include <inttypes.h> // another place @ac_cv_unit64@ might live
47#endif
48#include <sys/types.h> // final place @ac_cv_unit64@ might live
49
50class TemplateStringTest; // needed for friendship declaration
51class StaticTemplateStringTest;
52
53#if 0
54extern char _start[] __attribute__((weak)); // linker emits: start of .text
55extern char data_start[] __attribute__((weak)); // start of .data
56#endif
57
// NOTE: when the template library is linked statically into your binary
// (instead of being used through the template .dll), pass
// '/D CTEMPLATE_DLL_DECL=' as a compiler flag in your project file so
// that the dllimport decoration below is turned off.
#if !defined(CTEMPLATE_DLL_DECL)
# define CTEMPLATE_DLL_DECL  __declspec(dllimport)
#endif
64
65namespace ctemplate {
66
67// Most methods of TemplateDictionary take a TemplateString rather than a
68// C++ string. This is for efficiency: it can avoid extra string copies.
69// For any argument that takes a TemplateString, you can pass in any of:
70// * A C++ string
71// * A char*
72// * A StringPiece
73// * TemplateString(char*, length)
74// The last of these is the most efficient, though it requires more work
75// on the call site (you have to create the TemplateString explicitly).
76class TemplateString;
77
// When the same string constant (e.g. the string literal "foo") is handed
// to the template routines over and over, it is more efficient to convert
// it into a TemplateString only once.  Do that by declaring a global
// StaticTemplateString with STS_INIT (at global scope *only*!):
//    static const StaticTemplateString kMyVar = STS_INIT(kMyVar, "MY_VALUE");
struct StaticTemplateString;

// STS_INIT simply forwards to STS_INIT_WITH_HASH, passing 0 as the third
// argument.
#define STS_INIT(name, str)  STS_INIT_WITH_HASH(name, str, 0)
87
88// Let's define a convenient hash_compare function for hashing 'normal'
89// strings: char* and string. We'll use MurmurHash, which is probably
90// better than the STL default. We don't include TemplateString or
91// StaticTemplateString here, since they are hashed more efficiently
92// based on their id.
93struct CTEMPLATE_DLL_DECL StringHash {
94 inline size_t operator()(const char* s) const {
95 return Hash(s, strlen(s));
96 };
97
98 inline size_t operator()(const std::string& s) const {
99 return Hash(s.data(), s.size());
100 }
101
102 inline bool operator()(const char* a, const char* b) const {
103 return (a != b) && (strcmp(a, b) < 0); // <, for MSVC
104 }
105
106 inline bool operator()(const std::string& a, const std::string& b) const {
107 return a < b;
108 }
109
110 static const size_t bucket_size = 4; // These are required by MSVC
111 static const size_t min_buckets = 8; // 4 and 8 are the defaults
112 private:
113 size_t Hash(const char* s, size_t slen) const;
114};
115
116// ----------------------- THE CLASSES -------------------------------
117
// Identifier type used for template strings.  'unsigned long long' is the
// standard spelling of the type MSVC also calls 'unsigned __int64', so the
// type is unchanged on MSVC while the declaration stays portable.
typedef unsigned long long TemplateId;

// The id value 0 is reserved to mean "no id".
const TemplateId kIllegalTemplateId = 0;
121
122struct CTEMPLATE_DLL_DECL StaticTemplateString {
123 // Do not define a constructor! We use only brace-initialization,
124 // so the data is constructed at static-initialization time.
125 // Anything you want to put in a constructor, put in
126 // StaticTemplateStringInitializer instead.
127
128 // These members shouldn't be accessed directly, except in the
129 // internals of the template code. They are public because that is
130 // the only way we can brace-initialize them. NOTE: MSVC (at least
131 // up to 8.0) has a bug where it ignores 'mutable' when it's buried
132 // in an internal struct. To fix that, we have to make this whole
133 // internal struct mutable. We only do this on MSVC, so on other
134 // compilers we get the full constness we want.
135#ifdef _MSC_VER
136 mutable
137#endif
138 struct {
139 const char* ptr_;
140 size_t length_;
141 mutable TemplateId id_; // sometimes lazily-initialized.
142 } do_not_use_directly_;
143
144 // This class is a good hash_compare functor to pass in as the third
145 // argument to stdext::hash_map<>, when creating a map whose keys are
146 // StaticTemplateString. NOTE: This class isn't that safe to use,
147 // because it requires that StaticTemplateStringInitializer has done
148 // its job. Unfortunately, even when you use the STS_INIT macro
149 // (which is always, right??), dynamic initialiation does not happen
150 // in a particular order, and objects in different .cc files may
151 // reference a StaticTemplateString before the corresponding
152 // StaticTemplateStringInitializer sets the id.
153 struct Hasher {
154 inline size_t operator()(const StaticTemplateString& sts) const;
155 inline bool operator()(const StaticTemplateString& a, // <, for MSVC
156 const StaticTemplateString& b) const;
157 static const size_t bucket_size = 4; // These are required by MSVC
158 static const size_t min_buckets = 8; // 4 and 8 are the defaults
159 };
160
161 inline bool empty() const {
162 return do_not_use_directly_.length_ == 0;
163 }
164
165 // Allows comparisons of StaticTemplateString objects as if they were
166 // strings. This is useful for STL.
167 inline bool operator==(const StaticTemplateString& x) const;
168};
169
170class CTEMPLATE_DLL_DECL TemplateString {
171 public:
172 TemplateString(const char* s)
173 : ptr_(s ? s : ""), length_(strlen(ptr_)),
174 is_immutable_(InTextSegment(ptr_)), id_(kIllegalTemplateId) {
175 }
176 TemplateString(const std::string& s)
177 : ptr_(s.data()), length_(s.size()),
178 is_immutable_(false), id_(kIllegalTemplateId) {
179 }
180 TemplateString(const char* s, size_t slen)
181 : ptr_(s), length_(slen),
182 is_immutable_(InTextSegment(s)), id_(kIllegalTemplateId) {
183 }
184 TemplateString(const StaticTemplateString& s)
185 : ptr_(s.do_not_use_directly_.ptr_),
186 length_(s.do_not_use_directly_.length_),
187 is_immutable_(true), id_(s.do_not_use_directly_.id_) {
188 }
189
190 const char* begin() const {
191 return ptr_;
192 }
193
194 const char* end() const {
195 return ptr_ + length_;
196 }
197
198 const char* data() const {
199 return ptr_;
200 }
201
202 size_t size() const {
203 return length_;
204 }
205
206 inline bool empty() const {
207 return length_ == 0;
208 };
209
210 inline bool is_immutable() const {
211 return is_immutable_;
212 }
213
214 // STL requires this to be public for hash_map, though I'd rather not.
215 inline bool operator==(const TemplateString& x) const {
216 return GetGlobalId() == x.GetGlobalId();
217 }
218
219 private:
220 // Only TemplateDictionaries and template expansion code can read these.
221 friend class TemplateDictionary;
222 friend class TemplateCache; // for GetGlobalId
223 friend class StaticTemplateStringInitializer; // for AddToGlo...
224 friend struct TemplateStringHasher; // for GetGlobalId
225 friend TemplateId GlobalIdForTest(const char* ptr, int len);
226 friend TemplateId GlobalIdForSTS_INIT(const TemplateString& s);
227
228 TemplateString(const char* s, size_t slen, bool is_immutable, TemplateId id)
229 : ptr_(s), length_(slen), is_immutable_(is_immutable), id_(id) {
230 }
231
232 // This returns true if s is in the .text segment of the binary.
233 // (Note this only checks .text of the main executable, not of
234 // shared libraries. So it may not be all that useful.)
235 // This requires the gnu linker (and probably elf), to define
236 // _start and data_start.
237 static bool InTextSegment(const char* s) {
238#if 0
239 return (s >= _start && s < data_start); // in .text
240#else
241 return false; // the conservative choice: assume it's not static memory
242#endif
243 }
244
245 protected:
246 inline void CacheGlobalId() { // used by HashedTemplateString
247 id_ = GetGlobalId();
248 };
249
250 private:
251 // Returns the global id, computing it for the first time if
252 // necessary. Note that since this is a const method, we don't
253 // store the computed value in id_, even if id_ is 0.
254 TemplateId GetGlobalId() const;
255 // Adds this TemplateString to the map from global-id to name.
256 void AddToGlobalIdToNameMap();
257
258 // Use sparingly. Converting to a string loses information about the
259 // id of the template string, making operations require extra hash_compare
260 // computations.
261 std::string ToString() const { return std::string(ptr_, length_); }
262
263 // Does the reverse map from TemplateId to TemplateString contents.
264 // Returns a TemplateString(kStsEmpty) if id isn't found. Note that
265 // the TemplateString returned is not necessarily NUL terminated.
266 static TemplateString IdToString(TemplateId id);
267
268 const char* ptr_;
269 size_t length_;
270 // Do we need to manage memory for this string?
271 bool is_immutable_;
272 // Id for hash_compare lookups. If 0, we don't have one and it should be
273 // computed as-needed.
274 TemplateId id_;
275};
276
277// ----------------------- THE CODE -------------------------------
278
279// Use the low-bit from TemplateId as the "initialized" flag. Note
280// that since all initialized TemplateId have the lower bit set, it's
281// safe to have used 0 for kIllegalTemplateId, as we did above.
282const TemplateId kTemplateStringInitializedFlag = 1;
283
284inline bool IsTemplateIdInitialized(TemplateId id) {
285 return id & kTemplateStringInitializedFlag;
286}
287
288// This is a helper struct used in TemplateString::Hasher/TemplateStringHasher
289struct TemplateIdHasher {
290 size_t operator()(TemplateId id) const {
291 // The shift has two effects: it randomizes the "initialized" flag,
292 // and slightly improves the randomness of the low bits. This is
293 // slightly useful when size_t is 32 bits, or when using a small
294 // hash_compare tables with power-of-2 sizes.
295 return static_cast<size_t>(id ^ (id >> 33));
296 }
297 bool operator()(TemplateId a, TemplateId b) const { // <, for MSVC
298 return a < b;
299 }
300 static const size_t bucket_size = 4; // These are required by MSVC
301 static const size_t min_buckets = 8; // 4 and 8 are the defaults
302};
303
304
305inline size_t StaticTemplateString::Hasher::operator()(
306 const StaticTemplateString& sts) const {
307 TemplateId id = sts.do_not_use_directly_.id_;
308 assert(IsTemplateIdInitialized(id));
309 return TemplateIdHasher()(id);
310}
311
312inline bool StaticTemplateString::Hasher::operator()(
313 const StaticTemplateString& a, const StaticTemplateString& b) const {
314 TemplateId id_a = a.do_not_use_directly_.id_;
315 TemplateId id_b = b.do_not_use_directly_.id_;
316 assert(IsTemplateIdInitialized(id_a));
317 assert(IsTemplateIdInitialized(id_b));
318 return TemplateIdHasher()(id_a, id_b);
319}
320
321inline bool StaticTemplateString::operator==(
322 const StaticTemplateString& x) const {
323 return (do_not_use_directly_.length_ == x.do_not_use_directly_.length_ &&
324 (do_not_use_directly_.ptr_ == x.do_not_use_directly_.ptr_ ||
325 memcmp(do_not_use_directly_.ptr_, x.do_not_use_directly_.ptr_,
326 do_not_use_directly_.length_) == 0));
327}
328
329// We set up as much of StaticTemplateString as we can at
330// static-initialization time (using brace-initialization), but some
331// things can't be set up then. This class is for those things; it
332// runs at dynamic-initialization time. If you add logic here, only
333// do so as an optimization: this may be called rather late (though
334// before main), so other code should not depend on this being called
335// before them.
336class CTEMPLATE_DLL_DECL StaticTemplateStringInitializer {
337 public:
338 // This constructor operates on a const StaticTemplateString - we should
339 // only change those things that are mutable.
340 explicit StaticTemplateStringInitializer(const StaticTemplateString* sts);
341};
342
// Don't use this.  This is used only in auto-generated .varnames.h files.
//
// Expands to the brace-initializer for a StaticTemplateString (pointer,
// length, id) followed by the definition of a matching
// StaticTemplateStringInitializer in namespace ctemplate_sts_init.
// 'sizeof("" str "")-1' yields the length of str and additionally forces
// str to be a string literal.  The spaces around str are required: since
// C++11, '""str' written without a space is lexed as a single
// user-defined string literal, so the macro argument would never be
// substituted.
#define STS_INIT_WITH_HASH(name, str, hash_compare)                           \
  { { str, sizeof("" str "")-1, hash_compare } };                             \
  namespace ctemplate_sts_init {                                              \
  static const ctemplate::StaticTemplateStringInitializer name##_init(&name); \
  }
349
350// We computed this hash_compare value for the empty string online. In debug
351// mode, we verify it's correct during runtime (that is, that we
352// verify the hash_compare function used by make_tpl_varnames_h hasn't changed
353// since we computed this number). Note this struct is logically
354// static, but since it's in a .h file, we don't say 'static' but
355// instead rely on the linker to provide the POD-with-internal-linkage
356// magic.
357const StaticTemplateString kStsEmpty =
358 STS_INIT_WITH_HASH(kStsEmpty, "", 1457976849674613049ULL);
359
360}
361
362
363#endif // TEMPLATE_TEMPLATE_STRING_H_