Brian Silverman | cbe6df2 | 2015-09-26 17:32:32 -0400 | [diff] [blame] | 1 | // Copyright (c) 2008, Google Inc. |
| 2 | // All rights reserved. |
| 3 | // |
| 4 | // Redistribution and use in source and binary forms, with or without |
| 5 | // modification, are permitted provided that the following conditions are |
| 6 | // met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright |
| 9 | // notice, this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above |
| 11 | // copyright notice, this list of conditions and the following disclaimer |
| 12 | // in the documentation and/or other materials provided with the |
| 13 | // distribution. |
| 14 | // * Neither the name of Google Inc. nor the names of its |
| 15 | // contributors may be used to endorse or promote products derived from |
| 16 | // this software without specific prior written permission. |
| 17 | // |
| 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | // |
| 30 | // --- |
| 31 | // Author: csilvers@google.com (Craig Silerstein) |
| 32 | |
| 33 | #ifndef TEMPLATE_TEMPLATE_STRING_H_ |
| 34 | #define TEMPLATE_TEMPLATE_STRING_H_ |
| 35 | |
| 36 | #include <string.h> // for memcmp() and size_t |
| 37 | #include <tr1/unordered_map> |
| 38 | #include <string> |
| 39 | #include <vector> |
| 40 | |
| 41 | #include <assert.h> |
| 42 | #if 1 |
| 43 | #include <stdint.h> // one place @ac_cv_unit64@ might live |
| 44 | #endif |
| 45 | #if 1 |
| 46 | #include <inttypes.h> // another place @ac_cv_unit64@ might live |
| 47 | #endif |
| 48 | #include <sys/types.h> // final place @ac_cv_unit64@ might live |
| 49 | |
| 50 | class TemplateStringTest; // needed for friendship declaration |
| 51 | class StaticTemplateStringTest; |
| 52 | |
| 53 | #if 1 |
| 54 | extern char _start[] __attribute__((weak)); // linker emits: start of .text |
| 55 | extern char data_start[] __attribute__((weak)); // start of .data |
| 56 | #endif |
| 57 | |
| 58 | |
| 59 | |
| 60 | namespace ctemplate { |
| 61 | |
| 62 | // Most methods of TemplateDictionary take a TemplateString rather than a |
| 63 | // C++ string. This is for efficiency: it can avoid extra string copies. |
| 64 | // For any argument that takes a TemplateString, you can pass in any of: |
| 65 | // * A C++ string |
| 66 | // * A char* |
| 67 | // * A StringPiece |
| 68 | // * TemplateString(char*, length) |
| 69 | // The last of these is the most efficient, though it requires more work |
| 70 | // on the call site (you have to create the TemplateString explicitly). |
| 71 | class TemplateString; |
| 72 | |
| 73 | // If you have a string constant (e.g. the string literal "foo") that |
| 74 | // you need to pass into template routines repeatedly, it is more |
| 75 | // efficient if you convert it into a TemplateString only once. The |
| 76 | // way to do this is to use a global StaticTemplateString via STS_INIT |
| 77 | // (note: do this at global scope *only*!): |
| 78 | // static const StaticTemplateString kMyVar = STS_INIT(kMyVar, "MY_VALUE"); |
| 79 | struct StaticTemplateString; |
| 80 | |
| 81 | #define STS_INIT(name, str) STS_INIT_WITH_HASH(name, str, 0) |
| 82 | |
| 83 | // Let's define a convenient hash function for hashing 'normal' |
| 84 | // strings: char* and string. We'll use MurmurHash, which is probably |
| 85 | // better than the STL default. We don't include TemplateString or |
| 86 | // StaticTemplateString here, since they are hashed more efficiently |
| 87 | // based on their id. |
| 88 | struct StringHash { |
| 89 | inline size_t operator()(const char* s) const { |
| 90 | return Hash(s, strlen(s)); |
| 91 | }; |
| 92 | |
| 93 | inline size_t operator()(const std::string& s) const { |
| 94 | return Hash(s.data(), s.size()); |
| 95 | } |
| 96 | |
| 97 | inline bool operator()(const char* a, const char* b) const { |
| 98 | return (a != b) && (strcmp(a, b) < 0); // <, for MSVC |
| 99 | } |
| 100 | |
| 101 | inline bool operator()(const std::string& a, const std::string& b) const { |
| 102 | return a < b; |
| 103 | } |
| 104 | |
| 105 | static const size_t bucket_size = 4; // These are required by MSVC |
| 106 | static const size_t min_buckets = 8; // 4 and 8 are the defaults |
| 107 | private: |
| 108 | size_t Hash(const char* s, size_t slen) const; |
| 109 | }; |
| 110 | |
| 111 | // ----------------------- THE CLASSES ------------------------------- |
| 112 | |
| 113 | typedef u_int64_t TemplateId; |
| 114 | |
| 115 | const TemplateId kIllegalTemplateId = 0; |
| 116 | |
| 117 | struct StaticTemplateString { |
| 118 | // Do not define a constructor! We use only brace-initialization, |
| 119 | // so the data is constructed at static-initialization time. |
| 120 | // Anything you want to put in a constructor, put in |
| 121 | // StaticTemplateStringInitializer instead. |
| 122 | |
| 123 | // These members shouldn't be accessed directly, except in the |
| 124 | // internals of the template code. They are public because that is |
| 125 | // the only way we can brace-initialize them. NOTE: MSVC (at least |
| 126 | // up to 8.0) has a bug where it ignores 'mutable' when it's buried |
| 127 | // in an internal struct. To fix that, we have to make this whole |
| 128 | // internal struct mutable. We only do this on MSVC, so on other |
| 129 | // compilers we get the full constness we want. |
| 130 | #ifdef _MSC_VER |
| 131 | mutable |
| 132 | #endif |
| 133 | struct { |
| 134 | const char* ptr_; |
| 135 | size_t length_; |
| 136 | mutable TemplateId id_; // sometimes lazily-initialized. |
| 137 | } do_not_use_directly_; |
| 138 | |
| 139 | // This class is a good hash functor to pass in as the third |
| 140 | // argument to std::tr1::unordered_map<>, when creating a map whose keys are |
| 141 | // StaticTemplateString. NOTE: This class isn't that safe to use, |
| 142 | // because it requires that StaticTemplateStringInitializer has done |
| 143 | // its job. Unfortunately, even when you use the STS_INIT macro |
| 144 | // (which is always, right??), dynamic initialiation does not happen |
| 145 | // in a particular order, and objects in different .cc files may |
| 146 | // reference a StaticTemplateString before the corresponding |
| 147 | // StaticTemplateStringInitializer sets the id. |
| 148 | struct Hasher { |
| 149 | inline size_t operator()(const StaticTemplateString& sts) const; |
| 150 | inline bool operator()(const StaticTemplateString& a, // <, for MSVC |
| 151 | const StaticTemplateString& b) const; |
| 152 | static const size_t bucket_size = 4; // These are required by MSVC |
| 153 | static const size_t min_buckets = 8; // 4 and 8 are the defaults |
| 154 | }; |
| 155 | |
| 156 | inline bool empty() const { |
| 157 | return do_not_use_directly_.length_ == 0; |
| 158 | } |
| 159 | |
| 160 | // Allows comparisons of StaticTemplateString objects as if they were |
| 161 | // strings. This is useful for STL. |
| 162 | inline bool operator==(const StaticTemplateString& x) const; |
| 163 | }; |
| 164 | |
| 165 | class TemplateString { |
| 166 | public: |
| 167 | TemplateString(const char* s) |
| 168 | : ptr_(s ? s : ""), length_(strlen(ptr_)), |
| 169 | is_immutable_(InTextSegment(ptr_)), id_(kIllegalTemplateId) { |
| 170 | } |
| 171 | TemplateString(const std::string& s) |
| 172 | : ptr_(s.data()), length_(s.size()), |
| 173 | is_immutable_(false), id_(kIllegalTemplateId) { |
| 174 | } |
| 175 | TemplateString(const char* s, size_t slen) |
| 176 | : ptr_(s), length_(slen), |
| 177 | is_immutable_(InTextSegment(s)), id_(kIllegalTemplateId) { |
| 178 | } |
| 179 | TemplateString(const StaticTemplateString& s) |
| 180 | : ptr_(s.do_not_use_directly_.ptr_), |
| 181 | length_(s.do_not_use_directly_.length_), |
| 182 | is_immutable_(true), id_(s.do_not_use_directly_.id_) { |
| 183 | } |
| 184 | |
| 185 | const char* begin() const { |
| 186 | return ptr_; |
| 187 | } |
| 188 | |
| 189 | const char* end() const { |
| 190 | return ptr_ + length_; |
| 191 | } |
| 192 | |
| 193 | const char* data() const { |
| 194 | return ptr_; |
| 195 | } |
| 196 | |
| 197 | size_t size() const { |
| 198 | return length_; |
| 199 | } |
| 200 | |
| 201 | inline bool empty() const { |
| 202 | return length_ == 0; |
| 203 | }; |
| 204 | |
| 205 | inline bool is_immutable() const { |
| 206 | return is_immutable_; |
| 207 | } |
| 208 | |
| 209 | // STL requires this to be public for hash_map, though I'd rather not. |
| 210 | inline bool operator==(const TemplateString& x) const { |
| 211 | return GetGlobalId() == x.GetGlobalId(); |
| 212 | } |
| 213 | |
| 214 | private: |
| 215 | // Only TemplateDictionaries and template expansion code can read these. |
| 216 | friend class TemplateDictionary; |
| 217 | friend class TemplateCache; // for GetGlobalId |
| 218 | friend class StaticTemplateStringInitializer; // for AddToGlo... |
| 219 | friend struct TemplateStringHasher; // for GetGlobalId |
| 220 | friend TemplateId GlobalIdForTest(const char* ptr, int len); |
| 221 | friend TemplateId GlobalIdForSTS_INIT(const TemplateString& s); |
| 222 | |
| 223 | TemplateString(const char* s, size_t slen, bool is_immutable, TemplateId id) |
| 224 | : ptr_(s), length_(slen), is_immutable_(is_immutable), id_(id) { |
| 225 | } |
| 226 | |
| 227 | // This returns true if s is in the .text segment of the binary. |
| 228 | // (Note this only checks .text of the main executable, not of |
| 229 | // shared libraries. So it may not be all that useful.) |
| 230 | // This requires the gnu linker (and probably elf), to define |
| 231 | // _start and data_start. |
| 232 | static bool InTextSegment(const char* s) { |
| 233 | #if 1 |
| 234 | return (s >= _start && s < data_start); // in .text |
| 235 | #else |
| 236 | return false; // the conservative choice: assume it's not static memory |
| 237 | #endif |
| 238 | } |
| 239 | |
| 240 | protected: |
| 241 | inline void CacheGlobalId() { // used by HashedTemplateString |
| 242 | id_ = GetGlobalId(); |
| 243 | }; |
| 244 | |
| 245 | private: |
| 246 | // Returns the global id, computing it for the first time if |
| 247 | // necessary. Note that since this is a const method, we don't |
| 248 | // store the computed value in id_, even if id_ is 0. |
| 249 | TemplateId GetGlobalId() const; |
| 250 | // Adds this TemplateString to the map from global-id to name. |
| 251 | void AddToGlobalIdToNameMap(); |
| 252 | |
| 253 | // Use sparingly. Converting to a string loses information about the |
| 254 | // id of the template string, making operations require extra hash |
| 255 | // computations. |
| 256 | std::string ToString() const { return std::string(ptr_, length_); } |
| 257 | |
| 258 | // Does the reverse map from TemplateId to TemplateString contents. |
| 259 | // Returns a TemplateString(kStsEmpty) if id isn't found. Note that |
| 260 | // the TemplateString returned is not necessarily NUL terminated. |
| 261 | static TemplateString IdToString(TemplateId id); |
| 262 | |
| 263 | const char* ptr_; |
| 264 | size_t length_; |
| 265 | // Do we need to manage memory for this string? |
| 266 | bool is_immutable_; |
| 267 | // Id for hash lookups. If 0, we don't have one and it should be |
| 268 | // computed as-needed. |
| 269 | TemplateId id_; |
| 270 | }; |
| 271 | |
| 272 | // ----------------------- THE CODE ------------------------------- |
| 273 | |
| 274 | // Use the low-bit from TemplateId as the "initialized" flag. Note |
| 275 | // that since all initialized TemplateId have the lower bit set, it's |
| 276 | // safe to have used 0 for kIllegalTemplateId, as we did above. |
| 277 | const TemplateId kTemplateStringInitializedFlag = 1; |
| 278 | |
| 279 | inline bool IsTemplateIdInitialized(TemplateId id) { |
| 280 | return id & kTemplateStringInitializedFlag; |
| 281 | } |
| 282 | |
| 283 | // This is a helper struct used in TemplateString::Hasher/TemplateStringHasher |
| 284 | struct TemplateIdHasher { |
| 285 | size_t operator()(TemplateId id) const { |
| 286 | // The shift has two effects: it randomizes the "initialized" flag, |
| 287 | // and slightly improves the randomness of the low bits. This is |
| 288 | // slightly useful when size_t is 32 bits, or when using a small |
| 289 | // hash tables with power-of-2 sizes. |
| 290 | return static_cast<size_t>(id ^ (id >> 33)); |
| 291 | } |
| 292 | bool operator()(TemplateId a, TemplateId b) const { // <, for MSVC |
| 293 | return a < b; |
| 294 | } |
| 295 | static const size_t bucket_size = 4; // These are required by MSVC |
| 296 | static const size_t min_buckets = 8; // 4 and 8 are the defaults |
| 297 | }; |
| 298 | |
| 299 | |
| 300 | inline size_t StaticTemplateString::Hasher::operator()( |
| 301 | const StaticTemplateString& sts) const { |
| 302 | TemplateId id = sts.do_not_use_directly_.id_; |
| 303 | assert(IsTemplateIdInitialized(id)); |
| 304 | return TemplateIdHasher()(id); |
| 305 | } |
| 306 | |
| 307 | inline bool StaticTemplateString::Hasher::operator()( |
| 308 | const StaticTemplateString& a, const StaticTemplateString& b) const { |
| 309 | TemplateId id_a = a.do_not_use_directly_.id_; |
| 310 | TemplateId id_b = b.do_not_use_directly_.id_; |
| 311 | assert(IsTemplateIdInitialized(id_a)); |
| 312 | assert(IsTemplateIdInitialized(id_b)); |
| 313 | return TemplateIdHasher()(id_a, id_b); |
| 314 | } |
| 315 | |
| 316 | inline bool StaticTemplateString::operator==( |
| 317 | const StaticTemplateString& x) const { |
| 318 | return (do_not_use_directly_.length_ == x.do_not_use_directly_.length_ && |
| 319 | (do_not_use_directly_.ptr_ == x.do_not_use_directly_.ptr_ || |
| 320 | memcmp(do_not_use_directly_.ptr_, x.do_not_use_directly_.ptr_, |
| 321 | do_not_use_directly_.length_) == 0)); |
| 322 | } |
| 323 | |
| 324 | // We set up as much of StaticTemplateString as we can at |
| 325 | // static-initialization time (using brace-initialization), but some |
| 326 | // things can't be set up then. This class is for those things; it |
| 327 | // runs at dynamic-initialization time. If you add logic here, only |
| 328 | // do so as an optimization: this may be called rather late (though |
| 329 | // before main), so other code should not depend on this being called |
| 330 | // before them. |
| 331 | class StaticTemplateStringInitializer { |
| 332 | public: |
| 333 | // This constructor operates on a const StaticTemplateString - we should |
| 334 | // only change those things that are mutable. |
| 335 | explicit StaticTemplateStringInitializer(const StaticTemplateString* sts); |
| 336 | }; |
| 337 | |
| 338 | // Don't use this. This is used only in auto-generated .varnames.h files. |
| 339 | #define STS_INIT_WITH_HASH(name, str, hash) \ |
| 340 | { { str, sizeof("" str "")-1, hash } }; \ |
| 341 | namespace ctemplate_sts_init { \ |
| 342 | static const ::ctemplate::StaticTemplateStringInitializer name##_init(&name); \ |
| 343 | } |
| 344 | |
| 345 | // We computed this hash value for the empty string online. In debug |
| 346 | // mode, we verify it's correct during runtime (that is, that we |
| 347 | // verify the hash function used by make_tpl_varnames_h hasn't changed |
| 348 | // since we computed this number). Note this struct is logically |
| 349 | // static, but since it's in a .h file, we don't say 'static' but |
| 350 | // instead rely on the linker to provide the POD-with-internal-linkage |
| 351 | // magic. |
| 352 | const StaticTemplateString kStsEmpty = |
| 353 | STS_INIT_WITH_HASH(kStsEmpty, "", 1457976849674613049ULL); |
| 354 | |
| 355 | } |
| 356 | |
| 357 | #endif // TEMPLATE_TEMPLATE_STRING_H_ |