Brian Silverman | 70325d6 | 2015-09-20 17:00:43 -0400 | [diff] [blame] | 1 | // Copyright (c) 2008, Google Inc. |
| 2 | // All rights reserved. |
| 3 | // |
| 4 | // Redistribution and use in source and binary forms, with or without |
| 5 | // modification, are permitted provided that the following conditions are |
| 6 | // met: |
| 7 | // |
| 8 | // * Redistributions of source code must retain the above copyright |
| 9 | // notice, this list of conditions and the following disclaimer. |
| 10 | // * Redistributions in binary form must reproduce the above |
| 11 | // copyright notice, this list of conditions and the following disclaimer |
| 12 | // in the documentation and/or other materials provided with the |
| 13 | // distribution. |
| 14 | // * Neither the name of Google Inc. nor the names of its |
| 15 | // contributors may be used to endorse or promote products derived from |
| 16 | // this software without specific prior written permission. |
| 17 | // |
| 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | |
| 30 | // --- |
| 31 | // Authors: jcrim@google.com (Jay Crim) |
| 32 | // csilvers@google.com (Craig Silverstein) |
| 33 | |
| 34 | #include <config.h> |
| 35 | #include "base/mutex.h" // This has to come first to get _XOPEN_SOURCE |
| 36 | #include <ctemplate/find_ptr.h> |
| 37 | #include <ctemplate/template_string.h> |
| 38 | #include HASH_SET_H |
| 39 | #include "base/arena.h" |
| 40 | #include "base/thread_annotations.h" |
| 41 | #include <assert.h> |
| 42 | #include "base/macros.h" // for uint32, uint64, UNALIGNED_LOAD32 |
| 43 | #include "base/util.h" |
| 44 | |
| 45 | #ifdef HAVE_UNORDERED_MAP |
| 46 | using HASH_NAMESPACE::unordered_set; |
| 47 | // This is totally cheap, but minimizes the need for #ifdef's below... |
| 48 | #define hash_set unordered_set |
| 49 | #else |
| 50 | using HASH_NAMESPACE::hash_set; |
| 51 | #endif |
| 52 | |
| 53 | namespace ctemplate { |
| 54 | |
| 55 | // Based on public domain MurmurHashUnaligned2, by Austin Appleby. |
| 56 | // http://murmurhash.googlepages.com/ |
| 57 | // This variation: |
| 58 | // - interleaves odd/even 32-bit words to improve performance and |
| 59 | // to generate more random bits, |
| 60 | // - has a more complex final mix to combine the 32-bit hashes into |
| 61 | // 64-bits, |
| 62 | // - uses a fixed seed. |
| 63 | // This is not static because template_string_test accesses it directly. |
| 64 | uint64 MurmurHash64(const char* ptr, size_t len) { |
| 65 | const uint32 kMultiplyVal = 0x5bd1e995; |
| 66 | const int kShiftVal = 24; |
| 67 | const uint32 kHashSeed1 = 0xc86b14f7; |
| 68 | const uint32 kHashSeed2 = 0x650f5c4d; |
| 69 | |
| 70 | uint32 h1 = kHashSeed1 ^ len, h2 = kHashSeed2; |
| 71 | while (len >= 8) { |
| 72 | uint32 k1 = UNALIGNED_LOAD32(ptr); |
| 73 | k1 *= kMultiplyVal; |
| 74 | k1 ^= k1 >> kShiftVal; |
| 75 | k1 *= kMultiplyVal; |
| 76 | |
| 77 | h1 *= kMultiplyVal; |
| 78 | h1 ^= k1; |
| 79 | ptr += 4; |
| 80 | |
| 81 | uint32 k2 = UNALIGNED_LOAD32(ptr); |
| 82 | k2 *= kMultiplyVal; |
| 83 | k2 ^= k2 >> kShiftVal; |
| 84 | k2 *= kMultiplyVal; |
| 85 | |
| 86 | h2 *= kMultiplyVal; |
| 87 | h2 ^= k2; |
| 88 | ptr += 4; |
| 89 | |
| 90 | len -= 8; |
| 91 | } |
| 92 | |
| 93 | if (len >= 4) { |
| 94 | uint32 k1 = UNALIGNED_LOAD32(ptr); |
| 95 | k1 *= kMultiplyVal; |
| 96 | k1 ^= k1 >> kShiftVal; |
| 97 | k1 *= kMultiplyVal; |
| 98 | |
| 99 | h1 *= kShiftVal; |
| 100 | h1 ^= k1; |
| 101 | |
| 102 | ptr += 4; |
| 103 | len -= 4; |
| 104 | } |
| 105 | |
| 106 | switch(len) { |
| 107 | case 3: |
| 108 | h2 ^= ptr[2] << 16; // fall through. |
| 109 | case 2: |
| 110 | h2 ^= ptr[1] << 8; // fall through. |
| 111 | case 1: |
| 112 | h2 ^= ptr[0]; // fall through. |
| 113 | default: |
| 114 | h2 *= kMultiplyVal; |
| 115 | } |
| 116 | |
| 117 | h1 ^= h2 >> 18; |
| 118 | h1 *= kMultiplyVal; |
| 119 | h2 ^= h1 >> 22; |
| 120 | h2 *= kMultiplyVal; |
| 121 | h1 ^= h2 >> 17; |
| 122 | h1 *= kMultiplyVal; |
| 123 | |
| 124 | uint64 h = h1; |
| 125 | h = (h << 32) | h2; |
| 126 | return h; |
| 127 | } |
| 128 | |
| 129 | // Unlike StaticTemplateString, it is not a good idea to have a |
| 130 | // default TemplateString::Hasher because TemplateString does not |
| 131 | // provide any lifetime guarantees. The global template_string_set is |
| 132 | // an obvious exception. |
| 133 | struct TemplateStringHasher { |
| 134 | size_t operator()(const TemplateString& ts) const { |
| 135 | TemplateId id = ts.GetGlobalId(); |
| 136 | DCHECK(IsTemplateIdInitialized(id)); |
| 137 | return hasher(id); |
| 138 | } |
| 139 | // Less operator for MSVC's hash containers. |
| 140 | bool operator()(const TemplateString& a, const TemplateString& b) const { |
| 141 | const TemplateId id_a = a.GetGlobalId(); |
| 142 | const TemplateId id_b = b.GetGlobalId(); |
| 143 | assert(IsTemplateIdInitialized(id_a)); |
| 144 | assert(IsTemplateIdInitialized(id_b)); |
| 145 | return hasher(id_a, id_b); |
| 146 | } |
| 147 | TemplateIdHasher hasher; |
| 148 | // These two public members are required by msvc. 4 and 8 are defaults. |
| 149 | static const size_t bucket_size = 4; |
| 150 | static const size_t min_buckets = 8; |
| 151 | }; |
| 152 | |
| 153 | namespace { |
| 154 | Mutex mutex(base::LINKER_INITIALIZED); |
| 155 | |
| 156 | typedef hash_set<TemplateString, TemplateStringHasher> TemplateStringSet; |
| 157 | |
| 158 | TemplateStringSet* template_string_set |
| 159 | GUARDED_BY(mutex) PT_GUARDED_BY(mutex) = NULL; |
| 160 | |
| 161 | UnsafeArena* arena |
| 162 | GUARDED_BY(mutex) PT_GUARDED_BY(mutex) = NULL; |
| 163 | } // unnamed namespace |
| 164 | |
| 165 | |
| 166 | size_t StringHash::Hash(const char* s, size_t slen) const { |
| 167 | return static_cast<size_t>(MurmurHash64(s, slen)); |
| 168 | } |
| 169 | |
| 170 | void TemplateString::AddToGlobalIdToNameMap() LOCKS_EXCLUDED(mutex) { |
| 171 | // shouldn't be calling this if we don't have an id. |
| 172 | CHECK(IsTemplateIdInitialized(id_)); |
| 173 | { |
| 174 | // Check to see if it's already here. |
| 175 | ReaderMutexLock reader_lock(&mutex); |
| 176 | if (template_string_set) { |
| 177 | const TemplateString* iter = |
| 178 | find_ptr0(*template_string_set, *this); |
| 179 | if (iter) { |
| 180 | DCHECK_EQ(TemplateString(ptr_, length_), |
| 181 | TemplateString(iter->ptr_, iter->length_)) |
| 182 | << "TemplateId collision!"; |
| 183 | return; |
| 184 | } |
| 185 | } |
| 186 | } |
| 187 | WriterMutexLock writer_lock(&mutex); |
| 188 | // First initialize our data structures if we need to. |
| 189 | if (!template_string_set) { |
| 190 | template_string_set = new TemplateStringSet; |
| 191 | } |
| 192 | |
| 193 | if (!arena) { |
| 194 | arena = new UnsafeArena(1024); // 1024 was picked out of a hat. |
| 195 | } |
| 196 | |
| 197 | if (template_string_set->count(*this)) { |
| 198 | return; |
| 199 | } |
| 200 | // If we are immutable, we can store ourselves directly in the map. |
| 201 | // Otherwise, we need to make an immutable copy. |
| 202 | if (is_immutable()) { |
| 203 | template_string_set->insert(*this); |
| 204 | } else { |
| 205 | const char* immutable_copy = arena->Memdup(ptr_, length_); |
| 206 | template_string_set->insert( |
| 207 | TemplateString(immutable_copy, length_, true, id_)); |
| 208 | } |
| 209 | } |
| 210 | |
| 211 | TemplateId TemplateString::GetGlobalId() const { |
| 212 | if (IsTemplateIdInitialized(id_)) { |
| 213 | return id_; |
| 214 | } |
| 215 | // Initialize the id and sets the "initialized" flag. |
| 216 | return static_cast<TemplateId>(MurmurHash64(ptr_, length_) | |
| 217 | kTemplateStringInitializedFlag); |
| 218 | } |
| 219 | |
| 220 | // static |
| 221 | TemplateString TemplateString::IdToString(TemplateId id) LOCKS_EXCLUDED(mutex) { |
| 222 | ReaderMutexLock reader_lock(&mutex); |
| 223 | if (!template_string_set) |
| 224 | return TemplateString(kStsEmpty); |
| 225 | // To search the set by TemplateId, we must first construct a dummy |
| 226 | // TemplateString. This may seem weird, but it lets us use a |
| 227 | // hash_set instead of a hash_map. |
| 228 | TemplateString id_as_template_string(NULL, 0, false, id); |
| 229 | const TemplateString* iter = find_ptr0(*template_string_set, id_as_template_string); |
| 230 | return iter ? *iter : TemplateString(kStsEmpty); |
| 231 | } |
| 232 | |
| 233 | StaticTemplateStringInitializer::StaticTemplateStringInitializer( |
| 234 | const StaticTemplateString* sts) { |
| 235 | // Compute the sts's id if it wasn't specified at static-init |
| 236 | // time. If it was specified at static-init time, verify it's |
| 237 | // correct. This is necessary because static-init id's are, by |
| 238 | // nature, pre-computed, and the id-generating algorithm may have |
| 239 | // changed between the time they were computed and now. |
| 240 | if (sts->do_not_use_directly_.id_ == 0) { |
| 241 | sts->do_not_use_directly_.id_ = TemplateString(*sts).GetGlobalId(); |
| 242 | } else { |
| 243 | // Don't use the TemplateString(const StaticTemplateString& sts) |
| 244 | // constructor below, since if we do, GetGlobalId will just return |
| 245 | // sts->do_not_use_directly_.id_ and the check will be pointless. |
| 246 | DCHECK_EQ(TemplateString(sts->do_not_use_directly_.ptr_, |
| 247 | sts->do_not_use_directly_.length_).GetGlobalId(), |
| 248 | sts->do_not_use_directly_.id_); |
| 249 | } |
| 250 | |
| 251 | // Now add this id/name pair to the backwards map from id to name. |
| 252 | TemplateString ts_copy_of_sts(*sts); |
| 253 | ts_copy_of_sts.AddToGlobalIdToNameMap(); |
| 254 | } |
| 255 | |
| 256 | } |