Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 1 | // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- |
| 2 | // Copyright (c) 2005, Google Inc. |
| 3 | // All rights reserved. |
| 4 | // |
| 5 | // Redistribution and use in source and binary forms, with or without |
| 6 | // modification, are permitted provided that the following conditions are |
| 7 | // met: |
| 8 | // |
| 9 | // * Redistributions of source code must retain the above copyright |
| 10 | // notice, this list of conditions and the following disclaimer. |
| 11 | // * Redistributions in binary form must reproduce the above |
| 12 | // copyright notice, this list of conditions and the following disclaimer |
| 13 | // in the documentation and/or other materials provided with the |
| 14 | // distribution. |
| 15 | // * Neither the name of Google Inc. nor the names of its |
| 16 | // contributors may be used to endorse or promote products derived from |
| 17 | // this software without specific prior written permission. |
| 18 | // |
| 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | |
| 31 | // --- |
| 32 | // Author: Sanjay Ghemawat |
| 33 | // |
| 34 | // A fast map from addresses to values. Assumes that addresses are |
| 35 | // clustered. The main use is intended to be for heap-profiling. |
| 36 | // May be too memory-hungry for other uses. |
| 37 | // |
| 38 | // We use a user-defined allocator/de-allocator so that we can use |
| 39 | // this data structure during heap-profiling. |
| 40 | // |
| 41 | // IMPLEMENTATION DETAIL: |
| 42 | // |
| 43 | // Some default definitions/parameters: |
| 44 | // * Block -- aligned 128-byte region of the address space |
| 45 | // * Cluster -- aligned 1-MB region of the address space |
| 46 | // * Block-ID -- block-number within a cluster |
| 47 | // * Cluster-ID -- Starting address of cluster divided by cluster size |
| 48 | // |
| 49 | // We use a three-level map to represent the state: |
| 50 | // 1. A hash-table maps from a cluster-ID to the data for that cluster. |
| 51 | // 2. For each non-empty cluster we keep an array indexed by |
| 52 | // block-ID tht points to the first entry in the linked-list |
| 53 | // for the block. |
| 54 | // 3. At the bottom, we keep a singly-linked list of all |
| 55 | // entries in a block (for non-empty blocks). |
| 56 | // |
| 57 | // hash table |
| 58 | // +-------------+ |
| 59 | // | id->cluster |---> ... |
| 60 | // | ... | |
| 61 | // | id->cluster |---> Cluster |
| 62 | // +-------------+ +-------+ Data for one block |
| 63 | // | nil | +------------------------------------+ |
| 64 | // | ----+---|->[addr/value]-->[addr/value]-->... | |
| 65 | // | nil | +------------------------------------+ |
| 66 | // | ----+--> ... |
| 67 | // | nil | |
| 68 | // | ... | |
| 69 | // +-------+ |
| 70 | // |
| 71 | // Note that we require zero-bytes of overhead for completely empty |
| 72 | // clusters. The minimum space requirement for a cluster is the size |
| 73 | // of the hash-table entry plus a pointer value for each block in |
| 74 | // the cluster. Empty blocks impose no extra space requirement. |
| 75 | // |
| 76 | // The cost of a lookup is: |
| 77 | // a. A hash-table lookup to find the cluster |
| 78 | // b. An array access in the cluster structure |
| 79 | // c. A traversal over the linked-list for a block |
| 80 | |
| 81 | #ifndef BASE_ADDRESSMAP_INL_H_ |
| 82 | #define BASE_ADDRESSMAP_INL_H_ |
| 83 | |
| 84 | #include "config.h" |
| 85 | #include <stddef.h> |
| 86 | #include <string.h> |
| 87 | #if defined HAVE_STDINT_H |
| 88 | #include <stdint.h> // to get uint16_t (ISO naming madness) |
| 89 | #elif defined HAVE_INTTYPES_H |
| 90 | #include <inttypes.h> // another place uint16_t might be defined |
| 91 | #else |
| 92 | #include <sys/types.h> // our last best hope |
| 93 | #endif |
| 94 | |
| 95 | // This class is thread-unsafe -- that is, instances of this class can |
| 96 | // not be accessed concurrently by multiple threads -- because the |
| 97 | // callback function for Iterate() may mutate contained values. If the |
| 98 | // callback functions you pass do not mutate their Value* argument, |
| 99 | // AddressMap can be treated as thread-compatible -- that is, it's |
| 100 | // safe for multiple threads to call "const" methods on this class, |
| 101 | // but not safe for one thread to call const methods on this class |
| 102 | // while another thread is calling non-const methods on the class. |
| 103 | template <class Value> |
| 104 | class AddressMap { |
| 105 | public: |
| 106 | typedef void* (*Allocator)(size_t size); |
| 107 | typedef void (*DeAllocator)(void* ptr); |
| 108 | typedef const void* Key; |
| 109 | |
| 110 | // Create an AddressMap that uses the specified allocator/deallocator. |
| 111 | // The allocator/deallocator should behave like malloc/free. |
| 112 | // For instance, the allocator does not need to return initialized memory. |
| 113 | AddressMap(Allocator alloc, DeAllocator dealloc); |
| 114 | ~AddressMap(); |
| 115 | |
| 116 | // If the map contains an entry for "key", return it. Else return NULL. |
| 117 | inline const Value* Find(Key key) const; |
| 118 | inline Value* FindMutable(Key key); |
| 119 | |
| 120 | // Insert <key,value> into the map. Any old value associated |
| 121 | // with key is forgotten. |
| 122 | void Insert(Key key, Value value); |
| 123 | |
| 124 | // Remove any entry for key in the map. If an entry was found |
| 125 | // and removed, stores the associated value in "*removed_value" |
| 126 | // and returns true. Else returns false. |
| 127 | bool FindAndRemove(Key key, Value* removed_value); |
| 128 | |
| 129 | // Similar to Find but we assume that keys are addresses of non-overlapping |
| 130 | // memory ranges whose sizes are given by size_func. |
| 131 | // If the map contains a range into which "key" points |
| 132 | // (at its start or inside of it, but not at the end), |
| 133 | // return the address of the associated value |
| 134 | // and store its key in "*res_key". |
| 135 | // Else return NULL. |
| 136 | // max_size specifies largest range size possibly in existence now. |
| 137 | typedef size_t (*ValueSizeFunc)(const Value& v); |
| 138 | const Value* FindInside(ValueSizeFunc size_func, size_t max_size, |
| 139 | Key key, Key* res_key); |
| 140 | |
| 141 | // Iterate over the address map calling 'callback' |
| 142 | // for all stored key-value pairs and passing 'arg' to it. |
| 143 | // We don't use full Closure/Callback machinery not to add |
| 144 | // unnecessary dependencies to this class with low-level uses. |
| 145 | template<class Type> |
| 146 | inline void Iterate(void (*callback)(Key, Value*, Type), Type arg) const; |
| 147 | |
| 148 | private: |
| 149 | typedef uintptr_t Number; |
| 150 | |
| 151 | // The implementation assumes that addresses inserted into the map |
| 152 | // will be clustered. We take advantage of this fact by splitting |
| 153 | // up the address-space into blocks and using a linked-list entry |
| 154 | // for each block. |
| 155 | |
| 156 | // Size of each block. There is one linked-list for each block, so |
| 157 | // do not make the block-size too big. Oterwise, a lot of time |
| 158 | // will be spent traversing linked lists. |
| 159 | static const int kBlockBits = 7; |
| 160 | static const int kBlockSize = 1 << kBlockBits; |
| 161 | |
| 162 | // Entry kept in per-block linked-list |
| 163 | struct Entry { |
| 164 | Entry* next; |
| 165 | Key key; |
| 166 | Value value; |
| 167 | }; |
| 168 | |
| 169 | // We further group a sequence of consecutive blocks into a cluster. |
| 170 | // The data for a cluster is represented as a dense array of |
| 171 | // linked-lists, one list per contained block. |
| 172 | static const int kClusterBits = 13; |
| 173 | static const Number kClusterSize = 1 << (kBlockBits + kClusterBits); |
| 174 | static const int kClusterBlocks = 1 << kClusterBits; |
| 175 | |
| 176 | // We use a simple chaining hash-table to represent the clusters. |
| 177 | struct Cluster { |
| 178 | Cluster* next; // Next cluster in hash table chain |
| 179 | Number id; // Cluster ID |
| 180 | Entry* blocks[kClusterBlocks]; // Per-block linked-lists |
| 181 | }; |
| 182 | |
| 183 | // Number of hash-table entries. With the block-size/cluster-size |
| 184 | // defined above, each cluster covers 1 MB, so an 4K entry |
| 185 | // hash-table will give an average hash-chain length of 1 for 4GB of |
| 186 | // in-use memory. |
| 187 | static const int kHashBits = 12; |
| 188 | static const int kHashSize = 1 << 12; |
| 189 | |
| 190 | // Number of entry objects allocated at a time |
| 191 | static const int ALLOC_COUNT = 64; |
| 192 | |
| 193 | Cluster** hashtable_; // The hash-table |
| 194 | Entry* free_; // Free list of unused Entry objects |
| 195 | |
| 196 | // Multiplicative hash function: |
| 197 | // The value "kHashMultiplier" is the bottom 32 bits of |
| 198 | // int((sqrt(5)-1)/2 * 2^32) |
| 199 | // This is a good multiplier as suggested in CLR, Knuth. The hash |
| 200 | // value is taken to be the top "k" bits of the bottom 32 bits |
| 201 | // of the muliplied value. |
| 202 | static const uint32_t kHashMultiplier = 2654435769u; |
| 203 | static int HashInt(Number x) { |
| 204 | // Multiply by a constant and take the top bits of the result. |
| 205 | const uint32_t m = static_cast<uint32_t>(x) * kHashMultiplier; |
| 206 | return static_cast<int>(m >> (32 - kHashBits)); |
| 207 | } |
| 208 | |
| 209 | // Find cluster object for specified address. If not found |
| 210 | // and "create" is true, create the object. If not found |
| 211 | // and "create" is false, return NULL. |
| 212 | // |
| 213 | // This method is bitwise-const if create is false. |
| 214 | Cluster* FindCluster(Number address, bool create) { |
| 215 | // Look in hashtable |
| 216 | const Number cluster_id = address >> (kBlockBits + kClusterBits); |
| 217 | const int h = HashInt(cluster_id); |
| 218 | for (Cluster* c = hashtable_[h]; c != NULL; c = c->next) { |
| 219 | if (c->id == cluster_id) { |
| 220 | return c; |
| 221 | } |
| 222 | } |
| 223 | |
| 224 | // Create cluster if necessary |
| 225 | if (create) { |
| 226 | Cluster* c = New<Cluster>(1); |
| 227 | c->id = cluster_id; |
| 228 | c->next = hashtable_[h]; |
| 229 | hashtable_[h] = c; |
| 230 | return c; |
| 231 | } |
| 232 | return NULL; |
| 233 | } |
| 234 | |
| 235 | // Return the block ID for an address within its cluster |
| 236 | static int BlockID(Number address) { |
| 237 | return (address >> kBlockBits) & (kClusterBlocks - 1); |
| 238 | } |
| 239 | |
| 240 | //-------------------------------------------------------------- |
| 241 | // Memory management -- we keep all objects we allocate linked |
| 242 | // together in a singly linked list so we can get rid of them |
| 243 | // when we are all done. Furthermore, we allow the client to |
| 244 | // pass in custom memory allocator/deallocator routines. |
| 245 | //-------------------------------------------------------------- |
| 246 | struct Object { |
| 247 | Object* next; |
| 248 | // The real data starts here |
| 249 | }; |
| 250 | |
| 251 | Allocator alloc_; // The allocator |
| 252 | DeAllocator dealloc_; // The deallocator |
| 253 | Object* allocated_; // List of allocated objects |
| 254 | |
| 255 | // Allocates a zeroed array of T with length "num". Also inserts |
| 256 | // the allocated block into a linked list so it can be deallocated |
| 257 | // when we are all done. |
| 258 | template <class T> T* New(int num) { |
| 259 | void* ptr = (*alloc_)(sizeof(Object) + num*sizeof(T)); |
| 260 | memset(ptr, 0, sizeof(Object) + num*sizeof(T)); |
| 261 | Object* obj = reinterpret_cast<Object*>(ptr); |
| 262 | obj->next = allocated_; |
| 263 | allocated_ = obj; |
| 264 | return reinterpret_cast<T*>(reinterpret_cast<Object*>(ptr) + 1); |
| 265 | } |
| 266 | }; |
| 267 | |
| 268 | // More implementation details follow: |
| 269 | |
| 270 | template <class Value> |
| 271 | AddressMap<Value>::AddressMap(Allocator alloc, DeAllocator dealloc) |
| 272 | : free_(NULL), |
| 273 | alloc_(alloc), |
| 274 | dealloc_(dealloc), |
| 275 | allocated_(NULL) { |
| 276 | hashtable_ = New<Cluster*>(kHashSize); |
| 277 | } |
| 278 | |
| 279 | template <class Value> |
| 280 | AddressMap<Value>::~AddressMap() { |
| 281 | // De-allocate all of the objects we allocated |
| 282 | for (Object* obj = allocated_; obj != NULL; /**/) { |
| 283 | Object* next = obj->next; |
| 284 | (*dealloc_)(obj); |
| 285 | obj = next; |
| 286 | } |
| 287 | } |
| 288 | |
| 289 | template <class Value> |
| 290 | inline const Value* AddressMap<Value>::Find(Key key) const { |
| 291 | return const_cast<AddressMap*>(this)->FindMutable(key); |
| 292 | } |
| 293 | |
| 294 | template <class Value> |
| 295 | inline Value* AddressMap<Value>::FindMutable(Key key) { |
| 296 | const Number num = reinterpret_cast<Number>(key); |
| 297 | const Cluster* const c = FindCluster(num, false/*do not create*/); |
| 298 | if (c != NULL) { |
| 299 | for (Entry* e = c->blocks[BlockID(num)]; e != NULL; e = e->next) { |
| 300 | if (e->key == key) { |
| 301 | return &e->value; |
| 302 | } |
| 303 | } |
| 304 | } |
| 305 | return NULL; |
| 306 | } |
| 307 | |
| 308 | template <class Value> |
| 309 | void AddressMap<Value>::Insert(Key key, Value value) { |
| 310 | const Number num = reinterpret_cast<Number>(key); |
| 311 | Cluster* const c = FindCluster(num, true/*create*/); |
| 312 | |
| 313 | // Look in linked-list for this block |
| 314 | const int block = BlockID(num); |
| 315 | for (Entry* e = c->blocks[block]; e != NULL; e = e->next) { |
| 316 | if (e->key == key) { |
| 317 | e->value = value; |
| 318 | return; |
| 319 | } |
| 320 | } |
| 321 | |
| 322 | // Create entry |
| 323 | if (free_ == NULL) { |
| 324 | // Allocate a new batch of entries and add to free-list |
| 325 | Entry* array = New<Entry>(ALLOC_COUNT); |
| 326 | for (int i = 0; i < ALLOC_COUNT-1; i++) { |
| 327 | array[i].next = &array[i+1]; |
| 328 | } |
| 329 | array[ALLOC_COUNT-1].next = free_; |
| 330 | free_ = &array[0]; |
| 331 | } |
| 332 | Entry* e = free_; |
| 333 | free_ = e->next; |
| 334 | e->key = key; |
| 335 | e->value = value; |
| 336 | e->next = c->blocks[block]; |
| 337 | c->blocks[block] = e; |
| 338 | } |
| 339 | |
| 340 | template <class Value> |
| 341 | bool AddressMap<Value>::FindAndRemove(Key key, Value* removed_value) { |
| 342 | const Number num = reinterpret_cast<Number>(key); |
| 343 | Cluster* const c = FindCluster(num, false/*do not create*/); |
| 344 | if (c != NULL) { |
| 345 | for (Entry** p = &c->blocks[BlockID(num)]; *p != NULL; p = &(*p)->next) { |
| 346 | Entry* e = *p; |
| 347 | if (e->key == key) { |
| 348 | *removed_value = e->value; |
| 349 | *p = e->next; // Remove e from linked-list |
| 350 | e->next = free_; // Add e to free-list |
| 351 | free_ = e; |
| 352 | return true; |
| 353 | } |
| 354 | } |
| 355 | } |
| 356 | return false; |
| 357 | } |
| 358 | |
| 359 | template <class Value> |
| 360 | const Value* AddressMap<Value>::FindInside(ValueSizeFunc size_func, |
| 361 | size_t max_size, |
| 362 | Key key, |
| 363 | Key* res_key) { |
| 364 | const Number key_num = reinterpret_cast<Number>(key); |
| 365 | Number num = key_num; // we'll move this to move back through the clusters |
| 366 | while (1) { |
| 367 | const Cluster* c = FindCluster(num, false/*do not create*/); |
| 368 | if (c != NULL) { |
| 369 | while (1) { |
| 370 | const int block = BlockID(num); |
| 371 | bool had_smaller_key = false; |
| 372 | for (const Entry* e = c->blocks[block]; e != NULL; e = e->next) { |
| 373 | const Number e_num = reinterpret_cast<Number>(e->key); |
| 374 | if (e_num <= key_num) { |
| 375 | if (e_num == key_num || // to handle 0-sized ranges |
| 376 | key_num < e_num + (*size_func)(e->value)) { |
| 377 | *res_key = e->key; |
| 378 | return &e->value; |
| 379 | } |
| 380 | had_smaller_key = true; |
| 381 | } |
| 382 | } |
| 383 | if (had_smaller_key) return NULL; // got a range before 'key' |
| 384 | // and it did not contain 'key' |
| 385 | if (block == 0) break; |
| 386 | // try address-wise previous block |
| 387 | num |= kBlockSize - 1; // start at the last addr of prev block |
| 388 | num -= kBlockSize; |
| 389 | if (key_num - num > max_size) return NULL; |
| 390 | } |
| 391 | } |
| 392 | if (num < kClusterSize) return NULL; // first cluster |
| 393 | // go to address-wise previous cluster to try |
| 394 | num |= kClusterSize - 1; // start at the last block of previous cluster |
| 395 | num -= kClusterSize; |
| 396 | if (key_num - num > max_size) return NULL; |
| 397 | // Having max_size to limit the search is crucial: else |
| 398 | // we have to traverse a lot of empty clusters (or blocks). |
| 399 | // We can avoid needing max_size if we put clusters into |
| 400 | // a search tree, but performance suffers considerably |
| 401 | // if we use this approach by using stl::set. |
| 402 | } |
| 403 | } |
| 404 | |
| 405 | template <class Value> |
| 406 | template <class Type> |
| 407 | inline void AddressMap<Value>::Iterate(void (*callback)(Key, Value*, Type), |
| 408 | Type arg) const { |
| 409 | // We could optimize this by traversing only non-empty clusters and/or blocks |
| 410 | // but it does not speed up heap-checker noticeably. |
| 411 | for (int h = 0; h < kHashSize; ++h) { |
| 412 | for (const Cluster* c = hashtable_[h]; c != NULL; c = c->next) { |
| 413 | for (int b = 0; b < kClusterBlocks; ++b) { |
| 414 | for (Entry* e = c->blocks[b]; e != NULL; e = e->next) { |
| 415 | callback(e->key, &e->value, arg); |
| 416 | } |
| 417 | } |
| 418 | } |
| 419 | } |
| 420 | } |
| 421 | |
| 422 | #endif // BASE_ADDRESSMAP_INL_H_ |