diff options
author | Vizerai <jsking@google.com> | 2017-04-28 21:28:37 -0700 |
---|---|---|
committer | Vizerai <jsking@google.com> | 2017-04-28 21:28:37 -0700 |
commit | 6903d7423808dfdb4011ae8d603402f6def1430a (patch) | |
tree | 68fd54b21e1c82060ca20dfac18df524951ce607 /src | |
parent | 26e85fe0d79f4b9df9fdfde2923c61b87f91088c (diff) |
changed from symbolic links to actual files
Diffstat (limited to 'src')
-rw-r--r--[l---------] | src/core/ext/census/intrusive_hash_map.c | 304 | ||||
-rw-r--r--[l---------] | src/core/ext/census/intrusive_hash_map.h | 170 |
2 files changed, 472 insertions, 2 deletions
diff --git a/src/core/ext/census/intrusive_hash_map.c b/src/core/ext/census/intrusive_hash_map.c index 55490cd9f9..4a747c9137 120000..100644 --- a/src/core/ext/census/intrusive_hash_map.c +++ b/src/core/ext/census/intrusive_hash_map.c @@ -1 +1,303 @@ -/google/src/cloud/jsking/cppTraceImpl/google3/experimental/users/jsking/intrusive_hash_map.c
\ No newline at end of file +/* + * Copyright 2017 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/core/ext/census/intrusive_hash_map.h" +#include <string.h> + +extern bool hm_index_compare(const hm_index *A, const hm_index *B); + +/* Simple hashing function that takes lower 32 bits. */ +static inline uint32_t chunked_vector_hasher(uint64_t key) { + return (uint32_t)key; +} + +/* Vector chunks are 1MiB divided by pointer size. */ +static const size_t VECTOR_CHUNK_SIZE = (1 << 20) / sizeof(void *); + +/* Helper functions which return buckets from the chunked vector. These are + meant for internal use only within the intrusive_hash_map data structure. */ +static inline void **get_mutable_bucket(const chunked_vector *buckets, + uint32_t index) { + if (index < VECTOR_CHUNK_SIZE) { + return &buckets->first_[index]; + } + size_t rest_index = (index - VECTOR_CHUNK_SIZE) / VECTOR_CHUNK_SIZE; + return &buckets->rest_[rest_index][index % VECTOR_CHUNK_SIZE]; +} + +static inline void *get_bucket(const chunked_vector *buckets, uint32_t index) { + if (index < VECTOR_CHUNK_SIZE) { + return buckets->first_[index]; + } + size_t rest_index = (index - VECTOR_CHUNK_SIZE) / VECTOR_CHUNK_SIZE; + return buckets->rest_[rest_index][index % VECTOR_CHUNK_SIZE]; +} + +/* Helper function. */ +static inline size_t RestSize(const chunked_vector *vec) { + return (vec->size_ <= VECTOR_CHUNK_SIZE) + ? 0 + : (vec->size_ - VECTOR_CHUNK_SIZE - 1) / VECTOR_CHUNK_SIZE + 1; +} + +/* Initialize chunked vector to size of 0. */ +static void chunked_vector_init(chunked_vector *vec) { + vec->size_ = 0; + vec->first_ = NULL; + vec->rest_ = NULL; +} + +/* Clear chunked vector and free all memory that has been allocated then + initialize chunked vector. */ +static void chunked_vector_clear(chunked_vector *vec) { + if (vec->first_ != NULL) { + gpr_free(vec->first_); + } + if (vec->rest_ != NULL) { + size_t rest_size = RestSize(vec); + for (uint32_t i = 0; i < rest_size; ++i) { + if (vec->rest_[i] != NULL) { + gpr_free(vec->rest_[i]); + } + } + gpr_free(vec->rest_); + } + chunked_vector_init(vec); +} + +/* Clear chunked vector and then resize it to n entries. Allow the first 1MB to + be read w/o an extra cache miss. The rest of the elements are stored in an + array of arrays to avoid large mallocs. */ +static void chunked_vector_reset(chunked_vector *vec, size_t n) { + chunked_vector_clear(vec); + vec->size_ = n; + if (n <= VECTOR_CHUNK_SIZE) { + vec->first_ = (void **)gpr_malloc(sizeof(void *) * n); + memset(vec->first_, 0, sizeof(void *) * n); + } else { + vec->first_ = (void **)gpr_malloc(sizeof(void *) * VECTOR_CHUNK_SIZE); + memset(vec->first_, 0, sizeof(void *) * VECTOR_CHUNK_SIZE); + size_t rest_size = RestSize(vec); + vec->rest_ = (void ***)gpr_malloc(sizeof(void **) * rest_size); + memset(vec->rest_, 0, sizeof(void **) * rest_size); + int i = 0; + n -= VECTOR_CHUNK_SIZE; + while (n > 0) { + size_t this_size = GPR_MIN(n, VECTOR_CHUNK_SIZE); + vec->rest_[i] = (void **)gpr_malloc(sizeof(void *) * this_size); + memset(vec->rest_[i], 0, sizeof(void *) * this_size); + n -= this_size; + ++i; + } + } +} + +void intrusive_hash_map_init(intrusive_hash_map *hash_map, + uint32_t initial_log2_table_size) { + hash_map->log2_num_buckets = initial_log2_table_size; + hash_map->num_items = 0; + uint32_t num_buckets = (uint32_t)1 << hash_map->log2_num_buckets; + hash_map->extend_threshold = num_buckets >> 1; + chunked_vector_init(&hash_map->buckets); + chunked_vector_reset(&hash_map->buckets, num_buckets); + hash_map->hash_mask = num_buckets - 1; +} + +bool intrusive_hash_map_empty(const intrusive_hash_map *hash_map) { + return hash_map->num_items == 0; +} + +size_t intrusive_hash_map_size(const intrusive_hash_map *hash_map) { + return hash_map->num_items; +} + +void intrusive_hash_map_end(const intrusive_hash_map *hash_map, hm_index *idx) { + idx->bucket_index = (uint32_t)hash_map->buckets.size_; + GPR_ASSERT(idx->bucket_index <= UINT32_MAX); + idx->item = NULL; +} + +void intrusive_hash_map_next(const intrusive_hash_map *hash_map, + hm_index *idx) { + idx->item = idx->item->hash_link; + while (idx->item == NULL) { + idx->bucket_index++; + if (idx->bucket_index >= hash_map->buckets.size_) { + /* Reached end of table. */ + idx->item = NULL; + return; + } + idx->item = (hm_item *)get_bucket(&hash_map->buckets, idx->bucket_index); + } +} + +void intrusive_hash_map_begin(const intrusive_hash_map *hash_map, + hm_index *idx) { + for (uint32_t i = 0; i < hash_map->buckets.size_; ++i) { + if (get_bucket(&hash_map->buckets, i) != NULL) { + idx->bucket_index = i; + idx->item = (hm_item *)get_bucket(&hash_map->buckets, i); + return; + } + } + intrusive_hash_map_end(hash_map, idx); +} + +hm_item *intrusive_hash_map_find(const intrusive_hash_map *hash_map, + uint64_t key) { + uint32_t index = chunked_vector_hasher(key) & hash_map->hash_mask; + + hm_item *p = (hm_item *)get_bucket(&hash_map->buckets, index); + while (p != NULL) { + if (key == p->key) { + return p; + } + p = p->hash_link; + } + return NULL; +} + +hm_item *intrusive_hash_map_erase(intrusive_hash_map *hash_map, uint64_t key) { + uint32_t index = chunked_vector_hasher(key) & hash_map->hash_mask; + + hm_item **slot = (hm_item **)get_mutable_bucket(&hash_map->buckets, index); + hm_item *p = *slot; + if (p == NULL) { + return NULL; + } + + if (key == p->key) { + *slot = p->hash_link; + p->hash_link = NULL; + hash_map->num_items--; + return p; + } + + hm_item *prev = p; + p = p->hash_link; + + while (p) { + if (key == p->key) { + prev->hash_link = p->hash_link; + p->hash_link = NULL; + hash_map->num_items--; + return p; + } + prev = p; + p = p->hash_link; + } + return NULL; +} + +/* Insert an hm_item* into the underlying chunked vector. hash_mask is + * array_size-1. Returns true if it is a new hm_item and false if the hm_item + * already existed. + */ +static inline bool intrusive_hash_map_internal_insert(chunked_vector *buckets, + uint32_t hash_mask, + hm_item *item) { + const uint64_t key = item->key; + uint32_t index = chunked_vector_hasher(key) & hash_mask; + hm_item **slot = (hm_item **)get_mutable_bucket(buckets, index); + hm_item *p = *slot; + item->hash_link = p; + + /* Check to see if key already exists. */ + while (p) { + if (p->key == key) { + return false; + } + p = p->hash_link; + } + + /* Otherwise add new entry. */ + *slot = item; + return true; +} + +/* Extend the allocated number of elements in the hash map by a factor of 2. */ +void intrusive_hash_map_extend(intrusive_hash_map *hash_map) { + uint32_t new_log2_num_buckets = 1 + hash_map->log2_num_buckets; + uint32_t new_num_buckets = (uint32_t)1 << new_log2_num_buckets; + GPR_ASSERT(new_num_buckets <= UINT32_MAX && new_num_buckets > 0); + chunked_vector new_buckets; + chunked_vector_init(&new_buckets); + chunked_vector_reset(&new_buckets, new_num_buckets); + uint32_t new_hash_mask = new_num_buckets - 1; + + hm_index cur_idx; + hm_index end_idx; + intrusive_hash_map_end(hash_map, &end_idx); + intrusive_hash_map_begin(hash_map, &cur_idx); + while (!hm_index_compare(&cur_idx, &end_idx)) { + hm_item *new_item = cur_idx.item; + intrusive_hash_map_next(hash_map, &cur_idx); + intrusive_hash_map_internal_insert(&new_buckets, new_hash_mask, new_item); + } + + /* Set values for new chunked_vector. extend_threshold is set to half of + * new_num_buckets. */ + hash_map->log2_num_buckets = new_log2_num_buckets; + chunked_vector_clear(&hash_map->buckets); + hash_map->buckets = new_buckets; + hash_map->hash_mask = new_hash_mask; + hash_map->extend_threshold = new_num_buckets >> 1; +} + +/* Insert a hm_item. The hm_item must remain live until it is removed from the + table. This object does not take the ownership of hm_item. The caller must + remove this hm_item from the table and delete it before this table is + deleted. If hm_item exists already num_items is not changed. */ +bool intrusive_hash_map_insert(intrusive_hash_map *hash_map, hm_item *item) { + if (hash_map->num_items >= hash_map->extend_threshold) { + intrusive_hash_map_extend(hash_map); + } + if (intrusive_hash_map_internal_insert(&hash_map->buckets, + hash_map->hash_mask, item)) { + hash_map->num_items++; + return true; + } + return false; +} + +void intrusive_hash_map_clear(intrusive_hash_map *hash_map, + void (*free_object)(void *)) { + hm_index cur; + hm_index end; + intrusive_hash_map_end(hash_map, &end); + intrusive_hash_map_begin(hash_map, &cur); + + while (!hm_index_compare(&cur, &end)) { + hm_index next = cur; + intrusive_hash_map_next(hash_map, &next); + if (cur.item != NULL) { + hm_item *item = intrusive_hash_map_erase(hash_map, cur.item->key); + (*free_object)((void *)item); + gpr_free(item); + } + cur = next; + } +} + +void intrusive_hash_map_free(intrusive_hash_map *hash_map, + void (*free_object)(void *)) { + intrusive_hash_map_clear(hash_map, (*free_object)); + hash_map->num_items = 0; + hash_map->extend_threshold = 0; + hash_map->log2_num_buckets = 0; + hash_map->hash_mask = 0; + chunked_vector_clear(&hash_map->buckets); +} diff --git a/src/core/ext/census/intrusive_hash_map.h b/src/core/ext/census/intrusive_hash_map.h index 641ec396dd..900e4368c9 120000..100644 --- a/src/core/ext/census/intrusive_hash_map.h +++ b/src/core/ext/census/intrusive_hash_map.h @@ -1 +1,169 @@ -/google/src/cloud/jsking/cppTraceImpl/google3/experimental/users/jsking/intrusive_hash_map.h
\ No newline at end of file +/* + * Copyright 2017 Google Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef GRPC_CORE_EXT_CENSUS_INTRUSIVE_HASH_MAP_H +#define GRPC_CORE_EXT_CENSUS_INTRUSIVE_HASH_MAP_H + +#include <grpc/support/alloc.h> +#include <grpc/support/log.h> +#include <grpc/support/useful.h> +#include <stdbool.h> + +/* intrusive_hash_map is a fast chained hash table. It is almost always faster + * than STL hash_map, since this hash map avoids malloc and free during insert + * and erase. This hash map is faster than a dense hash map when the application + * calls insert and erase more often than find. When the workload is dominated + * by find() a dense hash map may be faster. + * + * intrusive_hash_map uses an intrusive header placed within a user defined + * struct. IHM_key MUST be set to a valid value before insertion into the hash + * map or undefined behavior may occur. IHM_hash_link needs to be set to NULL + * initially. + * + * EXAMPLE USAGE: + * + * typedef struct string_item { + * INTRUSIVE_HASH_MAP_HEADER; + * // User data. + * char *str_buf; + * uint16_t len; + * } string_item; + * + * static string_item *make_string_item(uint64_t key, const char *buf, + * uint16_t len) { + * string_item *item = (string_item *)gpr_malloc(sizeof(string_item)); + * item->IHM_key = key; + * item->IHM_hash_link = NULL; + * item->len = len; + * item->str_buf = (char *)malloc(len); + * memcpy(item->str_buf, buf, len); + * return item; + * } + * + * string_item *new_item1 = make_string_item(10, "test1", 5); + * bool ok = intrusive_hash_map_insert(&hash_map, (hm_item *)new_item1); + * + * string_item *item1 = + * (string_item *)intrusive_hash_map_find(&hash_map, 10); + */ + +/* Hash map item. Stores key and a pointer to the actual object. A user defined + * version of this can be passed in as long as the first 2 entries (key and + * hash_link) are the same. Pointer to struct will need to be cast as + * (hm_item *) when passed to hash map. This allows it to be intrusive. */ +typedef struct hm_item { + uint64_t key; + struct hm_item *hash_link; + /* Optional user defined data after this. */ +} hm_item; + +/* Macro provided for ease of use. This must be first in the user defined + * struct. */ +#define INTRUSIVE_HASH_MAP_HEADER \ + uint64_t IHM_key; \ + struct hm_item *IHM_hash_link + +/* The chunked vector is a data structure that allocates buckets for use in the + * hash map. ChunkedVector is logically equivalent to T*[N] (cast void* as + * T*). It's internally implemented as an array of 1MB arrays to avoid + * allocating large consecutive memory chunks. This is an internal data + * structure that should never be accessed directly. */ +typedef struct chunked_vector { + size_t size_; + void **first_; + void ***rest_; +} chunked_vector; + +/* Core intrusive hash map data structure. All internal elements are managed by + * functions and should not be altered manually. intrusive_hash_map_init() + * must first be called before an intrusive_hash_map can be used. */ +typedef struct intrusive_hash_map { + uint32_t num_items; + uint32_t extend_threshold; + uint32_t log2_num_buckets; + uint32_t hash_mask; + chunked_vector buckets; +} intrusive_hash_map; + +/* Index struct which acts as a pseudo-iterator within the hash map. */ +typedef struct hm_index { + uint32_t bucket_index; // hash map bucket index. + hm_item *item; // Pointer to hm_item within the hash map. +} hm_index; + +/* Returns true if two hm_indices point to the same object within the hash map + * and false otherwise. */ +inline bool hm_index_compare(const hm_index *A, const hm_index *B) { + return (A->item == B->item && A->bucket_index == B->bucket_index); +} + +/* Helper functions for iterating over the hash map. */ +/* On return idx will contain an invalid index which is always equal to + * hash_map->buckets.size_ */ +void intrusive_hash_map_end(const intrusive_hash_map *hash_map, hm_index *idx); + +/* Iterates index to the next valid entry in the hash map and stores the + * index within idx. If end of table is reached, idx will contain the same + * values as if intrusive_hash_map_end() was called. */ +void intrusive_hash_map_next(const intrusive_hash_map *hash_map, hm_index *idx); + +/* On return, idx will contain the index of the first non-null entry in the hash + * map. If the hash map is empty, idx will contain the same values as if + * intrusive_hash_map_end() was called. */ +void intrusive_hash_map_begin(const intrusive_hash_map *hash_map, + hm_index *idx); + +/* Initialize intrusive hash map data structure. This must be called before + * the hash map can be used. The initial size of an intrusive hash map will be + * 2^initial_log2_map_size (valid range is [0, 31]). */ +void intrusive_hash_map_init(intrusive_hash_map *hash_map, + uint32_t initial_log2_map_size); + +/* Returns true if the hash map is empty and false otherwise. */ +bool intrusive_hash_map_empty(const intrusive_hash_map *hash_map); + +/* Returns the number of elements currently in the hash map. */ +size_t intrusive_hash_map_size(const intrusive_hash_map *hash_map); + +/* Find a hm_item within the hash map by key. Returns NULL if item was not + * found. */ +hm_item *intrusive_hash_map_find(const intrusive_hash_map *hash_map, + uint64_t key); + +/* Erase the hm_item that corresponds with key. If the hm_item is found, return + * the pointer to the hm_item. Else returns NULL. */ +hm_item *intrusive_hash_map_erase(intrusive_hash_map *hash_map, uint64_t key); + +/* Attempts to insert a new hm_item into the hash map. If an element with the + * same key already exists, it will not insert the new item and return false. + * Otherwise, it will insert the new item and return true. */ +bool intrusive_hash_map_insert(intrusive_hash_map *hash_map, hm_item *item); + +/* Clear entire contents of the hash map, but leaves internal data structure + * untouched. Second argument takes a function pointer to a method that will + * free the object designated by the user and pointed to by hash_map->value. */ +void intrusive_hash_map_clear(intrusive_hash_map *hash_map, + void (*free_object)(void *)); + +/* Erase all contents of hash map and free the memory. Hash map is invalid + * after calling this function and cannot be used until it has been + * reinitialized (intrusive_hash_map_init()). takes a function pointer to a + * method that will free the object designated by the user and pointed to by + * hash_map->value.*/ +void intrusive_hash_map_free(intrusive_hash_map *hash_map, + void (*free_object)(void *)); + +#endif |