diff options
author | 2015-02-25 16:45:20 +0100 | |
---|---|---|
committer | 2015-02-25 16:45:20 +0100 | |
commit | d08b27fa9701fecfdb69e1b0d1ac2459efc2129b (patch) | |
tree | 5d50963026239ca5aebfb47ea5b8db7e814e57c8 /src/main/java/com/google/devtools/build/lib/collect/CompactHashSet.java |
Update from Google.
--
MOE_MIGRATED_REVID=85702957
Diffstat (limited to 'src/main/java/com/google/devtools/build/lib/collect/CompactHashSet.java')
-rw-r--r-- | src/main/java/com/google/devtools/build/lib/collect/CompactHashSet.java | 604 |
1 files changed, 604 insertions, 0 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/collect/CompactHashSet.java b/src/main/java/com/google/devtools/build/lib/collect/CompactHashSet.java new file mode 100644 index 0000000000..5ee24175a4 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/collect/CompactHashSet.java @@ -0,0 +1,604 @@ +// Copyright 2014 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/* + * Copyright (C) 2012 The Guava Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.devtools.build.lib.collect; + +import com.google.common.base.Preconditions; +import com.google.common.primitives.Ints; + +import java.io.IOException; +import java.io.InvalidObjectException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.lang.reflect.Array; +import java.util.AbstractSet; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.ConcurrentModificationException; +import java.util.Iterator; +import java.util.NoSuchElementException; +import java.util.Objects; + +import javax.annotation.Nullable; + +/** + * CompactHashSet is an implementation of a Set. All optional operations (adding and + * removing) are supported. The elements can be any objects. + * + * <p>{@code contains(x)}, {@code add(x)} and {@code remove(x)}, are all (expected and amortized) + * constant time operations. Expected in the hashtable sense (depends on the hash function + * doing a good job of distributing the elements to the buckets to a distribution not far from + * uniform), and amortized since some operations can trigger a hash table resize. + * + * <p>Unlike {@code java.util.HashSet}, iteration is only proportional to the actual + * {@code size()}, which is optimal, and <i>not</i> the size of the internal hashtable, + * which could be much larger than {@code size()}. Furthermore, this structure only depends + * on a fixed number of arrays; {@code add(x)} operations <i>do not</i> create objects + * for the garbage collector to deal with, and for every element added, the garbage collector + * will have to traverse {@code 1.5} references on average, in the marking phase, not {@code 5.0} + * as in {@code java.util.HashSet}. + * + * <p>If there are no removals, then {@link #iterator iteration} order is the same as insertion + * order. Any removal invalidates any ordering guarantees. + */ +// TODO(bazel-team): This was branched of an internal version of guava. If the class is released, we +// should remove this again. +public class CompactHashSet<E> extends AbstractSet<E> implements Serializable { + // TODO(bazel-team): cache all field accesses in local vars + + // A partial copy of com.google.common.collect.Hashing. + private static final int C1 = 0xcc9e2d51; + private static final int C2 = 0x1b873593; + + /* + * This method was rewritten in Java from an intermediate step of the Murmur hash function in + * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp, which contained the + * following header: + * + * MurmurHash3 was written by Austin Appleby, and is placed in the public domain. The author + * hereby disclaims copyright to this source code. + */ + private static int smear(int hashCode) { + return C2 * Integer.rotateLeft(hashCode * C1, 15); + } + + private static int smearedHash(@Nullable Object o) { + return smear((o == null) ? 0 : o.hashCode()); + } + + private static final int MAX_TABLE_SIZE = Ints.MAX_POWER_OF_TWO; + + private static int closedTableSize(int expectedEntries, double loadFactor) { + // Get the recommended table size. + // Round down to the nearest power of 2. + expectedEntries = Math.max(expectedEntries, 2); + int tableSize = Integer.highestOneBit(expectedEntries); + // Check to make sure that we will not exceed the maximum load factor. + if (expectedEntries > (int) (loadFactor * tableSize)) { + tableSize <<= 1; + return (tableSize > 0) ? tableSize : MAX_TABLE_SIZE; + } + return tableSize; + } + + /** + * Creates an empty {@code CompactHashSet} instance. + */ + public static <E> CompactHashSet<E> create() { + return new CompactHashSet<E>(); + } + + /** + * Creates a <i>mutable</i> {@code CompactHashSet} instance containing the elements + * of the given collection in unspecified order. + * + * @param collection the elements that the set should contain + * @return a new {@code CompactHashSet} containing those elements (minus duplicates) + */ + public static <E> CompactHashSet<E> create(Collection<? extends E> collection) { + CompactHashSet<E> set = createWithExpectedSize(collection.size()); + set.addAll(collection); + return set; + } + + /** + * Creates a <i>mutable</i> {@code CompactHashSet} instance containing the given + * elements in unspecified order. + * + * @param elements the elements that the set should contain + * @return a new {@code CompactHashSet} containing those elements (minus duplicates) + */ + @SafeVarargs + public static <E> CompactHashSet<E> create(E... elements) { + CompactHashSet<E> set = createWithExpectedSize(elements.length); + Collections.addAll(set, elements); + return set; + } + + /** + * Creates a {@code CompactHashSet} instance, with a high enough "initial capacity" + * that it <i>should</i> hold {@code expectedSize} elements without growth. + * + * @param expectedSize the number of elements you expect to add to the returned set + * @return a new, empty {@code CompactHashSet} with enough capacity to hold {@code + * expectedSize} elements without resizing + * @throws IllegalArgumentException if {@code expectedSize} is negative + */ + public static <E> CompactHashSet<E> createWithExpectedSize(int expectedSize) { + return new CompactHashSet<E>(expectedSize); + } + + private static final int MAXIMUM_CAPACITY = 1 << 30; + + // TODO(bazel-team): decide, and inline, load factor. 0.75? + private static final float DEFAULT_LOAD_FACTOR = 1.0f; + + /** + * Bitmask that selects the low 32 bits. + */ + private static final long NEXT_MASK = (1L << 32) - 1; + + /** + * Bitmask that selects the high 32 bits. + */ + private static final long HASH_MASK = ~NEXT_MASK; + + // TODO(bazel-team): decide default size + private static final int DEFAULT_SIZE = 3; + + static final int UNSET = -1; + + /** + * The hashtable. Its values are indexes to both the elements and entries arrays. + * + * Currently, the UNSET value means "null pointer", and any non negative value x is + * the actual index. + * + * Its size must be a power of two. + */ + private transient int[] table; + + /** + * Contains the logical entries, in the range of [0, size()). The high 32 bits of each + * long is the smeared hash of the element, whereas the low 32 bits is the "next" pointer + * (pointing to the next entry in the bucket chain). The pointers in [size(), entries.length) + * are all "null" (UNSET). + */ + private transient long[] entries; + + /** + * The elements contained in the set, in the range of [0, size()). + */ + transient Object[] elements; + + /** + * The load factor. + */ + transient float loadFactor; + + /** + * Keeps track of modifications of this set, to make it possible to throw + * ConcurrentModificationException in the iterator. Note that we choose not to + * make this volatile, so we do less of a "best effort" to track such errors, + * for better performance. + */ + transient int modCount; + + /** + * When we have this many elements, resize the hashtable. + */ + private transient int threshold; + + /** + * The number of elements contained in the set. + */ + private transient int size; + + /** + * Constructs a new empty instance of {@code CompactHashSet}. + */ + CompactHashSet() { + init(DEFAULT_SIZE, DEFAULT_LOAD_FACTOR); + } + + /** + * Constructs a new instance of {@code CompactHashSet} with the specified capacity. + * + * @param expectedSize the initial capacity of this {@code CompactHashSet}. + */ + CompactHashSet(int expectedSize) { + init(expectedSize, DEFAULT_LOAD_FACTOR); + } + + /** + * Pseudoconstructor for serialization support. + */ + void init(int expectedSize, float loadFactor) { + Preconditions.checkArgument(expectedSize >= 0, "Initial capacity must be non-negative"); + Preconditions.checkArgument(loadFactor > 0, "Illegal load factor"); + int buckets = closedTableSize(expectedSize, loadFactor); + this.table = newTable(buckets); + this.loadFactor = loadFactor; + this.elements = new Object[expectedSize]; + this.entries = newEntries(expectedSize); + this.threshold = Math.max(1, (int) (buckets * loadFactor)); + } + + private static int[] newTable(int size) { + int[] array = new int[size]; + Arrays.fill(array, UNSET); + return array; + } + + private static long[] newEntries(int size) { + long[] array = new long[size]; + Arrays.fill(array, UNSET); + return array; + } + + private static int getHash(long entry) { + return (int) (entry >>> 32); + } + + /** + * Returns the index, or UNSET if the pointer is "null" + */ + private static int getNext(long entry) { + return (int) entry; + } + + /** + * Returns a new entry value by changing the "next" index of an existing entry + */ + private static long swapNext(long entry, int newNext) { + return (HASH_MASK & entry) | (NEXT_MASK & newNext); + } + + private int hashTableMask() { + return table.length - 1; + } + + @Override + public boolean add(@Nullable E object) { + long[] entries = this.entries; + Object[] elements = this.elements; + int hash = smearedHash(object); + int tableIndex = hash & hashTableMask(); + int newEntryIndex = this.size; // current size, and pointer to the entry to be appended + int next = table[tableIndex]; + if (next == UNSET) { // uninitialized bucket + table[tableIndex] = newEntryIndex; + } else { + int last; + long entry; + do { + last = next; + entry = entries[next]; + if (getHash(entry) == hash && Objects.equals(object, elements[next])) { + return false; + } + next = getNext(entry); + } while (next != UNSET); + entries[last] = swapNext(entry, newEntryIndex); + } + if (newEntryIndex == Integer.MAX_VALUE) { + throw new IllegalStateException("Cannot contain more than Integer.MAX_VALUE elements!"); + } + int newSize = newEntryIndex + 1; + resizeMeMaybe(newSize); + insertEntry(newEntryIndex, object, hash); + this.size = newSize; + if (newEntryIndex >= threshold) { + resizeTable(2 * table.length); + } + modCount++; + return true; + } + + /** + * Creates a fresh entry with the specified object at the specified position in the entry + * arrays. + */ + void insertEntry(int entryIndex, E object, int hash) { + this.entries[entryIndex] = ((long) hash << 32) | (NEXT_MASK & UNSET); + this.elements[entryIndex] = object; + } + + /** + * Returns currentSize + 1, after resizing the entries storage if necessary. + */ + private void resizeMeMaybe(int newSize) { + int entriesSize = entries.length; + if (newSize > entriesSize) { + int newCapacity = entriesSize + Math.max(1, entriesSize >>> 1); + if (newCapacity < 0) { + newCapacity = Integer.MAX_VALUE; + } + if (newCapacity != entriesSize) { + resizeEntries(newCapacity); + } + } + } + + /** + * Resizes the internal entries array to the specified capacity, which may be greater or less + * than the current capacity. + */ + void resizeEntries(int newCapacity) { + this.elements = Arrays.copyOf(elements, newCapacity); + long[] entries = this.entries; + int oldSize = entries.length; + entries = Arrays.copyOf(entries, newCapacity); + if (newCapacity > oldSize) { + Arrays.fill(entries, oldSize, newCapacity, UNSET); + } + this.entries = entries; + } + + private void resizeTable(int newCapacity) { // newCapacity always a power of two + int[] oldTable = table; + int oldCapacity = oldTable.length; + if (oldCapacity >= MAXIMUM_CAPACITY) { + threshold = Integer.MAX_VALUE; + return; + } + int newThreshold = 1 + (int) (newCapacity * loadFactor); + int[] newTable = newTable(newCapacity); + long[] entries = this.entries; + + int mask = newTable.length - 1; + for (int i = 0; i < size; i++) { + long oldEntry = entries[i]; + int hash = getHash(oldEntry); + int tableIndex = hash & mask; + int next = newTable[tableIndex]; + newTable[tableIndex] = i; + entries[i] = ((long) hash << 32) | (NEXT_MASK & next); + } + + this.threshold = newThreshold; + this.table = newTable; + } + + @Override + public boolean contains(@Nullable Object object) { + int hash = smearedHash(object); + int next = table[hash & hashTableMask()]; + while (next != UNSET) { + long entry = entries[next]; + if (getHash(entry) == hash && Objects.equals(object, elements[next])) { + return true; + } + next = getNext(entry); + } + return false; + } + + @Override + public boolean remove(@Nullable Object object) { + return remove(object, smearedHash(object)); + } + + private boolean remove(Object object, int hash) { + int tableIndex = hash & hashTableMask(); + int next = table[tableIndex]; + if (next == UNSET) { + return false; + } + int last = UNSET; + do { + if (getHash(entries[next]) == hash && Objects.equals(object, elements[next])) { + if (last == UNSET) { + // we need to update the root link from table[] + table[tableIndex] = getNext(entries[next]); + } else { + // we need to update the link from the chain + entries[last] = swapNext(entries[last], getNext(entries[next])); + } + + moveEntry(next); + size--; + modCount++; + return true; + } + last = next; + next = getNext(entries[next]); + } while (next != UNSET); + return false; + } + + /** + * Moves the last entry in the entry array into {@code dstIndex}, and nulls out its old position. + */ + void moveEntry(int dstIndex) { + int srcIndex = size() - 1; + if (dstIndex < srcIndex) { + // move last entry to deleted spot + elements[dstIndex] = elements[srcIndex]; + elements[srcIndex] = null; + + // move the last entry to the removed spot, just like we moved the element + long lastEntry = entries[srcIndex]; + entries[dstIndex] = lastEntry; + entries[srcIndex] = UNSET; + + // also need to update whoever's "next" pointer was pointing to the last entry place + // reusing "tableIndex" and "next"; these variables were no longer needed + int tableIndex = getHash(lastEntry) & hashTableMask(); + int lastNext = table[tableIndex]; + if (lastNext == srcIndex) { + // we need to update the root pointer + table[tableIndex] = dstIndex; + } else { + // we need to update a pointer in an entry + int previous; + long entry; + do { + previous = lastNext; + lastNext = getNext(entry = entries[lastNext]); + } while (lastNext != srcIndex); + // here, entries[previous] points to the old entry location; update it + entries[previous] = swapNext(entry, dstIndex); + } + } else { + elements[dstIndex] = null; + entries[dstIndex] = UNSET; + } + } + + @Override + public Iterator<E> iterator() { + return new Iterator<E>() { + int expectedModCount = modCount; + boolean nextCalled = false; + int index = 0; + + @Override + public boolean hasNext() { + return index < size; + } + + @Override + @SuppressWarnings("unchecked") + public E next() { + checkForConcurrentModification(); + if (!hasNext()) { + throw new NoSuchElementException(); + } + nextCalled = true; + return (E) elements[index++]; + } + + @Override + public void remove() { + checkForConcurrentModification(); + Preconditions.checkState(nextCalled, "no calls to next() since the last call to remove()"); + expectedModCount++; + index--; + CompactHashSet.this.remove(elements[index], getHash(entries[index])); + nextCalled = false; + } + + private void checkForConcurrentModification() { + if (modCount != expectedModCount) { + throw new ConcurrentModificationException(); + } + } + }; + } + + @Override + public int size() { + return size; + } + + @Override + public boolean isEmpty() { + return size == 0; + } + + @Override + public Object[] toArray() { + return Arrays.copyOf(elements, size); + } + + @SuppressWarnings("unchecked") + @Override + public <T> T[] toArray(T[] a) { + if (a.length < size) { + a = (T[]) Array.newInstance(a.getClass().getComponentType(), size); + } + System.arraycopy(elements, 0, a, 0, size); + return a; + } + + /** + * Ensures that this {@code CompactHashSet} has the smallest representation in memory, + * given its current size. + */ + public void trimToSize() { + int size = this.size; + if (size < entries.length) { + resizeEntries(size); + } + // size / loadFactor gives the table size of the appropriate load factor, + // but that may not be a power of two. We floor it to a power of two by + // keeping its highest bit. But the smaller table may have a load factor + // larger than what we want; then we want to go to the next power of 2 if we can + int minimumTableSize = Math.max(1, Integer.highestOneBit((int) (size / loadFactor))); + if (minimumTableSize < MAXIMUM_CAPACITY) { + double load = (double) size / minimumTableSize; + if (load > loadFactor) { + minimumTableSize <<= 1; // increase to next power if possible + } + } + + if (minimumTableSize < table.length) { + resizeTable(minimumTableSize); + } + } + + @Override + public void clear() { + modCount++; + Arrays.fill(elements, 0, size, null); + Arrays.fill(table, UNSET); + Arrays.fill(entries, UNSET); + this.size = 0; + } + + private void writeObject(ObjectOutputStream stream) throws IOException { + stream.defaultWriteObject(); + stream.writeInt(table.length); + stream.writeFloat(loadFactor); + stream.writeInt(size); + for (E e : this) { + stream.writeObject(e); + } + } + + @SuppressWarnings("unchecked") + private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException { + stream.defaultReadObject(); + int length = stream.readInt(); + float loadFactor = stream.readFloat(); + int elementCount = stream.readInt(); + try { + init(length, loadFactor); + } catch (IllegalArgumentException e) { + throw new InvalidObjectException(e.getMessage()); + } + for (int i = elementCount; --i >= 0;) { + E element = (E) stream.readObject(); + add(element); + } + } +} |