Update from Google.

-- MOE_MIGRATED_REVID=85702957
author: Han-Wen Nienhuys <hanwen@google.com> 2015-02-25 16:45:20 +0100
committer: Han-Wen Nienhuys <hanwen@google.com> 2015-02-25 16:45:20 +0100
commit: d08b27fa9701fecfdb69e1b0d1ac2459efc2129b (patch)
tree: 5d50963026239ca5aebfb47ea5b8db7e814e57c8 /src/main/java/com/google/devtools/build/lib/collect/CompactHashSet.java
1 files changed, 604 insertions, 0 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/collect/CompactHashSet.java b/src/main/java/com/google/devtools/build/lib/collect/CompactHashSet.java
new file mode 100644
index 0000000000..5ee24175a4
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/collect/CompactHashSet.java
@@ -0,0 +1,604 @@
+// Copyright 2014 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/*
+ * Copyright (C) 2012 The Guava Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.devtools.build.lib.collect;
+
+import com.google.common.base.Preconditions;
+import com.google.common.primitives.Ints;
+
+import java.io.IOException;
+import java.io.InvalidObjectException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.lang.reflect.Array;
+import java.util.AbstractSet;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.ConcurrentModificationException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import java.util.Objects;
+
+import javax.annotation.Nullable;
+
+/**
+ * CompactHashSet is an implementation of a Set. All optional operations (adding and
+ * removing) are supported. The elements can be any objects.
+ *
+ * <p>{@code contains(x)}, {@code add(x)} and {@code remove(x)}, are all (expected and amortized)
+ * constant time operations. Expected in the hashtable sense (depends on the hash function
+ * doing a good job of distributing the elements to the buckets to a distribution not far from
+ * uniform), and amortized since some operations can trigger a hash table resize.
+ *
+ * <p>Unlike {@code java.util.HashSet}, iteration is only proportional to the actual
+ * {@code size()}, which is optimal, and <i>not</i> the size of the internal hashtable,
+ * which could be much larger than {@code size()}. Furthermore, this structure only depends
+ * on a fixed number of arrays; {@code add(x)} operations <i>do not</i> create objects
+ * for the garbage collector to deal with, and for every element added, the garbage collector
+ * will have to traverse {@code 1.5} references on average, in the marking phase, not {@code 5.0}
+ * as in {@code java.util.HashSet}.
+ *
+ * <p>If there are no removals, then {@link #iterator iteration} order is the same as insertion
+ * order. Any removal invalidates any ordering guarantees.
+ */
+// TODO(bazel-team): This was branched of an internal version of guava. If the class is released, we
+// should remove this again.
+public class CompactHashSet<E> extends AbstractSet<E> implements Serializable {
+  // TODO(bazel-team): cache all field accesses in local vars
+
+  // A partial copy of com.google.common.collect.Hashing.
+  private static final int C1 = 0xcc9e2d51;
+  private static final int C2 = 0x1b873593;
+
+  /*
+   * This method was rewritten in Java from an intermediate step of the Murmur hash function in
+   * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp, which contained the
+   * following header:
+   *
+   * MurmurHash3 was written by Austin Appleby, and is placed in the public domain. The author
+   * hereby disclaims copyright to this source code.
+   */
+  private static int smear(int hashCode) {
+    return C2 * Integer.rotateLeft(hashCode * C1, 15);
+  }
+
+  private static int smearedHash(@Nullable Object o) {
+    return smear((o == null) ? 0 : o.hashCode());
+  }
+
+  private static final int MAX_TABLE_SIZE = Ints.MAX_POWER_OF_TWO;
+
+  private static int closedTableSize(int expectedEntries, double loadFactor) {
+    // Get the recommended table size.
+    // Round down to the nearest power of 2.
+    expectedEntries = Math.max(expectedEntries, 2);
+    int tableSize = Integer.highestOneBit(expectedEntries);
+    // Check to make sure that we will not exceed the maximum load factor.
+    if (expectedEntries > (int) (loadFactor * tableSize)) {
+      tableSize <<= 1;
+      return (tableSize > 0) ? tableSize : MAX_TABLE_SIZE;
+    }
+    return tableSize;
+  }
+
+  /**
+   * Creates an empty {@code CompactHashSet} instance.
+   */
+  public static <E> CompactHashSet<E> create() {
+    return new CompactHashSet<E>();
+  }
+
+  /**
+   * Creates a <i>mutable</i> {@code CompactHashSet} instance containing the elements
+   * of the given collection in unspecified order.
+   *
+   * @param collection the elements that the set should contain
+   * @return a new {@code CompactHashSet} containing those elements (minus duplicates)
+   */
+  public static <E> CompactHashSet<E> create(Collection<? extends E> collection) {
+    CompactHashSet<E> set = createWithExpectedSize(collection.size());
+    set.addAll(collection);
+    return set;
+  }
+
+  /**
+   * Creates a <i>mutable</i> {@code CompactHashSet} instance containing the given
+   * elements in unspecified order.
+   *
+   * @param elements the elements that the set should contain
+   * @return a new {@code CompactHashSet} containing those elements (minus duplicates)
+   */
+  @SafeVarargs
+  public static <E> CompactHashSet<E> create(E... elements) {
+    CompactHashSet<E> set = createWithExpectedSize(elements.length);
+    Collections.addAll(set, elements);
+    return set;
+  }
+
+  /**
+   * Creates a {@code CompactHashSet} instance, with a high enough "initial capacity"
+   * that it <i>should</i> hold {@code expectedSize} elements without growth.
+   *
+   * @param expectedSize the number of elements you expect to add to the returned set
+   * @return a new, empty {@code CompactHashSet} with enough capacity to hold {@code
+   *         expectedSize} elements without resizing
+   * @throws IllegalArgumentException if {@code expectedSize} is negative
+   */
+  public static <E> CompactHashSet<E> createWithExpectedSize(int expectedSize) {
+    return new CompactHashSet<E>(expectedSize);
+  }
+
+  private static final int MAXIMUM_CAPACITY = 1 << 30;
+
+  // TODO(bazel-team): decide, and inline, load factor. 0.75?
+  private static final float DEFAULT_LOAD_FACTOR = 1.0f;
+
+  /**
+   * Bitmask that selects the low 32 bits.
+   */
+  private static final long NEXT_MASK  = (1L << 32) - 1;
+
+  /**
+   * Bitmask that selects the high 32 bits.
+   */
+  private static final long HASH_MASK = ~NEXT_MASK;
+
+  // TODO(bazel-team): decide default size
+  private static final int DEFAULT_SIZE = 3;
+
+  static final int UNSET = -1;
+
+  /**
+   * The hashtable. Its values are indexes to both the elements and entries arrays.
+   *
+   * Currently, the UNSET value means "null pointer", and any non negative value x is
+   * the actual index.
+   *
+   * Its size must be a power of two.
+   */
+  private transient int[] table;
+
+  /**
+   * Contains the logical entries, in the range of [0, size()). The high 32 bits of each
+   * long is the smeared hash of the element, whereas the low 32 bits is the "next" pointer
+   * (pointing to the next entry in the bucket chain). The pointers in [size(), entries.length)
+   * are all "null" (UNSET).
+   */
+  private transient long[] entries;
+
+  /**
+   * The elements contained in the set, in the range of [0, size()).
+   */
+  transient Object[] elements;
+
+  /**
+   * The load factor.
+   */
+  transient float loadFactor;
+
+  /**
+   * Keeps track of modifications of this set, to make it possible to throw
+   * ConcurrentModificationException in the iterator. Note that we choose not to
+   * make this volatile, so we do less of a "best effort" to track such errors,
+   * for better performance.
+   */
+  transient int modCount;
+
+  /**
+   * When we have this many elements, resize the hashtable.
+   */
+  private transient int threshold;
+
+  /**
+   * The number of elements contained in the set.
+   */
+  private transient int size;
+
+  /**
+   * Constructs a new empty instance of {@code CompactHashSet}.
+   */
+  CompactHashSet() {
+    init(DEFAULT_SIZE, DEFAULT_LOAD_FACTOR);
+  }
+
+  /**
+   * Constructs a new instance of {@code CompactHashSet} with the specified capacity.
+   *
+   * @param expectedSize the initial capacity of this {@code CompactHashSet}.
+   */
+  CompactHashSet(int expectedSize) {
+    init(expectedSize, DEFAULT_LOAD_FACTOR);
+  }
+
+  /**
+   * Pseudoconstructor for serialization support.
+   */
+  void init(int expectedSize, float loadFactor) {
+    Preconditions.checkArgument(expectedSize >= 0, "Initial capacity must be non-negative");
+    Preconditions.checkArgument(loadFactor > 0, "Illegal load factor");
+    int buckets = closedTableSize(expectedSize, loadFactor);
+    this.table = newTable(buckets);
+    this.loadFactor = loadFactor;
+    this.elements = new Object[expectedSize];
+    this.entries = newEntries(expectedSize);
+    this.threshold = Math.max(1, (int) (buckets * loadFactor));
+  }
+
+  private static int[] newTable(int size) {
+    int[] array = new int[size];
+    Arrays.fill(array, UNSET);
+    return array;
+  }
+
+  private static long[] newEntries(int size) {
+    long[] array = new long[size];
+    Arrays.fill(array, UNSET);
+    return array;
+  }
+
+  private static int getHash(long entry) {
+    return (int) (entry >>> 32);
+  }
+
+  /**
+   * Returns the index, or UNSET if the pointer is "null"
+   */
+  private static int getNext(long entry) {
+    return (int) entry;
+  }
+
+  /**
+   * Returns a new entry value by changing the "next" index of an existing entry
+   */
+  private static long swapNext(long entry, int newNext) {
+    return (HASH_MASK & entry) | (NEXT_MASK & newNext);
+  }
+
+  private int hashTableMask() {
+    return table.length - 1;
+  }
+
+  @Override
+  public boolean add(@Nullable E object) {
+    long[] entries = this.entries;
+    Object[] elements = this.elements;
+    int hash = smearedHash(object);
+    int tableIndex = hash & hashTableMask();
+    int newEntryIndex = this.size; // current size, and pointer to the entry to be appended
+    int next = table[tableIndex];
+    if (next == UNSET) { // uninitialized bucket
+      table[tableIndex] = newEntryIndex;
+    } else {
+      int last;
+      long entry;
+      do {
+        last = next;
+        entry = entries[next];
+        if (getHash(entry) == hash && Objects.equals(object, elements[next])) {
+          return false;
+        }
+        next = getNext(entry);
+      } while (next != UNSET);
+      entries[last] = swapNext(entry, newEntryIndex);
+    }
+    if (newEntryIndex == Integer.MAX_VALUE) {
+      throw new IllegalStateException("Cannot contain more than Integer.MAX_VALUE elements!");
+    }
+    int newSize = newEntryIndex + 1;
+    resizeMeMaybe(newSize);
+    insertEntry(newEntryIndex, object, hash);
+    this.size = newSize;
+    if (newEntryIndex >= threshold) {
+      resizeTable(2 * table.length);
+    }
+    modCount++;
+    return true;
+  }
+
+  /**
+   * Creates a fresh entry with the specified object at the specified position in the entry
+   * arrays.
+   */
+  void insertEntry(int entryIndex, E object, int hash) {
+    this.entries[entryIndex] = ((long) hash << 32) | (NEXT_MASK & UNSET);
+    this.elements[entryIndex] = object;
+  }
+
+  /**
+   * Returns currentSize + 1, after resizing the entries storage if necessary.
+   */
+  private void resizeMeMaybe(int newSize) {
+    int entriesSize = entries.length;
+    if (newSize > entriesSize) {
+      int newCapacity = entriesSize + Math.max(1, entriesSize >>> 1);
+      if (newCapacity < 0) {
+        newCapacity = Integer.MAX_VALUE;
+      }
+      if (newCapacity != entriesSize) {
+        resizeEntries(newCapacity);
+      }
+    }
+  }
+
+  /**
+   * Resizes the internal entries array to the specified capacity, which may be greater or less
+   * than the current capacity.
+   */
+  void resizeEntries(int newCapacity) {
+    this.elements = Arrays.copyOf(elements, newCapacity);
+    long[] entries = this.entries;
+    int oldSize = entries.length;
+    entries = Arrays.copyOf(entries, newCapacity);
+    if (newCapacity > oldSize) {
+      Arrays.fill(entries, oldSize, newCapacity, UNSET);
+    }
+    this.entries = entries;
+  }
+
+  private void resizeTable(int newCapacity) { // newCapacity always a power of two
+    int[] oldTable = table;
+    int oldCapacity = oldTable.length;
+    if (oldCapacity >= MAXIMUM_CAPACITY) {
+      threshold = Integer.MAX_VALUE;
+      return;
+    }
+    int newThreshold = 1 + (int) (newCapacity * loadFactor);
+    int[] newTable = newTable(newCapacity);
+    long[] entries = this.entries;
+
+    int mask = newTable.length - 1;
+    for (int i = 0; i < size; i++) {
+      long oldEntry = entries[i];
+      int hash = getHash(oldEntry);
+      int tableIndex = hash & mask;
+      int next = newTable[tableIndex];
+      newTable[tableIndex] = i;
+      entries[i] = ((long) hash << 32) | (NEXT_MASK & next);
+    }
+
+    this.threshold = newThreshold;
+    this.table = newTable;
+  }
+
+  @Override
+  public boolean contains(@Nullable Object object) {
+    int hash = smearedHash(object);
+    int next = table[hash & hashTableMask()];
+    while (next != UNSET) {
+      long entry = entries[next];
+      if (getHash(entry) == hash && Objects.equals(object, elements[next])) {
+        return true;
+      }
+      next = getNext(entry);
+    }
+    return false;
+  }
+
+  @Override
+  public boolean remove(@Nullable Object object) {
+    return remove(object, smearedHash(object));
+  }
+
+  private boolean remove(Object object, int hash) {
+    int tableIndex = hash & hashTableMask();
+    int next = table[tableIndex];
+    if (next == UNSET) {
+      return false;
+    }
+    int last = UNSET;
+    do {
+      if (getHash(entries[next]) == hash && Objects.equals(object, elements[next])) {
+        if (last == UNSET) {
+          // we need to update the root link from table[]
+          table[tableIndex] = getNext(entries[next]);
+        } else {
+          // we need to update the link from the chain
+          entries[last] = swapNext(entries[last], getNext(entries[next]));
+        }
+
+        moveEntry(next);
+        size--;
+        modCount++;
+        return true;
+      }
+      last = next;
+      next = getNext(entries[next]);
+    } while (next != UNSET);
+    return false;
+  }
+
+  /**
+   * Moves the last entry in the entry array into {@code dstIndex}, and nulls out its old position.
+   */
+  void moveEntry(int dstIndex) {
+    int srcIndex = size() - 1;
+    if (dstIndex < srcIndex) {
+      // move last entry to deleted spot
+      elements[dstIndex] = elements[srcIndex];
+      elements[srcIndex] = null;
+
+      // move the last entry to the removed spot, just like we moved the element
+      long lastEntry = entries[srcIndex];
+      entries[dstIndex] = lastEntry;
+      entries[srcIndex] = UNSET;
+
+      // also need to update whoever's "next" pointer was pointing to the last entry place
+      // reusing "tableIndex" and "next"; these variables were no longer needed
+      int tableIndex = getHash(lastEntry) & hashTableMask();
+      int lastNext = table[tableIndex];
+      if (lastNext == srcIndex) {
+        // we need to update the root pointer
+        table[tableIndex] = dstIndex;
+      } else {
+        // we need to update a pointer in an entry
+        int previous;
+        long entry;
+        do {
+          previous = lastNext;
+          lastNext = getNext(entry = entries[lastNext]);
+        } while (lastNext != srcIndex);
+        // here, entries[previous] points to the old entry location; update it
+        entries[previous] = swapNext(entry, dstIndex);
+      }
+    } else {
+      elements[dstIndex] = null;
+      entries[dstIndex] = UNSET;
+    }
+  }
+
+  @Override
+  public Iterator<E> iterator() {
+    return new Iterator<E>() {
+      int expectedModCount = modCount;
+      boolean nextCalled = false;
+      int index = 0;
+
+      @Override
+      public boolean hasNext() {
+        return index < size;
+      }
+
+      @Override
+      @SuppressWarnings("unchecked")
+      public E next() {
+        checkForConcurrentModification();
+        if (!hasNext()) {
+          throw new NoSuchElementException();
+        }
+        nextCalled = true;
+        return (E) elements[index++];
+      }
+
+      @Override
+      public void remove() {
+        checkForConcurrentModification();
+        Preconditions.checkState(nextCalled, "no calls to next() since the last call to remove()");
+        expectedModCount++;
+        index--;
+        CompactHashSet.this.remove(elements[index], getHash(entries[index]));
+        nextCalled = false;
+      }
+
+      private void checkForConcurrentModification() {
+        if (modCount != expectedModCount) {
+          throw new ConcurrentModificationException();
+        }
+      }
+    };
+  }
+
+  @Override
+  public int size() {
+    return size;
+  }
+
+  @Override
+  public boolean isEmpty() {
+    return size == 0;
+  }
+
+  @Override
+  public Object[] toArray() {
+    return Arrays.copyOf(elements, size);
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public <T> T[] toArray(T[] a) {
+    if (a.length < size) {
+      a = (T[]) Array.newInstance(a.getClass().getComponentType(), size);
+    }
+    System.arraycopy(elements, 0, a, 0, size);
+    return a;
+  }
+
+  /**
+   * Ensures that this {@code CompactHashSet} has the smallest representation in memory,
+   * given its current size.
+   */
+  public void trimToSize() {
+    int size = this.size;
+    if (size < entries.length) {
+      resizeEntries(size);
+    }
+    // size / loadFactor gives the table size of the appropriate load factor,
+    // but that may not be a power of two. We floor it to a power of two by
+    // keeping its highest bit. But the smaller table may have a load factor
+    // larger than what we want; then we want to go to the next power of 2 if we can
+    int minimumTableSize = Math.max(1, Integer.highestOneBit((int) (size / loadFactor)));
+    if (minimumTableSize < MAXIMUM_CAPACITY) {
+      double load = (double) size / minimumTableSize;
+      if (load > loadFactor) {
+        minimumTableSize <<= 1; // increase to next power if possible
+      }
+    }
+
+    if (minimumTableSize < table.length) {
+      resizeTable(minimumTableSize);
+    }
+  }
+
+  @Override
+  public void clear() {
+    modCount++;
+    Arrays.fill(elements, 0, size, null);
+    Arrays.fill(table, UNSET);
+    Arrays.fill(entries, UNSET);
+    this.size = 0;
+  }
+
+  private void writeObject(ObjectOutputStream stream) throws IOException {
+    stream.defaultWriteObject();
+    stream.writeInt(table.length);
+    stream.writeFloat(loadFactor);
+    stream.writeInt(size);
+    for (E e : this) {
+      stream.writeObject(e);
+    }
+  }
+
+  @SuppressWarnings("unchecked")
+  private void readObject(ObjectInputStream stream) throws IOException, ClassNotFoundException {
+    stream.defaultReadObject();
+    int length = stream.readInt();
+    float loadFactor = stream.readFloat();
+    int elementCount = stream.readInt();
+    try {
+      init(length, loadFactor);
+    } catch (IllegalArgumentException e) {
+      throw new InvalidObjectException(e.getMessage());
+    }
+    for (int i = elementCount; --i >= 0;) {
+      E element = (E) stream.readObject();
+      add(element);
+    }
+  }
+}
author	Han-Wen Nienhuys <hanwen@google.com>	2015-02-25 16:45:20 +0100
committer	Han-Wen Nienhuys <hanwen@google.com>	2015-02-25 16:45:20 +0100
commit	d08b27fa9701fecfdb69e1b0d1ac2459efc2129b (patch)
tree	5d50963026239ca5aebfb47ea5b8db7e814e57c8 /src/main/java/com/google/devtools/build/lib/collect/CompactHashSet.java