summaryrefslogtreecommitdiff
path: root/absl/crc/internal/crc_cord_state.h
blob: fbbb8c004c859cce70a026eaf06c2ff6cadef5fc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
// Copyright 2022 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_
#define ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_

#include <atomic>
#include <cstddef>
#include <deque>

#include "absl/base/config.h"
#include "absl/crc/crc32c.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace crc_internal {

// CrcCordState is a copy-on-write class that holds the chunked CRC32C data
// that allows CrcCord to perform efficient substring operations. CrcCordState
// is used as a member variable in CrcCord. When a CrcCord is converted to a
// Cord, the CrcCordState is shallow-copied into the root node of the Cord. If
// the converted Cord is modified outside of CrcCord, the CrcCordState is
// discarded from the Cord. If the Cord is converted back to a CrcCord, and the
// Cord is still carrying the CrcCordState in its root node, the CrcCord can
// re-use the CrcCordState, making the construction of the CrcCord cheap.
//
// CrcCordState does not try to encapsulate the CRC32C state (CrcCord requires
// knowledge of how CrcCordState represents the CRC32C state). It does
// encapsulate the copy-on-write nature of the state.
class CrcCordState {
 public:
  // Constructors.
  CrcCordState();
  CrcCordState(const CrcCordState&);
  CrcCordState(CrcCordState&&);

  // Destructor. Atomically unreferences the data.
  ~CrcCordState();

  // Copy and move operators.
  CrcCordState& operator=(const CrcCordState&);
  CrcCordState& operator=(CrcCordState&&);

  // A (length, crc) pair.
  struct PrefixCrc {
    PrefixCrc() = default;
    PrefixCrc(size_t length_arg, absl::crc32c_t crc_arg)
        : length(length_arg), crc(crc_arg) {}

    size_t length = 0;

    // TODO(absl-team): Memory stomping often zeros out memory. If this struct
    // gets overwritten, we could end up with {0, 0}, which is the correct CRC
    // for a string of length 0. Consider storing a scrambled value and
    // unscrambling it before verifying it.
    absl::crc32c_t crc = absl::crc32c_t{0};
  };

  // The representation of the chunked CRC32C data.
  struct Rep {
    // `removed_prefix` is the crc and length of any prefix that has been
    // removed from the Cord (for example, by calling
    // `CrcCord::RemovePrefix()`). To get the checksum of any prefix of the
    // cord, this value must be subtracted from `prefix_crc`. See `Checksum()`
    // for an example.
    //
    // CrcCordState is said to be "normalized" if removed_prefix.length == 0.
    PrefixCrc removed_prefix;

    // A deque of (length, crc) pairs, representing length and crc of a prefix
    // of the Cord, before removed_prefix has been subtracted. The lengths of
    // the prefixes are stored in increasing order. If the Cord is not empty,
    // the last value in deque is the contains the CRC32C of the entire Cord
    // when removed_prefix is subtracted from it.
    std::deque<PrefixCrc> prefix_crc;
  };

  // Returns a reference to the representation of the chunked CRC32C data.
  const Rep& rep() const { return refcounted_rep_->rep; }

  // Returns a mutable reference to the representation of the chunked CRC32C
  // data. Calling this function will copy the data if another instance also
  // holds a reference to the data, so it is important to call rep() instead if
  // the data may not be mutated.
  Rep* mutable_rep() {
    if (refcounted_rep_->count.load(std::memory_order_acquire) != 1) {
      RefcountedRep* copy = new RefcountedRep;
      copy->rep = refcounted_rep_->rep;
      Unref(refcounted_rep_);
      refcounted_rep_ = copy;
    }
    return &refcounted_rep_->rep;
  }

  // Returns the CRC32C of the entire Cord.
  absl::crc32c_t Checksum() const;

  // Returns true if the chunked CRC32C cached is normalized.
  bool IsNormalized() const { return rep().removed_prefix.length == 0; }

  // Normalizes the chunked CRC32C checksum cache by subtracting any removed
  // prefix from the chunks.
  void Normalize();

  // Returns the number of cached chunks.
  size_t NumChunks() const { return rep().prefix_crc.size(); }

  // Helper that returns the (length, crc) of the `n`-th cached chunked.
  PrefixCrc NormalizedPrefixCrcAtNthChunk(size_t n) const;

  // Poisons all chunks to so that Checksum() will likely be incorrect with high
  // probability.
  void Poison();

 private:
  struct RefcountedRep {
    std::atomic<int32_t> count{1};
    Rep rep;
  };

  // Adds a reference to the shared global empty `RefcountedRep`, and returns a
  // pointer to the `RefcountedRep`. This is an optimization to avoid unneeded
  // allocations when the allocation is unlikely to ever be used. The returned
  // pointer can be `Unref()`ed when it is no longer needed.  Since the returned
  // instance will always have a reference counter greater than 1, attempts to
  // modify it (by calling `mutable_rep()`) will create a new unshared copy.
  static RefcountedRep* RefSharedEmptyRep();

  static void Ref(RefcountedRep* r) {
    assert(r != nullptr);
    r->count.fetch_add(1, std::memory_order_relaxed);
  }

  static void Unref(RefcountedRep* r) {
    assert(r != nullptr);
    if (r->count.fetch_sub(1, std::memory_order_acq_rel) == 1) {
      delete r;
    }
  }

  RefcountedRep* refcounted_rep_;
};

}  // namespace crc_internal
ABSL_NAMESPACE_END
}  // namespace absl

#endif  // ABSL_CRC_INTERNAL_CRC_CORD_STATE_H_