FST  openfst-1.8.3
OpenFst Library
arc-arena.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Allocators for contiguous arrays of arcs.
19 
20 #ifndef FST_ARC_ARENA_H_
21 #define FST_ARC_ARENA_H_
22 
23 #include <algorithm>
24 #include <cstddef>
25 #include <cstdint>
26 #include <deque>
27 #include <list>
28 #include <memory>
29 #include <utility>
30 
31 #include <fst/fst.h>
32 #include <fst/memory.h>
33 #include <unordered_map>
34 
35 namespace fst {
36 
37 // ArcArena is used for fast allocation of contiguous arrays of arcs.
38 //
39 // To create an arc array:
40 // for each state:
41 // for each arc:
42 // arena.PushArc();
43 // // Commits these arcs and returns pointer to them.
44 // Arc *arcs = arena.GetArcs();
45 //
46 // OR
47 //
48 // arena.DropArcs(); // Throws away current arcs, reuse the space.
49 //
50 // The arcs returned are guaranteed to be contiguous and the pointer returned
51 // will never be invalidated until the arena is cleared for reuse.
52 //
53 // The contents of the arena can be released with a call to arena.Clear() after
54 // which the arena will restart with an initial allocation capable of holding at
55 // least all of the arcs requested in the last usage before Clear() making
56 // subsequent uses of the Arena more efficient.
57 //
58 // The max_retained_size option can limit the amount of arc space requested on
59 // Clear() to avoid excess growth from intermittent high usage.
60 template <typename Arc>
61 class ArcArena {
62  public:
63  explicit ArcArena(size_t block_size = 256, size_t max_retained_size = 1e6)
64  : block_size_(block_size), max_retained_size_(max_retained_size) {
65  blocks_.emplace_back(MakeSharedBlock(block_size_));
66  first_block_size_ = block_size_;
67  total_size_ = block_size_;
68  arcs_ = blocks_.back().get();
69  end_ = arcs_ + block_size_;
70  next_ = arcs_;
71  }
72 
73  ArcArena(const ArcArena &copy)
74  : arcs_(copy.arcs_),
75  next_(copy.next_),
76  end_(copy.end_),
77  block_size_(copy.block_size_),
78  first_block_size_(copy.first_block_size_),
79  total_size_(copy.total_size_),
80  max_retained_size_(copy.max_retained_size_),
81  blocks_(copy.blocks_) {
82  NewBlock(block_size_);
83  }
84 
85  void ReserveArcs(size_t n) {
86  if (next_ + n < end_) return;
87  NewBlock(n);
88  }
89 
90  void PushArc(const Arc &arc) {
91  if (next_ == end_) {
92  size_t length = next_ - arcs_;
93  NewBlock(length * 2);
94  }
95  *next_ = arc;
96  ++next_;
97  }
98 
99  const Arc *GetArcs() {
100  const auto *arcs = arcs_;
101  arcs_ = next_;
102  return arcs;
103  }
104 
105  void DropArcs() { next_ = arcs_; }
106 
107  size_t Size() { return total_size_; }
108 
109  void Clear() {
110  blocks_.resize(1);
111  if (total_size_ > first_block_size_) {
112  first_block_size_ = std::min(max_retained_size_, total_size_);
113  blocks_.back() = MakeSharedBlock(first_block_size_);
114  }
115  total_size_ = first_block_size_;
116  arcs_ = blocks_.back().get();
117  end_ = arcs_ + first_block_size_;
118  next_ = arcs_;
119  }
120 
121  private:
122  // Allocates a new block with capacity of at least n or block_size,
123  // copying incomplete arc sequence from old block to new block.
124  void NewBlock(size_t n) {
125  const auto length = next_ - arcs_;
126  const auto new_block_size = std::max(n, block_size_);
127  total_size_ += new_block_size;
128  blocks_.emplace_back(MakeSharedBlock(new_block_size));
129  std::copy(arcs_, next_, blocks_.back().get());
130  arcs_ = blocks_.back().get();
131  next_ = arcs_ + length;
132  end_ = arcs_ + new_block_size;
133  }
134 
135  std::shared_ptr<Arc[]> MakeSharedBlock(size_t size) {
136  return std::shared_ptr<Arc[]>(new Arc[size]);
137  }
138 
139  Arc *arcs_;
140  Arc *next_;
141  const Arc *end_;
142  size_t block_size_;
143  size_t first_block_size_;
144  size_t total_size_;
145  size_t max_retained_size_;
146  std::list<std::shared_ptr<Arc[]>> blocks_;
147 };
148 
149 // ArcArenaStateStore uses a resusable ArcArena to store arc arrays and does not
150 // require that the Expander call ReserveArcs first.
151 //
152 // TODO(tombagby): Make cache type configurable.
153 // TODO(tombagby): Provide ThreadLocal/Concurrent configuration.
154 template <class A>
156  public:
157  using Arc = A;
158  using Weight = typename Arc::Weight;
159  using StateId = typename Arc::StateId;
160 
161  class State {
162  public:
163  Weight Final() const { return final_weight_; }
164 
165  size_t NumInputEpsilons() const { return niepsilons_; }
166 
167  size_t NumOutputEpsilons() const { return noepsilons_; }
168 
169  size_t NumArcs() const { return narcs_; }
170 
171  const Arc &GetArc(size_t n) const { return arcs_[n]; }
172 
173  const Arc *Arcs() const { return arcs_; }
174 
175  int *MutableRefCount() const { return nullptr; }
176 
177  private:
178  State(Weight final_weight, int32_t niepsilons, int32_t noepsilons,
179  int32_t narcs, const Arc *arcs)
180  : final_weight_(std::move(final_weight)),
181  niepsilons_(niepsilons),
182  noepsilons_(noepsilons),
183  narcs_(narcs),
184  arcs_(arcs) {}
185 
186  Weight final_weight_;
187  size_t niepsilons_;
188  size_t noepsilons_;
189  size_t narcs_;
190  const Arc *arcs_;
191 
192  friend class ArcArenaStateStore<Arc>;
193  };
194 
195  template <class Expander>
196  State *FindOrExpand(Expander &expander, StateId state_id) {
197  const auto &[it, success] = cache_.emplace(state_id, nullptr);
198  if (!success) return it->second;
199  // Needs a new state.
200  StateBuilder builder(&arena_);
201  expander.Expand(state_id, &builder);
202  const auto arcs = arena_.GetArcs();
203  size_t narcs = builder.narcs_;
204  size_t niepsilons = 0;
205  size_t noepsilons = 0;
206  for (size_t i = 0; i < narcs; ++i) {
207  if (arcs[i].ilabel == 0) ++niepsilons;
208  if (arcs[i].olabel == 0) ++noepsilons;
209  }
210  states_.emplace_back(
211  State(builder.final_weight_, niepsilons, noepsilons, narcs, arcs));
212  // Places it in the cache.
213  auto state = &states_.back();
214  it->second = state;
215  return state;
216  }
217 
218  State *Find(StateId state_id) const {
219  auto it = cache_.find(state_id);
220  return (it == cache_.end()) ? nullptr : it->second;
221  }
222 
223  private:
224  class StateBuilder {
225  public:
226  explicit StateBuilder(ArcArena<Arc> *arena)
227  : arena_(arena), final_weight_(Weight::Zero()), narcs_(0) {}
228 
229  void SetFinal(Weight weight) { final_weight_ = std::move(weight); }
230 
231  void ReserveArcs(size_t n) { arena_->ReserveArcs(n); }
232 
233  void AddArc(const Arc &arc) {
234  ++narcs_;
235  arena_->PushArc(arc);
236  }
237 
238  private:
239  friend class ArcArenaStateStore<Arc>;
240 
241  ArcArena<Arc> *arena_;
242  Weight final_weight_;
243  size_t narcs_;
244  };
245 
246  std::unordered_map<StateId, State *> cache_;
247  std::deque<State> states_;
248  ArcArena<Arc> arena_;
249 };
250 
251 } // namespace fst
252 
253 #endif // FST_ARC_ARENA_H_
typename Arc::StateId StateId
Definition: arc-arena.h:159
void PushArc(const Arc &arc)
Definition: arc-arena.h:90
typename Arc::Weight Weight
Definition: arc-arena.h:158
void Clear()
Definition: arc-arena.h:109
const Arc * Arcs() const
Definition: arc-arena.h:173
size_t Size()
Definition: arc-arena.h:107
size_t NumOutputEpsilons() const
Definition: arc-arena.h:167
const Arc & GetArc(size_t n) const
Definition: arc-arena.h:171
void DropArcs()
Definition: arc-arena.h:105
int * MutableRefCount() const
Definition: arc-arena.h:175
State * Find(StateId state_id) const
Definition: arc-arena.h:218
ArcArena(size_t block_size=256, size_t max_retained_size=1e6)
Definition: arc-arena.h:63
State * FindOrExpand(Expander &expander, StateId state_id)
Definition: arc-arena.h:196
void ReserveArcs(size_t n)
Definition: arc-arena.h:85
ArcArena(const ArcArena &copy)
Definition: arc-arena.h:73
const Arc * GetArcs()
Definition: arc-arena.h:99
size_t NumInputEpsilons() const
Definition: arc-arena.h:165