FST  openfst-1.8.3
OpenFst Library
ngram-fst.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // NgramFst implements a n-gram language model based upon the LOUDS data
19 // structure. Please refer to "Unary Data Structures for Language Models"
20 // http://research.google.com/pubs/archive/37218.pdf
21 
22 #ifndef FST_EXTENSIONS_NGRAM_NGRAM_FST_H_
23 #define FST_EXTENSIONS_NGRAM_NGRAM_FST_H_
24 
25 #include <sys/types.h>
26 
27 #include <algorithm>
28 #include <cstddef>
29 #include <cstdint>
30 #include <cstring>
31 #include <ios>
32 #include <iostream>
33 #include <istream>
34 #include <memory>
35 #include <ostream>
36 #include <queue>
37 #include <string>
38 #include <utility>
39 #include <vector>
40 
41 #include <fst/compat.h>
42 #include <fst/log.h>
44 #include <fst/arcsort.h>
45 #include <fst/expanded-fst.h>
46 #include <fstream>
47 #include <fst/fst.h>
48 #include <fst/fstlib.h>
49 #include <fst/mapped-file.h>
50 #include <fst/matcher.h>
51 #include <fst/properties.h>
52 #include <fst/util.h>
53 #include <fst/vector-fst.h>
54 
55 namespace fst {
56 template <class A>
57 class NGramFst;
58 template <class A>
60 
61 // Instance data containing mutable state for bookkeeping repeated access to
62 // the same state.
63 template <class A>
64 struct NGramFstInst {
65  typedef typename A::Label Label;
66  typedef typename A::StateId StateId;
67  typedef typename A::Weight Weight;
68  StateId state_;
69  size_t num_futures_;
70  size_t offset_;
71  size_t node_;
72  StateId node_state_;
73  std::vector<Label> context_;
74  StateId context_state_;
76  : state_(kNoStateId),
77  node_state_(kNoStateId),
78  context_state_(kNoStateId) {}
79 };
80 
81 namespace internal {
82 
83 // Implementation class for LOUDS based NgramFst interface.
84 template <class A>
85 class NGramFstImpl : public FstImpl<A> {
88  using FstImpl<A>::SetType;
90 
91  friend class ArcIterator<NGramFst<A>>;
92  friend class NGramFstMatcher<A>;
93 
94  public:
98 
99  typedef A Arc;
100  typedef typename A::Label Label;
101  typedef typename A::StateId StateId;
102  typedef typename A::Weight Weight;
103 
105  SetType("ngram");
106  SetInputSymbols(nullptr);
107  SetOutputSymbols(nullptr);
108  SetProperties(kStaticProperties);
109  }
110 
111  NGramFstImpl(const Fst<A> &fst, std::vector<StateId> *order_out);
112 
113  explicit NGramFstImpl(const Fst<A> &fst) : NGramFstImpl(fst, nullptr) {}
114 
115  NGramFstImpl(const NGramFstImpl &other) {
116  FSTERROR() << "Copying NGramFst Impls is not supported, use safe = false.";
117  SetProperties(kError, kError);
118  }
119 
120  static NGramFstImpl<A> *Read(std::istream &strm, const FstReadOptions &opts) {
121  auto impl = std::make_unique<NGramFstImpl<A>>();
122  FstHeader hdr;
123  if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) return nullptr;
124  uint64_t num_states, num_futures, num_final;
125  const size_t offset =
126  sizeof(num_states) + sizeof(num_futures) + sizeof(num_final);
127  // Peek at num_states and num_futures to see how much more needs to be read.
128  strm.read(reinterpret_cast<char *>(&num_states), sizeof(num_states));
129  strm.read(reinterpret_cast<char *>(&num_futures), sizeof(num_futures));
130  strm.read(reinterpret_cast<char *>(&num_final), sizeof(num_final));
131  size_t size = Storage(num_states, num_futures, num_final);
132  std::unique_ptr<MappedFile> data_region(MappedFile::Allocate(size));
133  char *data = static_cast<char *>(data_region->mutable_data());
134  // Copy num_states, num_futures and num_final back into data.
135  memcpy(data, reinterpret_cast<char *>(&num_states), sizeof(num_states));
136  memcpy(data + sizeof(num_states), reinterpret_cast<char *>(&num_futures),
137  sizeof(num_futures));
138  memcpy(data + sizeof(num_states) + sizeof(num_futures),
139  reinterpret_cast<char *>(&num_final), sizeof(num_final));
140  strm.read(data + offset, size - offset);
141  if (strm.fail()) return nullptr;
142  impl->Init(data, std::move(data_region));
143  return impl.release();
144  }
145 
146  bool Write(std::ostream &strm, const FstWriteOptions &opts) const {
147  FstHeader hdr;
148  hdr.SetStart(Start());
149  hdr.SetNumStates(num_states_);
150  WriteHeader(strm, opts, kFileVersion, &hdr);
151  strm.write(data_, StorageSize());
152  return !strm.fail();
153  }
154 
155  StateId Start() const { return start_; }
156 
157  Weight Final(StateId state) const {
158  if (final_index_.Get(state)) {
159  return final_probs_[final_index_.Rank1(state)];
160  } else {
161  return Weight::Zero();
162  }
163  }
164 
165  size_t NumArcs(StateId state, NGramFstInst<A> *inst = nullptr) const {
166  if (inst == nullptr) {
167  const std::pair<size_t, size_t> zeros =
168  (state == 0) ? select_root_ : future_index_.Select0s(state);
169  return zeros.second - zeros.first - 1;
170  }
171  SetInstFuture(state, inst);
172  return inst->num_futures_ + ((state == 0) ? 0 : 1);
173  }
174 
175  size_t NumInputEpsilons(StateId state) const {
176  // State 0 has no parent, thus no backoff.
177  if (state == 0) return 0;
178  return 1;
179  }
180 
181  size_t NumOutputEpsilons(StateId state) const {
182  return NumInputEpsilons(state);
183  }
184 
185  StateId NumStates() const { return num_states_; }
186 
188  data->base = nullptr;
189  data->nstates = num_states_;
190  }
191 
192  static size_t Storage(uint64_t num_states, uint64_t num_futures,
193  uint64_t num_final) {
194  uint64_t b64;
195  Weight weight;
196  Label label;
197  size_t offset =
198  sizeof(num_states) + sizeof(num_futures) + sizeof(num_final);
199  offset +=
200  sizeof(b64) * (BitmapIndex::StorageSize(num_states * 2 + 1) +
201  BitmapIndex::StorageSize(num_futures + num_states + 1) +
202  BitmapIndex::StorageSize(num_states));
203  offset += (num_states + 1) * sizeof(label) + num_futures * sizeof(label);
204  // Pad for alignemnt, see
205  // http://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding
206  offset = (offset + sizeof(weight) - 1) & ~(sizeof(weight) - 1);
207  offset += (num_states + 1) * sizeof(weight) + num_final * sizeof(weight) +
208  (num_futures + 1) * sizeof(weight);
209  return offset;
210  }
211 
212  void SetInstFuture(StateId state, NGramFstInst<A> *inst) const {
213  if (inst->state_ != state) {
214  inst->state_ = state;
215  const std::pair<size_t, size_t> zeros = future_index_.Select0s(state);
216  inst->num_futures_ = zeros.second - zeros.first - 1;
217  inst->offset_ = future_index_.Rank1(zeros.first + 1);
218  }
219  }
220 
221  void SetInstNode(NGramFstInst<A> *inst) const {
222  if (inst->node_state_ != inst->state_) {
223  inst->node_state_ = inst->state_;
224  inst->node_ = context_index_.Select1(inst->state_);
225  }
226  }
227 
228  void SetInstContext(NGramFstInst<A> *inst) const {
229  SetInstNode(inst);
230  if (inst->context_state_ != inst->state_) {
231  inst->context_state_ = inst->state_;
232  inst->context_.clear();
233  size_t node = inst->node_;
234  while (node != 0) {
235  const size_t rank1 = context_index_.Rank1(node);
236  const size_t rank0 = node - rank1;
237  inst->context_.push_back(context_words_[rank1]);
238  node = context_index_.Select1(rank0 - 1);
239  }
240  }
241  }
242 
243  // Access to the underlying representation
244  const char *GetData(size_t *data_size) const {
245  *data_size = StorageSize();
246  return data_;
247  }
248 
249  void Init(const char *data,
250  std::unique_ptr<MappedFile> data_region);
251 
252  const std::vector<Label> &GetContext(StateId s, NGramFstInst<A> *inst) const {
253  SetInstFuture(s, inst);
254  SetInstContext(inst);
255  return inst->context_;
256  }
257 
258  size_t StorageSize() const {
259  return Storage(num_states_, num_futures_, num_final_);
260  }
261 
262  void GetStates(const std::vector<Label> &context,
263  std::vector<StateId> *states) const;
264 
265  private:
266  StateId Transition(const std::vector<Label> &context, Label future) const;
267 
268  // Properties always true for this Fst class.
269  static constexpr uint64_t kStaticProperties =
274  // Current file format version.
275  static constexpr int kFileVersion = 4;
276  // Minimum file format version supported.
277  static constexpr int kMinFileVersion = 4;
278 
279  std::unique_ptr<MappedFile> data_region_;
280  const char *data_ = nullptr; // Not owned.
281  StateId start_ = fst::kNoStateId;
282  uint64_t num_states_ = 0;
283  uint64_t num_futures_ = 0;
284  uint64_t num_final_ = 0;
285  std::pair<size_t, size_t> select_root_;
286  const Label *root_children_ = nullptr;
287  // borrowed references
288  const uint64_t *context_ = nullptr;
289  const uint64_t *future_ = nullptr;
290  const uint64_t *final_ = nullptr;
291  const Label *context_words_ = nullptr;
292  const Label *future_words_ = nullptr;
293  const Weight *backoff_ = nullptr;
294  const Weight *final_probs_ = nullptr;
295  const Weight *future_probs_ = nullptr;
296  // Uses all operations.
297  BitmapIndex context_index_;
298  // Uses Select0 and Rank1.
299  BitmapIndex future_index_;
300  // Uses Get and Rank1. This wastes space if there are no or few final
301  // states, but it's also small. TODO(jrosenstock): Look at EliasFanoArray.
302  BitmapIndex final_index_;
303 };
304 
305 template <typename A>
307  const std::vector<Label> &context,
308  std::vector<typename A::StateId> *states) const {
309  states->clear();
310  states->push_back(0);
311  typename std::vector<Label>::const_reverse_iterator cit = context.rbegin();
312  const Label *children = root_children_;
313  size_t num_children = select_root_.second - 2;
314  const Label *loc = std::lower_bound(children, children + num_children, *cit);
315  if (loc == children + num_children || *loc != *cit) return;
316  size_t node = 2 + loc - children;
317  size_t node_rank = context_index_.Rank1(node);
318  states->push_back(node_rank);
319  if (context.size() == 1) return;
320  std::pair<size_t, size_t> zeros =
321  node_rank == 0 ? select_root_ : context_index_.Select0s(node_rank);
322  size_t first_child = zeros.first + 1;
323  ++cit;
324  if (context_index_.Get(first_child) != false) {
325  size_t last_child = zeros.second - 1;
326  while (cit != context.rend()) {
327  children = context_words_ + context_index_.Rank1(first_child);
328  loc = std::lower_bound(children, children + last_child - first_child + 1,
329  *cit);
330  if (loc == children + last_child - first_child + 1 || *loc != *cit) {
331  break;
332  }
333  ++cit;
334  node = first_child + loc - children;
335  node_rank = context_index_.Rank1(node);
336  states->push_back(node_rank);
337  zeros =
338  node_rank == 0 ? select_root_ : context_index_.Select0s(node_rank);
339  first_child = zeros.first + 1;
340  if (context_index_.Get(first_child) == false) break;
341  last_child = zeros.second - 1;
342  }
343  }
344 }
345 
346 } // namespace internal
347 
348 /*****************************************************************************/
349 template <class A>
350 class NGramFst : public ImplToExpandedFst<internal::NGramFstImpl<A>> {
351  friend class ArcIterator<NGramFst<A>>;
352  friend class NGramFstMatcher<A>;
353 
354  public:
355  typedef A Arc;
356  typedef typename A::StateId StateId;
357  typedef typename A::Label Label;
358  typedef typename A::Weight Weight;
360 
361  explicit NGramFst(const Fst<A> &dst)
362  : ImplToExpandedFst<Impl>(std::make_shared<Impl>(dst, nullptr)) {}
363 
364  NGramFst(const Fst<A> &fst, std::vector<StateId> *order_out)
365  : ImplToExpandedFst<Impl>(std::make_shared<Impl>(fst, order_out)) {}
366 
367  // Because the NGramFstImpl is a const stateless data structure, there
368  // is never a need to do anything beside copy the reference.
369  NGramFst(const NGramFst<A> &fst, bool safe = false)
370  : ImplToExpandedFst<Impl>(fst, false) {}
371 
372  NGramFst() : ImplToExpandedFst<Impl>(std::make_shared<Impl>()) {}
373 
374  // Non-standard constructor to initialize NGramFst directly from data. Caller
375  // maintains ownership of data, which must outlive the NGramFst.
376  explicit NGramFst(const char *data)
377  : ImplToExpandedFst<Impl>(std::make_shared<Impl>()) {
378  GetMutableImpl()->Init(data, /*data_region=*/nullptr);
379  }
380 
381  // Get method that gets the data associated with Init().
382  const char *GetData(size_t *data_size) const {
383  return GetImpl()->GetData(data_size);
384  }
385 
386  const std::vector<Label> GetContext(StateId s) const {
387  return GetImpl()->GetContext(s, &inst_);
388  }
389 
390  // Consumes as much as possible of context from right to left, returns the
391  // the states corresponding to the increasingly conditioned input sequence.
392  void GetStates(const std::vector<Label> &context,
393  std::vector<StateId> *state) const {
394  return GetImpl()->GetStates(context, state);
395  }
396 
397  size_t NumArcs(StateId s) const override {
398  return GetImpl()->NumArcs(s, &inst_);
399  }
400 
401  NGramFst<A> *Copy(bool safe = false) const override {
402  return new NGramFst(*this, safe);
403  }
404 
405  static NGramFst<A> *Read(std::istream &strm, const FstReadOptions &opts) {
406  Impl *impl = Impl::Read(strm, opts);
407  return impl ? new NGramFst<A>(std::shared_ptr<Impl>(impl)) : nullptr;
408  }
409 
410  static NGramFst<A> *Read(const std::string &source) {
411  if (!source.empty()) {
412  std::ifstream strm(source,
413  std::ios_base::in | std::ios_base::binary);
414  if (!strm.good()) {
415  LOG(ERROR) << "NGramFst::Read: Can't open file: " << source;
416  return nullptr;
417  }
418  return Read(strm, FstReadOptions(source));
419  } else {
420  return Read(std::cin, FstReadOptions("standard input"));
421  }
422  }
423 
424  bool Write(std::ostream &strm, const FstWriteOptions &opts) const override {
425  return GetImpl()->Write(strm, opts);
426  }
427 
428  bool Write(const std::string &source) const override {
429  return Fst<A>::WriteFile(source);
430  }
431 
432  inline void InitStateIterator(StateIteratorData<A> *data) const override {
433  GetImpl()->InitStateIterator(data);
434  }
435 
436  inline void InitArcIterator(StateId s,
437  ArcIteratorData<A> *data) const override;
438 
439  MatcherBase<A> *InitMatcher(MatchType match_type) const override {
440  return new NGramFstMatcher<A>(this, match_type);
441  }
442 
443  size_t StorageSize() const { return GetImpl()->StorageSize(); }
444 
445  static bool HasRequiredProps(const Fst<A> &fst) {
446  static const auto props =
448  return fst.Properties(props, true) == props;
449  }
450 
451  static bool HasRequiredStructure(const Fst<A> &fst) {
452  if (!HasRequiredProps(fst)) {
453  return false;
454  }
455  typename A::StateId unigram = fst.Start();
456  while (true) { // Follows epsilon arc chain to find unigram state.
457  if (unigram == fst::kNoStateId) return false; // No unigram state.
458  typename fst::ArcIterator<Fst<A>> aiter(fst, unigram);
459  if (aiter.Done() || aiter.Value().ilabel != 0) break;
460  unigram = aiter.Value().nextstate;
461  aiter.Next();
462  }
463  // Other requirement: all states other than unigram an epsilon arc.
464  for (fst::StateIterator<Fst<A>> siter(fst); !siter.Done();
465  siter.Next()) {
466  const typename A::StateId &state = siter.Value();
467  fst::ArcIterator<Fst<A>> aiter(fst, state);
468  if (state != unigram) {
469  if (aiter.Done()) return false;
470  if (aiter.Value().ilabel != 0) return false;
471  aiter.Next();
472  if (!aiter.Done() && aiter.Value().ilabel == 0) return false;
473  }
474  }
475  return true;
476  }
477 
478  private:
480  using ImplToExpandedFst<Impl, ExpandedFst<A>>::GetMutableImpl;
481 
482  explicit NGramFst(std::shared_ptr<Impl> impl)
483  : ImplToExpandedFst<Impl>(impl) {}
484 
485  mutable NGramFstInst<A> inst_;
486 };
487 
488 template <class A>
490  ArcIteratorData<A> *data) const {
491  GetImpl()->SetInstFuture(s, &inst_);
492  GetImpl()->SetInstNode(&inst_);
493  data->base = std::make_unique<ArcIterator<NGramFst<A>>>(*this, s);
494 }
495 
496 namespace internal {
497 
498 template <typename A>
500  std::vector<StateId> *order_out) {
501  typedef A Arc;
502  typedef typename Arc::Label Label;
503  typedef typename Arc::Weight Weight;
504  typedef typename Arc::StateId StateId;
505  SetType("ngram");
506  SetInputSymbols(fst.InputSymbols());
507  SetOutputSymbols(fst.OutputSymbols());
508  SetProperties(kStaticProperties);
509 
510  // Check basic requirements for an OpenGrm language model Fst.
511  if (!NGramFst<A>::HasRequiredProps(fst)) {
512  FSTERROR() << "NGramFst only accepts OpenGrm language models as input";
513  SetProperties(kError, kError);
514  return;
515  }
516 
517  int64_t num_states = CountStates(fst);
518  std::vector<Label> context(num_states, 0);
519 
520  // Find the unigram state by starting from the start state, following
521  // epsilons.
522  StateId unigram = fst.Start();
523  while (true) {
524  if (unigram == kNoStateId) {
525  FSTERROR() << "Could not identify unigram state";
526  SetProperties(kError, kError);
527  return;
528  }
529  ArcIterator<Fst<A>> aiter(fst, unigram);
530  if (aiter.Done()) {
531  LOG(WARNING) << "Unigram state " << unigram << " has no arcs.";
532  break;
533  }
534  if (aiter.Value().ilabel != 0) break;
535  unigram = aiter.Value().nextstate;
536  }
537 
538  // Each state's context is determined by the subtree it is under from the
539  // unigram state.
540  std::queue<std::pair<StateId, Label>> label_queue;
541  std::vector<bool> visited(num_states);
542  // Force an epsilon link to the start state.
543  label_queue.push(std::make_pair(fst.Start(), 0));
544  for (ArcIterator<Fst<A>> aiter(fst, unigram); !aiter.Done(); aiter.Next()) {
545  label_queue.push(
546  std::make_pair(aiter.Value().nextstate, aiter.Value().ilabel));
547  }
548  // investigate states in breadth first fashion to assign context words.
549  while (!label_queue.empty()) {
550  std::pair<StateId, Label> &now = label_queue.front();
551  if (!visited[now.first]) {
552  context[now.first] = now.second;
553  visited[now.first] = true;
554  for (ArcIterator<Fst<A>> aiter(fst, now.first); !aiter.Done();
555  aiter.Next()) {
556  const Arc &arc = aiter.Value();
557  if (arc.ilabel != 0) {
558  label_queue.push(std::make_pair(arc.nextstate, now.second));
559  }
560  }
561  }
562  label_queue.pop();
563  }
564  visited.clear();
565 
566  // The arc from the start state should be assigned an epsilon to put it
567  // in front of the all other labels (which makes Start state 1 after
568  // unigram which is state 0).
569  context[fst.Start()] = 0;
570 
571  // Build the tree of contexts fst by reversing the epsilon arcs from fst.
572  VectorFst<Arc> context_fst;
573  uint64_t num_final = 0;
574  for (int i = 0; i < num_states; ++i) {
575  if (fst.Final(i) != Weight::Zero()) {
576  ++num_final;
577  }
578  context_fst.SetFinal(context_fst.AddState(), fst.Final(i));
579  }
580  context_fst.SetStart(unigram);
581  context_fst.SetInputSymbols(fst.InputSymbols());
582  context_fst.SetOutputSymbols(fst.OutputSymbols());
583  int64_t num_context_arcs = 0;
584  int64_t num_futures = 0;
585  for (StateIterator<Fst<A>> siter(fst); !siter.Done(); siter.Next()) {
586  const StateId &state = siter.Value();
587  num_futures += fst.NumArcs(state) - fst.NumInputEpsilons(state);
588  ArcIterator<Fst<A>> aiter(fst, state);
589  if (!aiter.Done()) {
590  const Arc &arc = aiter.Value();
591  // this arc goes from state to arc.nextstate, so create an arc from
592  // arc.nextstate to state to reverse it.
593  if (arc.ilabel == 0) {
594  context_fst.AddArc(arc.nextstate, Arc(context[state], context[state],
595  arc.weight, state));
596  num_context_arcs++;
597  }
598  }
599  }
600  if (num_context_arcs != context_fst.NumStates() - 1) {
601  FSTERROR() << "Number of contexts arcs != number of states - 1";
602  SetProperties(kError, kError);
603  return;
604  }
605  if (context_fst.NumStates() != num_states) {
606  FSTERROR() << "Number of contexts != number of states";
607  SetProperties(kError, kError);
608  return;
609  }
610  int64_t context_props =
611  context_fst.Properties(kIDeterministic | kILabelSorted, true);
612  if (!(context_props & kIDeterministic)) {
613  FSTERROR() << "Input Fst is not structured properly";
614  SetProperties(kError, kError);
615  return;
616  }
617  if (!(context_props & kILabelSorted)) {
618  ArcSort(&context_fst, ILabelCompare<Arc>());
619  }
620 
621  uint64_t b64;
622  Weight weight;
623  Label label = kNoLabel;
624  const size_t storage = Storage(num_states, num_futures, num_final);
625  std::unique_ptr<MappedFile> data_region(MappedFile::Allocate(storage));
626  char *data = static_cast<char *>(data_region->mutable_data());
627  memset(data, 0, storage);
628  size_t offset = 0;
629  memcpy(data + offset, reinterpret_cast<char *>(&num_states),
630  sizeof(num_states));
631  offset += sizeof(num_states);
632  memcpy(data + offset, reinterpret_cast<char *>(&num_futures),
633  sizeof(num_futures));
634  offset += sizeof(num_futures);
635  memcpy(data + offset, reinterpret_cast<char *>(&num_final),
636  sizeof(num_final));
637  offset += sizeof(num_final);
638  uint64_t *context_bits = reinterpret_cast<uint64_t *>(data + offset);
639  offset += BitmapIndex::StorageSize(num_states * 2 + 1) * sizeof(b64);
640  uint64_t *future_bits = reinterpret_cast<uint64_t *>(data + offset);
641  offset +=
642  BitmapIndex::StorageSize(num_futures + num_states + 1) * sizeof(b64);
643  uint64_t *final_bits = reinterpret_cast<uint64_t *>(data + offset);
644  offset += BitmapIndex::StorageSize(num_states) * sizeof(b64);
645  Label *context_words = reinterpret_cast<Label *>(data + offset);
646  offset += (num_states + 1) * sizeof(label);
647  Label *future_words = reinterpret_cast<Label *>(data + offset);
648  offset += num_futures * sizeof(label);
649  offset = (offset + sizeof(weight) - 1) & ~(sizeof(weight) - 1);
650  Weight *backoff = reinterpret_cast<Weight *>(data + offset);
651  offset += (num_states + 1) * sizeof(weight);
652  Weight *final_probs = reinterpret_cast<Weight *>(data + offset);
653  offset += num_final * sizeof(weight);
654  Weight *future_probs = reinterpret_cast<Weight *>(data + offset);
655  int64_t context_arc = 0, future_arc = 0, context_bit = 0, future_bit = 0,
656  final_bit = 0;
657 
658  // pseudo-root bits
659  BitmapIndex::Set(context_bits, context_bit++);
660  ++context_bit;
661  context_words[context_arc] = label;
662  backoff[context_arc] = Weight::Zero();
663  context_arc++;
664 
665  ++future_bit;
666  if (order_out) {
667  order_out->clear();
668  order_out->resize(num_states);
669  }
670 
671  std::queue<StateId> context_q;
672  context_q.push(context_fst.Start());
673  StateId state_number = 0;
674  while (!context_q.empty()) {
675  const StateId &state = context_q.front();
676  if (order_out) {
677  (*order_out)[state] = state_number;
678  }
679 
680  const Weight final_weight = context_fst.Final(state);
681  if (final_weight != Weight::Zero()) {
682  BitmapIndex::Set(final_bits, state_number);
683  final_probs[final_bit] = final_weight;
684  ++final_bit;
685  }
686 
687  for (ArcIterator<VectorFst<A>> aiter(context_fst, state); !aiter.Done();
688  aiter.Next()) {
689  const Arc &arc = aiter.Value();
690  context_words[context_arc] = arc.ilabel;
691  backoff[context_arc] = arc.weight;
692  ++context_arc;
693  BitmapIndex::Set(context_bits, context_bit++);
694  context_q.push(arc.nextstate);
695  }
696  ++context_bit;
697 
698  for (ArcIterator<Fst<A>> aiter(fst, state); !aiter.Done(); aiter.Next()) {
699  const Arc &arc = aiter.Value();
700  if (arc.ilabel != 0) {
701  future_words[future_arc] = arc.ilabel;
702  future_probs[future_arc] = arc.weight;
703  ++future_arc;
704  BitmapIndex::Set(future_bits, future_bit++);
705  }
706  }
707  ++future_bit;
708  ++state_number;
709  context_q.pop();
710  }
711 
712  if ((state_number != num_states) || (context_bit != num_states * 2 + 1) ||
713  (context_arc != num_states) || (future_arc != num_futures) ||
714  (future_bit != num_futures + num_states + 1) ||
715  (final_bit != num_final)) {
716  FSTERROR() << "Structure problems detected during construction";
717  SetProperties(kError, kError);
718  return;
719  }
720 
721  Init(data, std::move(data_region));
722 }
723 
724 template <typename A>
725 inline void NGramFstImpl<A>::Init(const char *data,
726  std::unique_ptr<MappedFile> data_region) {
727  data_region_ = std::move(data_region);
728  data_ = data;
729  size_t offset = 0;
730  num_states_ = *(reinterpret_cast<const uint64_t *>(data_ + offset));
731  offset += sizeof(num_states_);
732  num_futures_ = *(reinterpret_cast<const uint64_t *>(data_ + offset));
733  offset += sizeof(num_futures_);
734  num_final_ = *(reinterpret_cast<const uint64_t *>(data_ + offset));
735  offset += sizeof(num_final_);
736  uint64_t bits;
737  size_t context_bits = num_states_ * 2 + 1;
738  size_t future_bits = num_futures_ + num_states_ + 1;
739  context_ = reinterpret_cast<const uint64_t *>(data_ + offset);
740  offset += BitmapIndex::StorageSize(context_bits) * sizeof(bits);
741  future_ = reinterpret_cast<const uint64_t *>(data_ + offset);
742  offset += BitmapIndex::StorageSize(future_bits) * sizeof(bits);
743  final_ = reinterpret_cast<const uint64_t *>(data_ + offset);
744  offset += BitmapIndex::StorageSize(num_states_) * sizeof(bits);
745  context_words_ = reinterpret_cast<const Label *>(data_ + offset);
746  offset += (num_states_ + 1) * sizeof(*context_words_);
747  future_words_ = reinterpret_cast<const Label *>(data_ + offset);
748  offset += num_futures_ * sizeof(*future_words_);
749  offset = (offset + sizeof(*backoff_) - 1) & ~(sizeof(*backoff_) - 1);
750  backoff_ = reinterpret_cast<const Weight *>(data_ + offset);
751  offset += (num_states_ + 1) * sizeof(*backoff_);
752  final_probs_ = reinterpret_cast<const Weight *>(data_ + offset);
753  offset += num_final_ * sizeof(*final_probs_);
754  future_probs_ = reinterpret_cast<const Weight *>(data_ + offset);
755 
756  context_index_.BuildIndex(context_, context_bits,
757  /*enable_select_0_index=*/true,
758  /*enable_select_1_index=*/true);
759  future_index_.BuildIndex(future_, future_bits,
760  /*enable_select_0_index=*/true,
761  /*enable_select_1_index=*/false);
762  final_index_.BuildIndex(final_, num_states_);
763 
764  select_root_ = context_index_.Select0s(0);
765  if (context_index_.Rank1(0) != 0 || select_root_.first != 1 ||
766  context_index_.Get(2) == false) {
767  FSTERROR() << "Malformed file";
768  SetProperties(kError, kError);
769  return;
770  }
771  root_children_ = context_words_ + context_index_.Rank1(2);
772  start_ = 1;
773 }
774 
775 template <typename A>
776 inline typename A::StateId NGramFstImpl<A>::Transition(
777  const std::vector<Label> &context, Label future) const {
778  const Label *children = root_children_;
779  size_t num_children = select_root_.second - 2;
780  const Label *loc =
781  std::lower_bound(children, children + num_children, future);
782  if (loc == children + num_children || *loc != future) {
783  return context_index_.Rank1(0);
784  }
785  size_t node = 2 + loc - children;
786  size_t node_rank = context_index_.Rank1(node);
787  std::pair<size_t, size_t> zeros =
788  (node_rank == 0) ? select_root_ : context_index_.Select0s(node_rank);
789  size_t first_child = zeros.first + 1;
790  if (context_index_.Get(first_child) == false) {
791  return node_rank;
792  }
793  size_t last_child = zeros.second - 1;
794  for (int word = context.size() - 1; word >= 0; --word) {
795  children = context_words_ + context_index_.Rank1(first_child);
796  loc = std::lower_bound(children, children + last_child - first_child + 1,
797  context[word]);
798  if (loc == children + last_child - first_child + 1 ||
799  *loc != context[word]) {
800  break;
801  }
802  node = first_child + loc - children;
803  node_rank = context_index_.Rank1(node);
804  zeros =
805  (node_rank == 0) ? select_root_ : context_index_.Select0s(node_rank);
806  first_child = zeros.first + 1;
807  if (context_index_.Get(first_child) == false) break;
808  last_child = zeros.second - 1;
809  }
810  return node_rank;
811 }
812 
813 } // namespace internal
814 
815 /*****************************************************************************/
816 template <class A>
817 class NGramFstMatcher : public MatcherBase<A> {
818  public:
819  typedef A Arc;
820  typedef typename A::Label Label;
821  typedef typename A::StateId StateId;
822  typedef typename A::Weight Weight;
823 
824  // This makes a copy of the FST.
826  : owned_fst_(fst.Copy()),
827  fst_(*owned_fst_),
828  inst_(fst_.inst_),
829  match_type_(match_type),
830  current_loop_(false),
831  loop_(kNoLabel, 0, A::Weight::One(), kNoStateId) {
832  if (match_type_ == MATCH_OUTPUT) {
833  std::swap(loop_.ilabel, loop_.olabel);
834  }
835  }
836 
837  // This doesn't copy the FST.
839  : fst_(*fst),
840  inst_(fst_.inst_),
841  match_type_(match_type),
842  current_loop_(false),
843  loop_(kNoLabel, 0, A::Weight::One(), kNoStateId) {
844  if (match_type_ == MATCH_OUTPUT) {
845  std::swap(loop_.ilabel, loop_.olabel);
846  }
847  }
848 
849  // This makes a copy of the FST.
850  NGramFstMatcher(const NGramFstMatcher<A> &matcher, bool safe = false)
851  : owned_fst_(matcher.fst_.Copy(safe)),
852  fst_(*owned_fst_),
853  inst_(matcher.inst_),
854  match_type_(matcher.match_type_),
855  current_loop_(false),
856  loop_(kNoLabel, 0, A::Weight::One(), kNoStateId) {
857  if (match_type_ == MATCH_OUTPUT) {
858  std::swap(loop_.ilabel, loop_.olabel);
859  }
860  }
861 
862  NGramFstMatcher<A> *Copy(bool safe = false) const override {
863  return new NGramFstMatcher<A>(*this, safe);
864  }
865 
866  MatchType Type(bool test) const override { return match_type_; }
867 
868  const Fst<A> &GetFst() const override { return fst_; }
869 
870  uint64_t Properties(uint64_t props) const override { return props; }
871 
872  void SetState(StateId s) final {
873  fst_.GetImpl()->SetInstFuture(s, &inst_);
874  current_loop_ = false;
875  }
876 
877  bool Find(Label label) final {
878  const Label nolabel = kNoLabel;
879  done_ = true;
880  if (label == 0 || label == nolabel) {
881  if (label == 0) {
882  current_loop_ = true;
883  loop_.nextstate = inst_.state_;
884  }
885  // The unigram state has no epsilon arc.
886  if (inst_.state_ != 0) {
887  arc_.ilabel = arc_.olabel = 0;
888  fst_.GetImpl()->SetInstNode(&inst_);
889  arc_.nextstate = fst_.GetImpl()->context_index_.Rank1(
890  fst_.GetImpl()->context_index_.Select1(
891  fst_.GetImpl()->context_index_.Rank0(inst_.node_) - 1));
892  arc_.weight = fst_.GetImpl()->backoff_[inst_.state_];
893  done_ = false;
894  }
895  } else {
896  current_loop_ = false;
897  const Label *start = fst_.GetImpl()->future_words_ + inst_.offset_;
898  const Label *end = start + inst_.num_futures_;
899  const Label *search = std::lower_bound(start, end, label);
900  if (search != end && *search == label) {
901  size_t state = search - start;
902  arc_.ilabel = arc_.olabel = label;
903  arc_.weight = fst_.GetImpl()->future_probs_[inst_.offset_ + state];
904  fst_.GetImpl()->SetInstContext(&inst_);
905  arc_.nextstate = fst_.GetImpl()->Transition(inst_.context_, label);
906  done_ = false;
907  }
908  }
909  return !Done();
910  }
911 
912  bool Done() const final { return !current_loop_ && done_; }
913 
914  const Arc &Value() const final { return (current_loop_) ? loop_ : arc_; }
915 
916  void Next() final {
917  if (current_loop_) {
918  current_loop_ = false;
919  } else {
920  done_ = true;
921  }
922  }
923 
924  ssize_t Priority(StateId s) final { return fst_.NumArcs(s); }
925 
926  private:
927  std::unique_ptr<NGramFst<A>> owned_fst_;
928  const NGramFst<A> &fst_;
929  NGramFstInst<A> inst_;
930  MatchType match_type_; // Supplied by caller
931  bool done_;
932  Arc arc_;
933  bool current_loop_; // Current arc is the implicit loop
934  Arc loop_;
935 };
936 
937 /*****************************************************************************/
938 // Specialization for NGramFst; see generic version in fst.h
939 // for sample usage (but use the ProdLmFst type!). This version
940 // should inline.
941 template <class A>
942 class StateIterator<NGramFst<A>> : public StateIteratorBase<A> {
943  public:
944  typedef typename A::StateId StateId;
945 
946  explicit StateIterator(const NGramFst<A> &fst)
947  : s_(0), num_states_(fst.NumStates()) {}
948 
949  bool Done() const final { return s_ >= num_states_; }
950 
951  StateId Value() const final {
952  DCHECK(!Done());
953  return s_;
954  }
955 
956  void Next() final { ++s_; }
957 
958  void Reset() final { s_ = 0; }
959 
960  private:
961  StateId s_;
962  StateId num_states_;
963 };
964 
965 /*****************************************************************************/
966 template <class A>
967 class ArcIterator<NGramFst<A>> : public ArcIteratorBase<A> {
968  public:
969  typedef A Arc;
970  typedef typename A::Label Label;
971  typedef typename A::StateId StateId;
972  typedef typename A::Weight Weight;
973 
974  ArcIterator(const NGramFst<A> &fst, StateId state)
975  : lazy_(~0), impl_(fst.GetImpl()), i_(0), flags_(kArcValueFlags) {
976  inst_ = fst.inst_;
977  impl_->SetInstFuture(state, &inst_);
978  impl_->SetInstNode(&inst_);
979  }
980 
981  bool Done() const final {
982  return i_ >=
983  ((inst_.node_ == 0) ? inst_.num_futures_ : inst_.num_futures_ + 1);
984  }
985 
986  const Arc &Value() const final {
987  DCHECK(!Done());
988  bool eps = (inst_.node_ != 0 && i_ == 0);
989  StateId state = (inst_.node_ == 0) ? i_ : i_ - 1;
990  if (flags_ & lazy_ & (kArcILabelValue | kArcOLabelValue)) {
991  arc_.ilabel = arc_.olabel =
992  eps ? 0 : impl_->future_words_[inst_.offset_ + state];
993  lazy_ &= ~(kArcILabelValue | kArcOLabelValue);
994  }
995  if (flags_ & lazy_ & kArcNextStateValue) {
996  if (eps) {
997  arc_.nextstate =
998  impl_->context_index_.Rank1(impl_->context_index_.Select1(
999  impl_->context_index_.Rank0(inst_.node_) - 1));
1000  } else {
1001  if (lazy_ & kArcNextStateValue) {
1002  impl_->SetInstContext(&inst_); // first time only.
1003  }
1004  arc_.nextstate = impl_->Transition(
1005  inst_.context_, impl_->future_words_[inst_.offset_ + state]);
1006  }
1007  lazy_ &= ~kArcNextStateValue;
1008  }
1009  if (flags_ & lazy_ & kArcWeightValue) {
1010  arc_.weight = eps ? impl_->backoff_[inst_.state_]
1011  : impl_->future_probs_[inst_.offset_ + state];
1012  lazy_ &= ~kArcWeightValue;
1013  }
1014  return arc_;
1015  }
1016 
1017  void Next() final {
1018  ++i_;
1019  lazy_ = ~0;
1020  }
1021 
1022  size_t Position() const final { return i_; }
1023 
1024  void Reset() final {
1025  i_ = 0;
1026  lazy_ = ~0;
1027  }
1028 
1029  void Seek(size_t a) final {
1030  if (i_ != a) {
1031  i_ = a;
1032  lazy_ = ~0;
1033  }
1034  }
1035 
1036  uint8_t Flags() const final { return flags_; }
1037 
1038  void SetFlags(uint8_t flags, uint8_t mask) final {
1039  flags_ &= ~mask;
1040  flags_ |= (flags & kArcValueFlags);
1041  }
1042 
1043  private:
1044  mutable Arc arc_;
1045  mutable uint8_t lazy_;
1046  const internal::NGramFstImpl<A> *impl_; // Borrowed reference.
1047  mutable NGramFstInst<A> inst_;
1048 
1049  size_t i_;
1050  uint8_t flags_;
1051 };
1052 
1053 } // namespace fst
1054 #endif // FST_EXTENSIONS_NGRAM_NGRAM_FST_H_
constexpr uint64_t kCyclic
Definition: properties.h:109
internal::NGramFstImpl< A > Impl
Definition: ngram-fst.h:359
void GetStates(const std::vector< Label > &context, std::vector< StateId > *state) const
Definition: ngram-fst.h:392
constexpr uint64_t kNotString
Definition: properties.h:139
void AddArc(StateId s, const Arc &arc) override
Definition: mutable-fst.h:331
NGramFst< A > * Copy(bool safe=false) const override
Definition: ngram-fst.h:401
void SetStart(StateId s) override
Definition: mutable-fst.h:303
constexpr uint8_t kArcValueFlags
Definition: fst.h:453
size_t num_futures_
Definition: ngram-fst.h:69
void Next() final
Definition: ngram-fst.h:916
constexpr int kNoLabel
Definition: fst.h:195
size_t NumInputEpsilons(StateId state) const
Definition: ngram-fst.h:175
virtual uint64_t Properties(uint64_t mask, bool test) const =0
constexpr uint64_t kOEpsilons
Definition: properties.h:89
const Arc & Value() const final
Definition: ngram-fst.h:914
void SetFinal(StateId s, Weight weight=Weight::One()) override
Definition: mutable-fst.h:308
virtual size_t NumArcs(StateId) const =0
static size_t Storage(uint64_t num_states, uint64_t num_futures, uint64_t num_final)
Definition: ngram-fst.h:192
constexpr uint64_t kCoAccessible
Definition: properties.h:129
void SetInstContext(NGramFstInst< A > *inst) const
Definition: ngram-fst.h:228
uint8_t Flags() const final
Definition: ngram-fst.h:1036
static void Set(uint64_t *bits, size_t index)
Definition: bitmap-index.h:116
constexpr uint64_t kNotTopSorted
Definition: properties.h:121
MatchType
Definition: fst.h:187
StateId state_
Definition: ngram-fst.h:68
size_t Position() const final
Definition: ngram-fst.h:1022
constexpr uint64_t kError
Definition: properties.h:52
size_t StorageSize() const
Definition: ngram-fst.h:443
constexpr uint64_t kInitialAcyclic
Definition: properties.h:116
static NGramFstImpl< A > * Read(std::istream &strm, const FstReadOptions &opts)
Definition: ngram-fst.h:120
#define LOG(type)
Definition: log.h:53
StateId Start() const override
Definition: impl-to-fst.h:47
virtual Weight Final(StateId) const =0
SetType
Definition: set-weight.h:59
StateId node_state_
Definition: ngram-fst.h:72
uint64_t Properties(uint64_t props) const override
Definition: ngram-fst.h:870
constexpr uint8_t kArcILabelValue
Definition: fst.h:444
constexpr uint64_t kEpsilons
Definition: properties.h:79
constexpr uint64_t kODeterministic
Definition: properties.h:74
const Arc & Value() const final
Definition: ngram-fst.h:986
constexpr int kNoStateId
Definition: fst.h:196
NGramFstMatcher< A > * Copy(bool safe=false) const override
Definition: ngram-fst.h:862
const Arc & Value() const
Definition: fst.h:536
NGramFstMatcher(const NGramFstMatcher< A > &matcher, bool safe=false)
Definition: ngram-fst.h:850
static MappedFile * Allocate(size_t size, size_t align=kArchAlignment)
Definition: mapped-file.cc:197
void InitArcIterator(StateId s, ArcIteratorData< A > *data) const override
Definition: ngram-fst.h:489
NGramFst(const Fst< A > &fst, std::vector< StateId > *order_out)
Definition: ngram-fst.h:364
const Fst< A > & GetFst() const override
Definition: ngram-fst.h:868
virtual size_t NumInputEpsilons(StateId) const =0
#define FSTERROR()
Definition: util.h:56
StateId AddState() override
Definition: mutable-fst.h:321
NGramFst(const Fst< A > &dst)
Definition: ngram-fst.h:361
size_t NumArcs(StateId state, NGramFstInst< A > *inst=nullptr) const
Definition: ngram-fst.h:165
const std::vector< Label > & GetContext(StateId s, NGramFstInst< A > *inst) const
Definition: ngram-fst.h:252
void SetOutputSymbols(const SymbolTable *osyms) override
Definition: mutable-fst.h:401
static size_t StorageSize(size_t num_bits)
Definition: bitmap-index.h:94
NGramFst(const NGramFst< A > &fst, bool safe=false)
Definition: ngram-fst.h:369
A::Weight Weight
Definition: ngram-fst.h:67
ssize_t NumInputEpsilons(const ExpandedFst< Arc > &fst, typename Arc::StateId s)
Definition: expanded-fst.h:118
const char * GetData(size_t *data_size) const
Definition: ngram-fst.h:382
A::StateId StateId
Definition: ngram-fst.h:66
void ArcSort(MutableFst< Arc > *fst, Compare comp)
Definition: arcsort.h:109
constexpr uint64_t kOLabelSorted
Definition: properties.h:99
void SetInstNode(NGramFstInst< A > *inst) const
Definition: ngram-fst.h:221
void SetFlags(uint8_t flags, uint8_t mask) final
Definition: ngram-fst.h:1038
static NGramFst< A > * Read(std::istream &strm, const FstReadOptions &opts)
Definition: ngram-fst.h:405
void SetInputSymbols(const SymbolTable *isyms) override
Definition: mutable-fst.h:396
std::vector< Label > context_
Definition: ngram-fst.h:73
MatcherBase< A > * InitMatcher(MatchType match_type) const override
Definition: ngram-fst.h:439
bool Write(const std::string &source) const override
Definition: ngram-fst.h:428
StateId Value() const final
Definition: ngram-fst.h:951
StateId NumStates() const override
Definition: expanded-fst.h:144
NGramFstMatcher(const NGramFst< A > &fst, MatchType match_type)
Definition: ngram-fst.h:825
StateId nstates
Definition: fst.h:384
constexpr uint64_t kAccessible
Definition: properties.h:124
constexpr uint8_t kArcOLabelValue
Definition: fst.h:446
const char * GetData(size_t *data_size) const
Definition: ngram-fst.h:244
MatchType Type(bool test) const override
Definition: ngram-fst.h:866
size_t NumArcs(StateId s) const override
Definition: ngram-fst.h:397
void SetInstFuture(StateId state, NGramFstInst< A > *inst) const
Definition: ngram-fst.h:212
virtual StateId Start() const =0
bool Done() const
Definition: fst.h:532
std::unique_ptr< StateIteratorBase< Arc > > base
Definition: fst.h:382
A::StateId StateId
Definition: ngram-fst.h:821
constexpr uint64_t kIDeterministic
Definition: properties.h:69
void SetState(StateId s) final
Definition: ngram-fst.h:872
NGramFstImpl(const Fst< A > &fst)
Definition: ngram-fst.h:113
std::unique_ptr< ArcIteratorBase< Arc > > base
Definition: fst.h:494
const std::vector< Label > GetContext(StateId s) const
Definition: ngram-fst.h:386
constexpr uint64_t kIEpsilons
Definition: properties.h:84
void Seek(size_t a) final
Definition: ngram-fst.h:1029
bool WriteFile(const std::string &source) const
Definition: fst.h:332
StateId NumStates() const
Definition: ngram-fst.h:185
A::Label Label
Definition: ngram-fst.h:357
constexpr uint8_t kArcWeightValue
Definition: fst.h:448
size_t StorageSize() const
Definition: ngram-fst.h:258
constexpr uint64_t kILabelSorted
Definition: properties.h:94
void SetNumStates(int64_t numstates)
Definition: fst.h:164
bool Write(std::ostream &strm, const FstWriteOptions &opts) const override
Definition: ngram-fst.h:424
StateId context_state_
Definition: ngram-fst.h:74
virtual const SymbolTable * InputSymbols() const =0
Arc::StateId CountStates(const Fst< Arc > &fst)
Definition: expanded-fst.h:179
constexpr uint8_t kArcNextStateValue
Definition: fst.h:450
ssize_t Priority(StateId s) final
Definition: ngram-fst.h:924
ArcIterator(const NGramFst< A > &fst, StateId state)
Definition: ngram-fst.h:974
Weight Final(StateId s) const override
Definition: impl-to-fst.h:49
size_t NumOutputEpsilons(StateId state) const
Definition: ngram-fst.h:181
NGramFstImpl(const NGramFstImpl &other)
Definition: ngram-fst.h:115
void SetStart(int64_t start)
Definition: fst.h:162
bool Done() const final
Definition: ngram-fst.h:912
StateId Start() const
Definition: ngram-fst.h:155
NGramFstMatcher(const NGramFst< A > *fst, MatchType match_type)
Definition: ngram-fst.h:838
A::Label Label
Definition: ngram-fst.h:65
NGramFst(const char *data)
Definition: ngram-fst.h:376
A::Weight Weight
Definition: ngram-fst.h:358
void InitStateIterator(StateIteratorData< A > *data) const override
Definition: ngram-fst.h:432
Weight Final(StateId state) const
Definition: ngram-fst.h:157
void InitStateIterator(StateIteratorData< A > *data) const
Definition: ngram-fst.h:187
bool Write(std::ostream &strm, const FstWriteOptions &opts) const
Definition: ngram-fst.h:146
#define DCHECK(x)
Definition: log.h:74
A::StateId StateId
Definition: ngram-fst.h:356
constexpr uint64_t kWeighted
Definition: properties.h:104
bool Find(Label label) final
Definition: ngram-fst.h:877
constexpr uint64_t kExpanded
Definition: properties.h:46
static bool HasRequiredStructure(const Fst< A > &fst)
Definition: ngram-fst.h:451
StateIterator(const NGramFst< A > &fst)
Definition: ngram-fst.h:946
static NGramFst< A > * Read(const std::string &source)
Definition: ngram-fst.h:410
static bool HasRequiredProps(const Fst< A > &fst)
Definition: ngram-fst.h:445
uint64_t Properties(uint64_t mask, bool test) const override
Definition: impl-to-fst.h:63
constexpr uint64_t kAcceptor
Definition: properties.h:64
virtual const SymbolTable * OutputSymbols() const =0
void Next()
Definition: fst.h:540