FST  openfst-1.7.2
OpenFst Library
bi-table.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Classes for representing a bijective mapping between an arbitrary entry
5 // of type T and a signed integral ID.
6 
7 #ifndef FST_BI_TABLE_H_
8 #define FST_BI_TABLE_H_
9 
10 #include <deque>
11 #include <functional>
12 #include <memory>
13 #include <type_traits>
14 #include <unordered_map>
15 #include <unordered_set>
16 #include <vector>
17 
18 #include <fst/log.h>
19 #include <fst/memory.h>
20 #include <unordered_set>
21 
22 namespace fst {
23 
24 // Bitables model bijective mappings between entries of an arbitrary type T and
25 // an signed integral ID of type I. The IDs are allocated starting from 0 in
26 // order.
27 //
28 // template <class I, class T>
29 // class BiTable {
30 // public:
31 //
32 // // Required constructors.
33 // BiTable();
34 //
35 // // Looks up integer ID from entry. If it doesn't exist and insert
36 // / is true, adds it; otherwise, returns -1.
37 // I FindId(const T &entry, bool insert = true);
38 //
39 // // Looks up entry from integer ID.
40 // const T &FindEntry(I) const;
41 //
42 // // Returns number of stored entries.
43 // I Size() const;
44 // };
45 
46 // An implementation using a hash map for the entry to ID mapping. H is the
47 // hash function and E is the equality function. If passed to the constructor,
48 // ownership is given to this class.
49 template <class I, class T, class H, class E = std::equal_to<T>>
50 class HashBiTable {
51  public:
52  // Reserves space for table_size elements. If passing H and E to the
53  // constructor, this class owns them.
54  explicit HashBiTable(size_t table_size = 0, H *h = nullptr, E *e = nullptr) :
55  hash_func_(h ? h : new H()), hash_equal_(e ? e : new E()),
56  entry2id_(table_size, *hash_func_, *hash_equal_) {
57  if (table_size) id2entry_.reserve(table_size);
58  }
59 
61  : hash_func_(new H(*table.hash_func_)),
62  hash_equal_(new E(*table.hash_equal_)),
63  entry2id_(table.entry2id_.begin(), table.entry2id_.end(),
64  table.entry2id_.size(), *hash_func_, *hash_equal_),
65  id2entry_(table.id2entry_) {}
66 
67  I FindId(const T &entry, bool insert = true) {
68  if (!insert) {
69  const auto it = entry2id_.find(entry);
70  return it == entry2id_.end() ? -1 : it->second - 1;
71  }
72  I &id_ref = entry2id_[entry];
73  if (id_ref == 0) { // T not found; stores and assigns a new ID.
74  id2entry_.push_back(entry);
75  id_ref = id2entry_.size();
76  }
77  return id_ref - 1; // NB: id_ref = ID + 1.
78  }
79 
80  const T &FindEntry(I s) const { return id2entry_[s]; }
81 
82  I Size() const { return id2entry_.size(); }
83 
84  // TODO(riley): Add fancy clear-to-size, as in CompactHashBiTable.
85  void Clear() {
86  entry2id_.clear();
87  id2entry_.clear();
88  }
89 
90  private:
91  std::unique_ptr<H> hash_func_;
92  std::unique_ptr<E> hash_equal_;
93  std::unordered_map<T, I, H, E> entry2id_;
94  std::vector<T> id2entry_;
95 };
96 
97 // Enables alternative hash set representations below.
98 enum HSType { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2, HS_FLAT = 3 };
99 
100 // Default hash set is STL hash_set.
101 template <class K, class H, class E, HSType HS>
102 struct HashSet : public std::unordered_set<K, H, E, PoolAllocator<K>> {
103  explicit HashSet(size_t n = 0, const H &h = H(), const E &e = E())
104  : std::unordered_set<K, H, E, PoolAllocator<K>>(n, h, e) {}
105 
106  void rehash(size_t n) {}
107 };
108 
109 // An implementation using a hash set for the entry to ID mapping. The hash set
110 // holds keys which are either the ID or kCurrentKey. These keys can be mapped
111 // to entries either by looking up in the entry vector or, if kCurrentKey, in
112 // current_entry_. The hash and key equality functions map to entries first. H
113 // is the hash function and E is the equality function. If passed to the
114 // constructor, ownership is given to this class.
115 // TODO(rybach): remove support for (deprecated and unused) HS_DENSE, HS_SPARSE.
116 template <class I, class T, class H, class E = std::equal_to<T>,
117  HSType HS = HS_FLAT>
119  public:
120  friend class HashFunc;
121  friend class HashEqual;
122 
123  // Reserves space for table_size elements. If passing H and E to the
124  // constructor, this class owns them.
125  explicit CompactHashBiTable(size_t table_size = 0, H *h = nullptr,
126  E *e = nullptr) :
127  hash_func_(h ? h : new H()), hash_equal_(e ? e : new E()),
128  compact_hash_func_(*this), compact_hash_equal_(*this),
129  keys_(table_size, compact_hash_func_, compact_hash_equal_) {
130  if (table_size) id2entry_.reserve(table_size);
131  }
132 
134  : hash_func_(new H(*table.hash_func_)),
135  hash_equal_(new E(*table.hash_equal_)),
136  compact_hash_func_(*this), compact_hash_equal_(*this),
137  keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_),
138  id2entry_(table.id2entry_) {
139  keys_.insert(table.keys_.begin(), table.keys_.end());
140  }
141 
142  I FindId(const T &entry, bool insert = true) {
143  current_entry_ = &entry;
144  if (insert) {
145  auto result = keys_.insert(kCurrentKey);
146  if (!result.second) return *result.first; // Already exists.
147  // Overwrites kCurrentKey with a new key value; this is safe because it
148  // doesn't affect hashing or equality testing.
149  I key = id2entry_.size();
150  const_cast<I &>(*result.first) = key;
151  id2entry_.push_back(entry);
152  return key;
153  }
154  const auto it = keys_.find(kCurrentKey);
155  return it == keys_.end() ? -1 : *it;
156  }
157 
158  const T &FindEntry(I s) const { return id2entry_[s]; }
159 
160  I Size() const { return id2entry_.size(); }
161 
162  // Clears content; with argument, erases last n IDs.
163  void Clear(ssize_t n = -1) {
164  if (n < 0 || n >= id2entry_.size()) { // Clears completely.
165  keys_.clear();
166  id2entry_.clear();
167  } else if (n == id2entry_.size() - 1) { // Leaves only key 0.
168  const T entry = FindEntry(0);
169  keys_.clear();
170  id2entry_.clear();
171  FindId(entry, true);
172  } else {
173  while (n-- > 0) {
174  I key = id2entry_.size() - 1;
175  keys_.erase(key);
176  id2entry_.pop_back();
177  }
178  keys_.rehash(0);
179  }
180  }
181 
182  private:
183  static_assert(std::is_signed<I>::value, "I must be a signed type");
184  // ... otherwise >= kCurrentKey comparisons as used below don't work.
185  // TODO(rybach): (1) remove kEmptyKey, kDeletedKey, (2) don't use >= for key
186  // comparison, (3) allow unsigned key types.
187  static constexpr I kCurrentKey = -1;
188  static constexpr I kEmptyKey = -2;
189  static constexpr I kDeletedKey = -3;
190 
191  class HashFunc {
192  public:
193  explicit HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {}
194 
195  size_t operator()(I k) const {
196  if (k >= kCurrentKey) {
197  return (*ht_->hash_func_)(ht_->Key2Entry(k));
198  } else {
199  return 0;
200  }
201  }
202 
203  private:
204  const CompactHashBiTable *ht_;
205  };
206 
207  class HashEqual {
208  public:
209  explicit HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {}
210 
211  bool operator()(I k1, I k2) const {
212  if (k1 == k2) {
213  return true;
214  } else if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
215  return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2));
216  } else {
217  return false;
218  }
219  }
220 
221  private:
222  const CompactHashBiTable *ht_;
223  };
224 
226 
227  const T &Key2Entry(I k) const {
228  if (k == kCurrentKey) {
229  return *current_entry_;
230  } else {
231  return id2entry_[k];
232  }
233  }
234 
235  std::unique_ptr<H> hash_func_;
236  std::unique_ptr<E> hash_equal_;
237  HashFunc compact_hash_func_;
238  HashEqual compact_hash_equal_;
239  KeyHashSet keys_;
240  std::vector<T> id2entry_;
241  const T *current_entry_;
242 };
243 
244 template <class I, class T, class H, class E, HSType HS>
246 
247 template <class I, class T, class H, class E, HSType HS>
249 
250 template <class I, class T, class H, class E, HSType HS>
252 
253 // An implementation using a vector for the entry to ID mapping. It is passed a
254 // function object FP that should fingerprint entries uniquely to an integer
255 // that can used as a vector index. Normally, VectorBiTable constructs the FP
256 // object. The user can instead pass in this object; in that case, VectorBiTable
257 // takes its ownership.
258 template <class I, class T, class FP>
260  public:
261  // Reserves table_size cells of space. If passing FP argument to the
262  // constructor, this class owns it.
263  explicit VectorBiTable(FP *fp = nullptr, size_t table_size = 0) :
264  fp_(fp ? fp : new FP()) {
265  if (table_size) id2entry_.reserve(table_size);
266  }
267 
269  : fp_(new FP(*table.fp_)), fp2id_(table.fp2id_),
270  id2entry_(table.id2entry_) {}
271 
272  I FindId(const T &entry, bool insert = true) {
273  ssize_t fp = (*fp_)(entry);
274  if (fp >= fp2id_.size()) fp2id_.resize(fp + 1);
275  I &id_ref = fp2id_[fp];
276  if (id_ref == 0) { // T not found.
277  if (insert) { // Stores and assigns a new ID.
278  id2entry_.push_back(entry);
279  id_ref = id2entry_.size();
280  } else {
281  return -1;
282  }
283  }
284  return id_ref - 1; // NB: id_ref = ID + 1.
285  }
286 
287  const T &FindEntry(I s) const { return id2entry_[s]; }
288 
289  I Size() const { return id2entry_.size(); }
290 
291  const FP &Fingerprint() const { return *fp_; }
292 
293  private:
294  std::unique_ptr<FP> fp_;
295  std::vector<I> fp2id_;
296  std::vector<T> id2entry_;
297 };
298 
299 // An implementation using a vector and a compact hash table. The selecting
300 // functor S returns true for entries to be hashed in the vector. The
301 // fingerprinting functor FP returns a unique fingerprint for each entry to be
302 // hashed in the vector (these need to be suitable for indexing in a vector).
303 // The hash functor H is used when hashing entry into the compact hash table.
304 // If passed to the constructor, ownership is given to this class.
305 template <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE>
307  public:
308  friend class HashFunc;
309  friend class HashEqual;
310 
311  explicit VectorHashBiTable(S *s, FP *fp, H *h, size_t vector_size = 0,
312  size_t entry_size = 0)
313  : selector_(s), fp_(fp), h_(h), hash_func_(*this), hash_equal_(*this),
314  keys_(0, hash_func_, hash_equal_) {
315  if (vector_size) fp2id_.reserve(vector_size);
316  if (entry_size) id2entry_.reserve(entry_size);
317  }
318 
320  : selector_(new S(table.s_)), fp_(new FP(*table.fp_)),
321  h_(new H(*table.h_)), id2entry_(table.id2entry_),
322  fp2id_(table.fp2id_), hash_func_(*this), hash_equal_(*this),
323  keys_(table.keys_.size(), hash_func_, hash_equal_) {
324  keys_.insert(table.keys_.begin(), table.keys_.end());
325  }
326 
327  I FindId(const T &entry, bool insert = true) {
328  if ((*selector_)(entry)) { // Uses the vector if selector_(entry) == true.
329  uint64 fp = (*fp_)(entry);
330  if (fp2id_.size() <= fp) fp2id_.resize(fp + 1, 0);
331  if (fp2id_[fp] == 0) { // T not found.
332  if (insert) { // Stores and assigns a new ID.
333  id2entry_.push_back(entry);
334  fp2id_[fp] = id2entry_.size();
335  } else {
336  return -1;
337  }
338  }
339  return fp2id_[fp] - 1; // NB: assoc_value = ID + 1.
340  } else { // Uses the hash table otherwise.
341  current_entry_ = &entry;
342  const auto it = keys_.find(kCurrentKey);
343  if (it == keys_.end()) {
344  if (insert) {
345  I key = id2entry_.size();
346  id2entry_.push_back(entry);
347  keys_.insert(key);
348  return key;
349  } else {
350  return -1;
351  }
352  } else {
353  return *it;
354  }
355  }
356  }
357 
358  const T &FindEntry(I s) const { return id2entry_[s]; }
359 
360  I Size() const { return id2entry_.size(); }
361 
362  const S &Selector() const { return *selector_; }
363 
364  const FP &Fingerprint() const { return *fp_; }
365 
366  const H &Hash() const { return *h_; }
367 
368  private:
369  static constexpr I kCurrentKey = -1;
370  static constexpr I kEmptyKey = -2;
371 
372  class HashFunc {
373  public:
374  explicit HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {}
375 
376  size_t operator()(I k) const {
377  if (k >= kCurrentKey) {
378  return (*(ht_->h_))(ht_->Key2Entry(k));
379  } else {
380  return 0;
381  }
382  }
383 
384  private:
385  const VectorHashBiTable *ht_;
386  };
387 
388  class HashEqual {
389  public:
390  explicit HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {}
391 
392  bool operator()(I k1, I k2) const {
393  if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
394  return ht_->Key2Entry(k1) == ht_->Key2Entry(k2);
395  } else {
396  return k1 == k2;
397  }
398  }
399 
400  private:
401  const VectorHashBiTable *ht_;
402  };
403 
405 
406  const T &Key2Entry(I k) const {
407  if (k == kCurrentKey) {
408  return *current_entry_;
409  } else {
410  return id2entry_[k];
411  }
412  }
413 
414  std::unique_ptr<S> selector_; // True if entry hashed into vector.
415  std::unique_ptr<FP> fp_; // Fingerprint used for hashing into vector.
416  std::unique_ptr<H> h_; // Hash funcion used for hashing into hash_set.
417 
418  std::vector<T> id2entry_; // Maps state IDs to entry.
419  std::vector<I> fp2id_; // Maps entry fingerprints to IDs.
420 
421  // Compact implementation of the hash table mapping entries to state IDs
422  // using the hash function h_.
423  HashFunc hash_func_;
424  HashEqual hash_equal_;
425  KeyHashSet keys_;
426  const T *current_entry_;
427 };
428 
429 template <class I, class T, class S, class FP, class H, HSType HS>
431 
432 template <class I, class T, class S, class FP, class H, HSType HS>
434 
435 // An implementation using a hash map for the entry to ID mapping. This version
436 // permits erasing of arbitrary states. The entry T must have == defined and
437 // its default constructor must produce a entry that will never be seen. F is
438 // the hash function.
439 template <class I, class T, class F>
441  public:
442  ErasableBiTable() : first_(0) {}
443 
444  I FindId(const T &entry, bool insert = true) {
445  I &id_ref = entry2id_[entry];
446  if (id_ref == 0) { // T not found.
447  if (insert) { // Stores and assigns a new ID.
448  id2entry_.push_back(entry);
449  id_ref = id2entry_.size() + first_;
450  } else {
451  return -1;
452  }
453  }
454  return id_ref - 1; // NB: id_ref = ID + 1.
455  }
456 
457  const T &FindEntry(I s) const { return id2entry_[s - first_]; }
458 
459  I Size() const { return id2entry_.size(); }
460 
461  void Erase(I s) {
462  auto &ref = id2entry_[s - first_];
463  entry2id_.erase(ref);
464  ref = empty_entry_;
465  while (!id2entry_.empty() && id2entry_.front() == empty_entry_) {
466  id2entry_.pop_front();
467  ++first_;
468  }
469  }
470 
471  private:
472  std::unordered_map<T, I, F> entry2id_;
473  std::deque<T> id2entry_;
474  const T empty_entry_;
475  I first_; // I of first element in the deque.
476 };
477 
478 } // namespace fst
479 
480 #endif // FST_BI_TABLE_H_
VectorHashBiTable(const VectorHashBiTable< I, T, S, FP, H, HS > &table)
Definition: bi-table.h:319
uint64_t uint64
Definition: types.h:32
I FindId(const T &entry, bool insert=true)
Definition: bi-table.h:272
HashSet(size_t n=0, const H &h=H(), const E &e=E())
Definition: bi-table.h:103
const T & FindEntry(I s) const
Definition: bi-table.h:158
const T & FindEntry(I s) const
Definition: bi-table.h:80
const S & Selector() const
Definition: bi-table.h:362
I FindId(const T &entry, bool insert=true)
Definition: bi-table.h:327
CompactHashBiTable(size_t table_size=0, H *h=nullptr, E *e=nullptr)
Definition: bi-table.h:125
const FP & Fingerprint() const
Definition: bi-table.h:291
const T & FindEntry(I s) const
Definition: bi-table.h:457
I Size() const
Definition: bi-table.h:82
HashBiTable(const HashBiTable< I, T, H, E > &table)
Definition: bi-table.h:60
I Size() const
Definition: bi-table.h:459
const T & FindEntry(I s) const
Definition: bi-table.h:287
void Clear()
Definition: bi-table.h:85
CompactHashBiTable(const CompactHashBiTable< I, T, H, E, HS > &table)
Definition: bi-table.h:133
VectorHashBiTable(S *s, FP *fp, H *h, size_t vector_size=0, size_t entry_size=0)
Definition: bi-table.h:311
I Size() const
Definition: bi-table.h:289
const FP & Fingerprint() const
Definition: bi-table.h:364
I FindId(const T &entry, bool insert=true)
Definition: bi-table.h:444
HSType
Definition: bi-table.h:98
VectorBiTable(const VectorBiTable< I, T, FP > &table)
Definition: bi-table.h:268
const H & Hash() const
Definition: bi-table.h:366
I FindId(const T &entry, bool insert=true)
Definition: bi-table.h:67
const T & FindEntry(I s) const
Definition: bi-table.h:358
VectorBiTable(FP *fp=nullptr, size_t table_size=0)
Definition: bi-table.h:263
HashBiTable(size_t table_size=0, H *h=nullptr, E *e=nullptr)
Definition: bi-table.h:54
void Clear(ssize_t n=-1)
Definition: bi-table.h:163
I FindId(const T &entry, bool insert=true)
Definition: bi-table.h:142
void rehash(size_t n)
Definition: bi-table.h:106