20 #ifndef FST_SYMBOL_TABLE_H_ 21 #define FST_SYMBOL_TABLE_H_ 23 #include <sys/types.h> 36 #include <type_traits> 47 #include <string_view> 70 std::pair<int64_t, bool>
InsertOrFind(std::string_view key);
// Looks up `key` and reports the result as an int64_t. Only the declaration
// appears here; the definition is outside this listing.
// NOTE(review): presumably returns the stored index of `key`, or a
// "not found" sentinel when absent -- confirm against the definition.
72 int64_t
Find(std::string_view key)
const;
// Returns the number of strings currently held in symbols_.
74 size_t Size()
const {
return symbols_.size(); }
// Returns a reference to the string stored at position `idx` of symbols_.
// The access uses operator[] with no range check, so `idx` must be smaller
// than Size().
76 const std::string &
GetSymbol(
size_t idx)
const {
return symbols_[idx]; }
// Sentinel with value -1.
// NOTE(review): the name suggests it marks an unused slot in buckets_ --
// confirm against the hashing code, which is not shown here.
83 static constexpr int64_t kEmptyBucket = -1;
// Declaration only; takes the desired bucket count `num_buckets`.
// NOTE(review): presumably resizes buckets_ and re-inserts every symbol --
// confirm against the definition, which is not part of this listing.
86 void Rehash(
size_t num_buckets);
// Maps `key` to a slot number: hashes it with str_hash_ and keeps only the
// bits selected by hash_mask_.
// NOTE(review): the AND-mask implies the bucket count is a power of two --
// confirm where hash_mask_ is set (not shown in this listing).
88 size_t GetHash(std::string_view key)
const {
89 return str_hash_(key) & hash_mask_;
// Hash functor applied to keys in GetHash().
92 const std::hash<std::string_view> str_hash_;
// Symbol strings; Size() and GetSymbol() read directly from this vector.
93 std::vector<std::string> symbols_;
// NOTE(review): presumably the hash buckets, each holding an index into
// symbols_ or kEmptyBucket -- confirm against InsertOrFind()/Rehash().
94 std::vector<int64_t> buckets_;
// Returns a copy of this implementation, owned by the returned unique_ptr
// (pure virtual).
110 virtual std::unique_ptr<SymbolTableImplBase> Copy()
const = 0;
// Writes this table to `strm` (pure virtual).
// NOTE(review): the bool presumably signals success -- confirm.
112 virtual bool Write(std::ostream &strm)
const = 0;
// Adds `symbol` under the caller-supplied `key` (pure virtual).
// NOTE(review): the returned int64_t is presumably the key the symbol ends
// up associated with -- confirm.
114 virtual int64_t AddSymbol(std::string_view symbol, int64_t key) = 0;
// Adds `symbol` without a caller-supplied key (pure virtual).
116 virtual int64_t AddSymbol(std::string_view symbol) = 0;
// Looks up a symbol string by `key` (pure virtual). Member(int64_t) treats
// an empty result as "key not present".
124 virtual std::string
Find(int64_t key)
const = 0;
// Looks up a key by `symbol` string (pure virtual).
127 virtual int64_t
Find(std::string_view symbol)
const = 0;
// Reports whether `key` is present in the table. A key counts as present
// exactly when the symbol string returned by Find(key) is non-empty.
129 virtual bool Member(int64_t key)
const {
const std::string symbol = Find(key);
return !symbol.empty();
}
131 virtual bool Member(std::string_view symbol)
const {
// Incorporates `table` into this implementation (pure virtual).
// NOTE(review): presumably adds every symbol of `table` -- confirm.
135 virtual void AddTable(
const SymbolTable &table) = 0;
// Returns the key associated with position `pos` (pure virtual).
137 virtual int64_t GetNthKey(ssize_t pos)
const = 0;
// Returns the table's name by reference (pure virtual).
139 virtual const std::string &Name()
const = 0;
// Replaces the table's name with `new_name` (pure virtual).
141 virtual void SetName(std::string_view new_name) = 0;
// Returns the checksum string by reference (pure virtual).
143 virtual const std::string &CheckSum()
const = 0;
// Returns the labeled checksum string by reference (pure virtual).
// NOTE(review): how it differs from CheckSum() is not visible here.
145 virtual const std::string &LabeledCheckSum()
const = 0;
// Returns an int64_t key value (pure virtual).
// NOTE(review): presumably the next key that would be auto-assigned --
// confirm.
147 virtual int64_t AvailableKey()
const = 0;
// Returns the number of symbols in the table (pure virtual).
149 virtual size_t NumSymbols()
const = 0;
// Reports whether this implementation may be modified (pure virtual).
151 virtual bool IsMutable()
const = 0;
// The four overrides below are declared `final`, so classes deriving further
// cannot replace them. Only the declarations appear here; their definitions
// and the enclosing class header are not part of this listing.
164 std::unique_ptr<SymbolTableImplBase> Copy()
const final;
166 int64_t AddSymbol(std::string_view symbol, int64_t key)
final;
168 int64_t AddSymbol(std::string_view symbol)
final;
172 void SetName(std::string_view new_name)
final;
186 check_sum_finalized_(false) {}
// Remainder of a member-initializer list that copies state from `impl`
// (the constructor's header line is not part of this listing). The key
// bookkeeping members are copied as-is; check_sum_finalized_ is not copied
// but reset to false.
// NOTE(review): presumably so the checksum strings are recomputed on first
// use in the copy -- confirm against MaybeRecomputeCheckSum().
190 available_key_(impl.available_key_),
191 dense_key_limit_(impl.dense_key_limit_),
192 symbols_(impl.symbols_),
193 idx_key_(impl.idx_key_),
194 key_map_(impl.key_map_),
195 check_sum_finalized_(false) {}
// Produces a new SymbolTableImpl by copy-constructing from *this and returns
// it as a unique_ptr to the base type.
197 std::unique_ptr<SymbolTableImplBase>
Copy()
const override {
198 return std::make_unique<SymbolTableImpl>(*this);
// Adds `symbol` under the explicit `key`; declaration only, defined elsewhere.
201 int64_t AddSymbol(std::string_view symbol, int64_t key)
override;
// Body line of the key-less AddSymbol overload (its signature line is not in
// this listing): delegates to the two-argument form with available_key_.
204 return AddSymbol(symbol, available_key_);
213 std::istream &strm, std::string_view name,
218 const std::string &sep = FST_FLAGS_fst_field_separator);
222 std::string_view source);
// Writes this table to `strm`; declaration only, defined elsewhere.
224 bool Write(std::ostream &strm)
const override;
// Returns the symbol string for `key`; declaration only, defined elsewhere.
228 std::string
Find(int64_t key)
const override;
// Returns the key associated with `symbol`.
//
// The symbol is first resolved to an index via symbols_.Find(). That index
// is returned unchanged when it equals kNoSymbol or lies below
// dense_key_limit_ (in that range the index itself serves as the key).
// Any other index is translated through idx_key_, which is addressed
// relative to dense_key_limit_.
232 int64_t
Find(std::string_view symbol)
const override {
233 int64_t idx = symbols_.Find(symbol);
234 if (idx == kNoSymbol || idx < dense_key_limit_)
return idx;
235 return idx_key_[idx - dense_key_limit_];
// Fragment of a position-to-key lookup (its signature line and several body
// lines are not part of this listing).
// Positions outside [0, symbols_.Size()) take the first branch; the statement
// executed there is missing from the listing.
239 if (pos < 0 || static_cast<size_t>(pos) >= symbols_.Size()) {
241 }
// Positions below dense_key_limit_ take this branch; its statement is also
// missing from the listing.
else if (pos < dense_key_limit_) {
// Resolves the key by fetching the symbol string at `pos` and looking it up
// with Find().
// NOTE(review): the lines between the branch above and this statement are
// missing, so its exact placement cannot be determined from here.
244 return Find(symbols_.GetSymbol(pos));
// Returns a reference to the stored table name, name_.
247 const std::string &
Name()
const override {
return name_; }
// Replaces name_ with an owned std::string copy of `new_name`, so the view
// passed in need not outlive the call.
249 void SetName(std::string_view new_name)
override {
250 name_ = std::string(new_name);
// Body lines of two checksum accessors (their signature lines are not part
// of this listing). Each calls MaybeRecomputeCheckSum() before returning its
// cached string.
254 MaybeRecomputeCheckSum();
255 return check_sum_string_;
// Same pattern for the labeled checksum string.
259 MaybeRecomputeCheckSum();
260 return labeled_check_sum_string_;
// Returns the number of symbols, as reported by symbols_.Size().
265 size_t NumSymbols()
const override {
return symbols_.Size(); }
// Called by the checksum accessors before they return their cached strings.
// Declaration only; the definition is not part of this listing.
274 void MaybeRecomputeCheckSum()
const;
// Key passed to AddSymbol(symbol, key) by the key-less AddSymbol overload.
277 int64_t available_key_;
// Indices below this limit are used directly as keys by Find(string_view).
278 int64_t dense_key_limit_;
// Key for each index at or above dense_key_limit_; addressed as
// idx - dense_key_limit_.
283 std::vector<int64_t> idx_key_;
// NOTE(review): presumably the reverse mapping from key to index -- confirm
// against the AddSymbol()/Find(int64_t) definitions.
286 std::map<int64_t, int64_t> key_map_;
// Checksum cache. These members are `mutable`, which lets the const
// MaybeRecomputeCheckSum() modify them.
// NOTE(review): check_sum_mutex_ presumably guards the three members above
// it -- confirm in the definition.
288 mutable bool check_sum_finalized_;
289 mutable std::string check_sum_string_;
290 mutable std::string labeled_check_sum_string_;
291 mutable Mutex check_sum_mutex_;
// Returns the key currently stored in this item (key_).
317 int64_t
Label()
const {
return key_; }
// Returns the symbol string for the stored key, looked up in the referenced
// table on every call (table_->Find(key_)); the string is returned by value.
321 std::string
Symbol()
const {
return table_->Find(key_); }
// Member-initializer list of the item constructor (its header line is not
// part of this listing): stores the address of `table` and the key found at
// position `pos`.
325 : table_(&table), key_(table.
GetNthKey(pos)) {}
// Re-targets the item at position `pos` by refreshing key_ from the table.
328 void SetPosition(ssize_t pos) { key_ = table_->GetNthKey(pos); }
// Fragment of the iterator advance logic (surrounding lines are not part of
// this listing): the cached item is repositioned only while pos_ is still
// below nsymbols_; otherwise iter_item_ is left untouched.
342 if (static_cast<size_t>(pos_) < nsymbols_) iter_item_.SetPosition(pos_);
// Member-initializer list of the iterator constructor (header line missing):
// records the starting position, snapshots the table's symbol count, and
// builds the cached item for that position.
362 : pos_(pos), nsymbols_(table.
NumSymbols()), iter_item_(table, pos) {}
// Member-initializer list of a SymbolTable constructor (header line missing):
// creates a fresh internal::SymbolTableImpl named `name` behind a shared_ptr.
375 : impl_(std::make_shared<internal::SymbolTableImpl>(name)) {}
// Parameter list and final statement of a stream-based reader (its name line
// and the line that produces `impl` are not part of this listing). `sep`
// defaults to FST_FLAGS_fst_field_separator.
382 std::istream &strm, std::string_view name,
383 const std::string &sep = FST_FLAGS_fst_field_separator) {
// Wraps a non-null `impl` in a SymbolTable allocated with `new` and returns
// the raw pointer; returns nullptr when `impl` is null.
// NOTE(review): the caller presumably takes ownership of the result.
386 return impl ?
new SymbolTable(std::move(impl)) :
nullptr;
// Parameter list of a path-based declaration (name line missing); `sep` has
// the same default as above.
391 const std::string &source,
392 const std::string &sep = FST_FLAGS_fst_field_separator);
// Same wrap-or-nullptr return as above, belonging to another reader whose
// signature is not part of this listing.
397 return impl ?
new SymbolTable(std::move(impl)) :
nullptr;
// Fragments of the path-based Read (signature and the condition around the
// log statement are not part of this listing).
// Opens `source` for reading in binary mode.
402 std::ifstream strm(source, std::ios_base::in | std::ios_base::binary);
// Logs an error naming the file.
// NOTE(review): presumably executed only when the stream failed to open --
// the guarding condition is missing from the listing.
404 LOG(ERROR) <<
"SymbolTable::Read: Can't open file: " << source;
// Delegates to the stream-based Read, passing the path as the source name.
407 return Read(strm, source);
// Body line of AddTable (signature missing): forwards to the implementation.
420 impl_->AddTable(table);
// Adds `symbol` under `key` by forwarding to the implementation and returns
// its result.
// NOTE(review): listing line 426 is missing between the signature and the
// return; it may perform a step before the forward -- confirm.
425 int64_t
AddSymbol(std::string_view symbol, int64_t key) {
427 return impl_->AddSymbol(symbol, key);
// Body line of the key-less AddSymbol overload (signature missing).
434 return impl_->AddSymbol(symbol);
// Returns the implementation's checksum string by reference.
444 const std::
string &CheckSum()
const {
return impl_->CheckSum(); }
// Returns the key at position `pos`, as reported by the implementation.
446 int64_t
GetNthKey(ssize_t pos)
const {
return impl_->GetNthKey(pos); }
// Returns the symbol string the implementation associates with `key`.
450 std::string
Find(int64_t key)
const {
return impl_->Find(key); }
// Returns the key the implementation associates with `symbol`.
454 int64_t
Find(std::string_view symbol)
const {
return impl_->Find(symbol); }
// Body line of the labeled-checksum accessor (signature missing): forwards
// to the implementation.
458 return impl_->LabeledCheckSum();
// Reports whether `key` is present, as decided by the implementation.
461 bool Member(int64_t key)
const {
return impl_->Member(key); }
// Reports whether `symbol` is present, as decided by the implementation.
463 bool Member(std::string_view symbol)
const {
return impl_->Member(symbol); }
// Returns the table's name by reference, as held by the implementation.
466 const std::string &
Name()
const {
return impl_->Name(); }
// Body line of RemoveSymbol (signature missing): forwards `key` to the
// implementation.
474 return impl_->RemoveSymbol(key);
// Body line of SetName (signature missing): forwards `new_name` to the
// implementation.
480 impl_->SetName(new_name);
// Writes the table to `strm` via the implementation and returns its result.
483 bool Write(std::ostream &strm)
const {
return impl_->Write(strm); }
// Writes the table to the file named `source`. Declaration only.
485 bool Write(
const std::string &source)
const;
// Tail of a declaration whose first line is not part of this listing; `sep`
// defaults to FST_FLAGS_fst_field_separator.
494 const std::string &sep = FST_FLAGS_fst_field_separator)
const;
// Writes the table as text to the file named `sink`, using `sep` (default
// FST_FLAGS_fst_field_separator). Declaration only.
// NOTE(review): `sep` presumably separates the fields of each line.
497 bool WriteText(
const std::string &sink,
498 const std::string &sep = FST_FLAGS_fst_field_separator)
const;
// Wraps an existing implementation object; `impl` is moved into impl_.
// Declared explicit, so a shared_ptr does not convert implicitly.
509 explicit SymbolTable(std::shared_ptr<internal::SymbolTableImplBase> impl)
510 : impl_(std::move(impl)) {}
// Accessor fragment (its signature line is not part of this listing):
// returns the held implementation as a const T*, obtained with down_cast.
// T defaults to internal::SymbolTableImplBase.
512 template <
class T =
internal::SymbolTableImplBase>
514 return down_cast<
const T *>(impl_.get());
// Template header of a second accessor whose signature and body are not part
// of this listing.
517 template <
class T =
internal::SymbolTableImplBase>
// Body of the mutation check (its signature line is not part of this
// listing). Makes sure impl_ is not shared with another owner:
//  - nothing happens when this object is the sole owner of impl_ or the
//    implementation reports itself as non-mutable;
//  - otherwise impl_ is replaced by a copy of the implementation.
// use_count() == 1 stands in for shared_ptr::unique(), which is deprecated
// in C++17 and removed in C++20; the two are equivalent.
525 if (impl_.use_count() == 1 || !impl_->IsMutable())
return;
526 std::unique_ptr<internal::SymbolTableImplBase> copy = impl_->Copy();
// Copy() must yield an object; a null result is treated as fatal.
527 CHECK(copy !=
nullptr);
// The unique_ptr converts to the shared_ptr member; the old implementation
// stays alive for its remaining owners.
528 impl_ = std::move(copy);
// Implementation object that the forwarding members delegate to; held through
// a shared_ptr.
531 std::shared_ptr<internal::SymbolTableImplBase> impl_;
536 "Use
SymbolTable::iterator, a C++ compliant iterator, instead")
537 SymbolTableIterator {
// Binds the iterator to `table` and captures its begin() and end() positions
// at construction time.
539 explicit SymbolTableIterator(
const SymbolTable &table)
540 : table_(table), iter_(table.
begin()), end_(table.
end()) {}
// Compiler-generated destructor.
542 ~SymbolTableIterator() =
default;
// True once the wrapped iterator compares equal to the end position that was
// captured at construction.
545 bool Done()
const {
const bool exhausted = (iter_ == end_);
return exhausted;
}
// Returns the key (label) of the item the iterator currently refers to.
548 int64_t Value()
const {
return iter_->Label(); }
// Returns the symbol string of the item the iterator currently refers to.
551 std::string Symbol()
const {
return iter_->Symbol(); }
// Advances the wrapped iterator by one position.
554 void Next() { ++iter_; }
// Moves the wrapped iterator back to table_.begin(). The stored end position
// is not refreshed.
557 void Reset() { iter_ = table_.begin(); }
// Builds a new symbol table from `table` according to `pairs` (the line
// naming the function and its first parameter is not part of this listing).
//
// Each pair is (old_label, new_label): the symbol string found under
// old_label in `table` is added to the new table under new_label.
// The result is returned as a raw pointer released from a unique_ptr, so it
// is no longer managed by any smart pointer once returned.
571 template <
class Label>
574 const std::vector<std::pair<Label, Label>> &pairs) {
// The new table is unnamed when the input is unnamed; otherwise its name is
// the input's name prefixed with "relabeled_".
575 auto new_table = std::make_unique<SymbolTable>(
576 table->
Name().empty() ? std::string()
577 : (std::string(
"relabeled_") + table->
Name()));
578 for (
const auto &[old_label, new_label] : pairs) {
// The lookup result is passed on without being checked.
// NOTE(review): when old_label is absent, Find() presumably yields an empty
// string, which would then be added under new_label -- confirm.
579 new_table->AddSymbol(table->
Find(old_label), new_label);
581 return new_table.release();
587 bool warning =
true);
597 #endif // FST_SYMBOL_TABLE_H_ const std::string & Name() const
bool Write(std::ostream &strm) const
int64_t AddSymbol(std::string_view symbol, int64_t key)
void SymbolTableToString(const SymbolTable *table, std::string *result)
class OPENFST_DEPRECATED("allow_negative is no-op") StringCompiler
void SetName(std::string_view new_name) override
void AddTable(const SymbolTable &table)
std::pair< int64_t, bool > InsertOrFind(std::string_view key)
virtual bool Member(std::string_view symbol) const
const_iterator end() const
pointer operator->() const
static SymbolTableImpl * Read(std::istream &strm, std::string_view source)
const_iterator cbegin() const
SymbolTable(std::string_view name="<unspecified>")
virtual SymbolTable * Copy() const
void SetName(std::string_view new_name)
std::input_iterator_tag iterator_category
SymbolTable * StringToSymbolTable(const std::string &str)
int64_t AddSymbol(std::string_view symbol) override
std::unique_ptr< T > WrapUnique(T *ptr)
constexpr int64_t kNoSymbol
const std::string & GetSymbol(size_t idx) const
size_t NumSymbols() const
SymbolTableImpl(const SymbolTableImpl &impl)
bool operator==(const iterator &that) const
std::ptrdiff_t difference_type
void RemoveSymbol(int64_t key)
DECLARE_string(fst_field_separator)
const value_type *const pointer
bool IsMutable() const final
int64_t Find(std::string_view key) const
size_t NumSymbols() const override
int64_t GetNthKey(ssize_t pos) const override
bool IsMutable() const final
static SymbolTable * ReadText(std::istream &strm, std::string_view name, const std::string &sep=FST_FLAGS_fst_field_separator)
SymbolTable(std::shared_ptr< internal::SymbolTableImplBase > impl)
bool operator!=(const iterator &that) const
std::string Symbol() const
int64_t AddSymbol(std::string_view symbol)
int64_t Find(std::string_view symbol) const override
const std::string & LabeledCheckSum() const override
const std::string & Name() const override
static SymbolTableImpl * ReadText(std::istream &strm, std::string_view name, const std::string &sep=FST_FLAGS_fst_field_separator)
int64_t AvailableKey() const
int64_t AvailableKey() const override
virtual bool Member(int64_t key) const
DECLARE_bool(fst_compat_symbols)
bool CompatSymbols(const SymbolTable *syms1, const SymbolTable *syms2, bool warning=true)
std::string Find(int64_t key) const
const std::string & LabeledCheckSum() const
const_iterator begin() const
std::unique_ptr< SymbolTableImplBase > Copy() const override
int64_t GetNthKey(ssize_t pos) const
static SymbolTable * Read(const std::string &source)
const std::string & CheckSum() const override
static SymbolTable * Read(std::istream &strm, std::string_view source)
SymbolTable * RelabelSymbolTable(const SymbolTable *table, const std::vector< std::pair< Label, Label >> &pairs)
SymbolTableImpl(std::string_view name)
bool Member(std::string_view symbol) const
bool Member(int64_t key) const
int64_t Find(std::string_view symbol) const
const_iterator cend() const
void RemoveSymbol(size_t idx)