20 #ifndef FST_STRING_WEIGHT_H_ 21 #define FST_STRING_WEIGHT_H_ 32 #include <string_view> 59 template <
typename L, StringType S = STRING_LEFT>
72 template <
typename Iterator>
74 for (
auto iter = begin; iter != end; ++iter)
PushBack(*iter);
94 static const std::string &
Type() {
95 static const std::string *
const type =
new std::string(
98 : (S ==
STRING_RIGHT ?
"right_string" :
"restricted_string"));
104 std::istream &
Read(std::istream &strm);
106 std::ostream &
Write(std::ostream &strm)
const;
// Returns the number of labels held by this weight: 0 when first_ evaluates
// to false, otherwise rest_.size() + 1.
// NOTE(review): the "+ 1" presumably counts first_ itself, with the remaining
// labels kept in rest_ — confirm against the full class definition.
133 size_t Size()
const {
return first_ ? rest_.size() + 1 : 0; }
136 if (first_) rest_.push_front(first_);
144 rest_.push_back(label);
150 std::list<Label> rest_;
154 template <
class StringWeight_>
158 using Label =
typename Weight::Label;
161 : first_(w.first_), rest_(w.rest_), init_(true), iter_(rest_.begin()) {}
167 return iter_ == rest_.end();
// Returns the label at the current iterator position: first_ while init_ is
// still set, otherwise the element of rest_ that iter_ refers to.
// iter_ is dereferenced without a check against rest_.end() here, so callers
// are presumably expected to test Done() first — TODO confirm.
171 const Label &
Value()
const {
return init_ ? first_ : *iter_; }
183 iter_ = rest_.begin();
188 const decltype(Weight::rest_) &rest_;
190 typename decltype(Weight::rest_)::const_iterator iter_;
194 template <
class StringWeight_>
198 using Label =
typename Weight::Label;
203 fin_(first_ ==
Label()),
204 iter_(rest_.rbegin()) {}
// Reports whether the reverse traversal is finished. This simply returns the
// fin_ flag, which the constructor initializes to (first_ == Label()).
206 bool Done()
const {
return fin_; }
// Returns the current label of the reverse traversal. iter_ is a reverse
// iterator over rest_ (started at rest_.rbegin()); while it has not reached
// rest_.rend() the label it refers to is returned, and once it has, the
// value returned is first_.
208 const Label &
Value()
const {
return iter_ == rest_.rend() ? first_ : *iter_; }
211 if (iter_ == rest_.rend()) {
220 iter_ = rest_.rbegin();
225 const decltype(Weight::rest_) &rest_;
227 typename decltype(Weight::rest_)::const_reverse_iterator iter_;
233 template <
typename Label, StringType S>
238 for (int32_t i = 0; i < size; ++i) {
246 template <
typename Label, StringType S>
248 const int32_t size =
Size();
256 template <
typename Label, StringType S>
262 template <
typename Label, StringType S>
272 template <
typename Label, StringType S>
276 h ^= h << 1 ^ iter.Value();
281 template <
typename Label, StringType S>
284 if (w1.
Size() != w2.
Size())
return false;
288 for (; !iter1.Done(); iter1.Next(), iter2.Next()) {
289 if (iter1.Value() != iter2.Value())
return false;
294 template <
typename Label, StringType S>
300 template <
typename Label, StringType S>
307 template <
typename Label, StringType S>
312 return strm <<
"Epsilon";
313 }
else if (iter.Value() ==
Label(kStringInfinity)) {
314 return strm <<
"Infinity";
315 }
else if (iter.Value() ==
Label(kStringBad)) {
316 return strm <<
"BadString";
318 for (
size_t i = 0; !iter.Done(); ++i, iter.Next()) {
320 strm << iter.Value();
326 template <
typename Label, StringType S>
332 if (str ==
"Infinity") {
333 weight = Weight::Zero();
334 }
else if (str ==
"Epsilon") {
335 weight = Weight::One();
338 for (std::string_view sv :
StrSplit(str, kStringSeparator)) {
340 if (!maybe_label.has_value()) {
341 strm.clear(std::ios::badbit);
353 template <
typename Label, StringType S>
357 if (!w1.
Member() || !w2.
Member())
return Weight::NoWeight();
358 if (w1 == Weight::Zero())
return w2;
359 if (w2 == Weight::Zero())
return w1;
361 FSTERROR() <<
"StringWeight::Plus: Unequal arguments " 362 <<
"(non-functional FST?)" 363 <<
" w1 = " << w1 <<
" w2 = " << w2;
364 return Weight::NoWeight();
370 template <
typename Label>
375 if (!w1.
Member() || !w2.
Member())
return Weight::NoWeight();
376 if (w1 == Weight::Zero())
return w2;
377 if (w2 == Weight::Zero())
return w1;
379 typename Weight::Iterator iter1(w1);
380 typename Weight::Iterator iter2(w2);
381 for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
382 iter1.Next(), iter2.Next()) {
383 sum.PushBack(iter1.Value());
389 template <
typename Label>
394 if (!w1.
Member() || !w2.
Member())
return Weight::NoWeight();
395 if (w1 == Weight::Zero())
return w2;
396 if (w2 == Weight::Zero())
return w1;
398 typename Weight::ReverseIterator iter1(w1);
399 typename Weight::ReverseIterator iter2(w2);
400 for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
401 iter1.Next(), iter2.Next()) {
402 sum.PushFront(iter1.Value());
407 template <
typename Label, StringType S>
411 if (!w1.
Member() || !w2.
Member())
return Weight::NoWeight();
412 if (w1 == Weight::Zero() || w2 == Weight::Zero())
return Weight::Zero();
414 for (
typename Weight::Iterator iter(w2); !iter.Done(); iter.Next()) {
415 product.PushBack(iter.Value());
421 template <
typename Label, StringType S>
425 if (!w1.
Member() || !w2.
Member())
return Weight::NoWeight();
426 if (w2 == Weight::Zero()) {
427 return Weight(
Label(kStringBad));
428 }
else if (w1 == Weight::Zero()) {
429 return Weight::Zero();
432 typename Weight::Iterator iter(w1);
434 for (; !iter.Done() && i < w2.
Size(); iter.Next(), ++i) {
436 for (; !iter.Done(); iter.Next()) result.PushBack(iter.Value());
441 template <
typename Label, StringType S>
445 if (!w1.
Member() || !w2.
Member())
return Weight::NoWeight();
446 if (w2 == Weight::Zero()) {
447 return Weight(
Label(kStringBad));
448 }
else if (w1 == Weight::Zero()) {
449 return Weight::Zero();
452 typename Weight::ReverseIterator iter(w1);
454 for (; !iter.Done() && i < w2.
Size(); iter.Next(), ++i) {
456 for (; !iter.Done(); iter.Next()) result.PushFront(iter.Value());
461 template <
typename Label, StringType S>
471 FSTERROR() <<
"StringWeight::Divide: " 472 <<
"Only explicit left or right division is defined " 473 <<
"for the " << Weight::Type() <<
" semiring";
474 return Weight::NoWeight();
479 template <
typename Label>
484 FSTERROR() <<
"StringWeight::Divide: Only left division is defined " 485 <<
"for the left string semiring";
492 template <
typename Label>
497 FSTERROR() <<
"StringWeight::Divide: Only right division is defined " 498 <<
"for the right string semiring";
507 template <
class Label, StringType S>
513 bool allow_zero =
true,
517 allow_zero_(allow_zero),
518 alphabet_size_(alphabet_size),
519 max_string_length_(max_string_length) {}
522 const int n = std::uniform_int_distribution<>(
523 0, max_string_length_ + allow_zero_)(rand_);
524 if (allow_zero_ && n == max_string_length_)
return Weight::Zero();
525 std::vector<Label> labels;
527 for (
int i = 0; i < n; ++i) {
529 std::uniform_int_distribution<>(1, alphabet_size_)(rand_));
531 return Weight(labels.begin(), labels.end());
535 mutable std::mt19937_64 rand_;
536 const bool allow_zero_;
537 const size_t alphabet_size_;
538 const size_t max_string_length_;
571 template <
class Label,
class W, GallicType G = GALLIC_LEFT>
573 :
public ProductWeight<StringWeight<Label, GallicStringType(G)>, W> {
605 static const std::string &
Type() {
606 static const std::string *
const type =
new std::string(
612 ?
"restricted_gallic" 613 : (G ==
GALLIC_MIN ?
"min_gallic" :
"gallic"))));
627 template <
class Label,
class W, GallicType G>
635 template <
class Label,
class W>
643 template <
class Label,
class W, GallicType G>
650 template <
class Label,
class W, GallicType G>
660 template <
class Label,
class W, GallicType G>
663 ProductWeight<StringWeight<Label, GallicStringType(G)>, W>> {
670 bool allow_zero =
true)
671 : generate_(seed, allow_zero) {}
680 template <
class Label,
class W>
692 if (s1.
Size() < s2.
Size())
return true;
693 if (s1.
Size() > s2.
Size())
return false;
696 while (!iter1.
Done()) {
697 const auto l1 = iter1.
Value();
698 const auto l2 = iter2.
Value();
699 if (l1 < l2)
return true;
700 if (l1 > l2)
return false;
717 template <
class Label,
class W>
719 :
public UnionWeight<GallicWeight<Label, W, GALLIC_RESTRICT>,
720 GallicUnionWeightOptions<Label, W>> {
728 using UW::Properties;
761 static const std::string &
Type() {
762 static const std::string *
const type =
new std::string(
"gallic");
767 return UW::Quantize(delta);
774 template <
class Label,
class W>
780 return Plus(static_cast<UW>(w1), static_cast<UW>(w2));
784 template <
class Label,
class W>
790 return Times(static_cast<UW>(w1), static_cast<UW>(w2));
794 template <
class Label,
class W>
801 return Divide(static_cast<UW>(w1), static_cast<UW>(w2), divide_type);
806 template <
class Label,
class W>
808 :
public WeightGenerate<UnionWeight<GallicWeight<Label, W, GALLIC_RESTRICT>,
809 GallicUnionWeightOptions<Label, W>>> {
817 bool allow_zero =
true)
818 : generate_(seed, allow_zero) {}
828 #endif // FST_STRING_WEIGHT_H_ WeightGenerate(uint64_t seed=std::random_device()(), bool allow_zero=true)
GallicWeight< Label, W, GALLIC > Quantize(float delta=kDelta) const
StringWeightIterator(const Weight &w)
GW operator()(const GW &w1, const GW &w2) const
StringWeight< Label, ReverseStringType(S)> ReverseWeight
Weight operator()() const
static const GallicWeight & Zero()
static const StringWeight & One()
StringWeight< Label, S > DivideLeft(const StringWeight< Label, S > &w1, const StringWeight< Label, S > &w2)
ErrorWeight Plus(const ErrorWeight &, const ErrorWeight &)
void PushBack(Label label)
void Reverse(const FstClass &ifst, const std::vector< std::pair< int64_t, int64_t >> &parens, std::vector< int64_t > *assignments, MutableFstClass *ofst)
Weight operator()() const
const Label & Value() const
StringWeight(const Iterator begin, const Iterator end)
static constexpr uint64_t Properties()
StringWeightIterator< StringWeight > Iterator
constexpr GallicType ReverseGallicType(GallicType g)
ErrorWeight Times(const ErrorWeight &, const ErrorWeight &)
static const GallicWeight< Label, W, GALLIC > & One()
GallicWeight(const std::string &str, int *nread=nullptr)
constexpr uint64_t kIdempotent
typename Weight::Label Label
GallicWeight(SW w1, W w2)
GallicWeight(SW w1, W w2)
static const StringWeight & Zero()
constexpr char kStringSeparator
internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim)
const Label & Value() const
static const StringWeight & NoWeight()
constexpr uint64_t kRightSemiring
std::ostream & WriteType(std::ostream &strm, const T t)
static const std::string & Type()
std::optional< int64_t > ParseInt64(std::string_view s, int base=10)
constexpr int kStringInfinity
std::istream & operator>>(std::istream &strm, FloatWeightTpl< T > &w)
GallicWeight(const UW &weight)
bool operator!=(const ErrorWeight &, const ErrorWeight &)
static const std::string & Type()
constexpr StringType ReverseStringType(StringType s)
std::ostream & operator<<(std::ostream &strm, const ErrorWeight &)
StringWeight(Label label)
static const GallicWeight & One()
StringWeight< Label, S > DivideRight(const StringWeight< Label, S > &w1, const StringWeight< Label, S > &w2)
Weight operator()() const
GallicWeight Quantize(float delta=kDelta) const
WeightGenerate(uint64_t seed=std::random_device()(), bool allow_zero=true)
GallicWeight(const std::string &s, int *nread=nullptr)
std::istream & Read(std::istream &strm)
GallicWeight(const ProductWeight< SW, W > &w)
std::ostream & Write(std::ostream &strm) const
typename Weight::Label Label
bool operator()(const GW &w1, const GW &w2) const
static const GallicWeight< Label, W, GALLIC > & Zero()
ErrorWeight Divide(const ErrorWeight &, const ErrorWeight &)
bool operator==(const ErrorWeight &, const ErrorWeight &)
constexpr size_t kNumRandomWeights
std::istream & ReadType(std::istream &strm, T *t)
WeightGenerate(uint64_t seed=std::random_device()(), bool allow_zero=true, size_t alphabet_size=kNumRandomWeights, size_t max_string_length=kNumRandomWeights)
static const GallicWeight & NoWeight()
ReverseWeight Reverse() const
void PushFront(Label label)
constexpr uint64_t kLeftSemiring
StringWeightReverseIterator(const Weight &w)
static const GallicWeight< Label, W, GALLIC > & NoWeight()
ReverseWeight Reverse() const
bool ApproxEqual(const ErrorWeight &, const ErrorWeight &, float)
StringWeight Quantize(float delta=kDelta) const
ReverseWeight Reverse() const
const StringWeight< Label, GallicStringType(G)> & Value1() const
GallicWeight(const GW &weight)
constexpr StringType GallicStringType(GallicType g)
static const std::string & Type()