20 #ifndef FST_STRING_WEIGHT_H_ 21 #define FST_STRING_WEIGHT_H_ 39 #include <string_view> 65 template <
typename L, StringType S = STRING_LEFT>
78 template <
typename Iterator>
80 for (
auto iter = begin; iter != end; ++iter)
PushBack(*iter);
100 static const std::string &
Type() {
101 static const std::string *
const type =
new std::string(
104 : (S ==
STRING_RIGHT ?
"right_string" :
"restricted_string"));
110 std::istream &
Read(std::istream &strm);
112 std::ostream &
Write(std::ostream &strm)
const;
139 size_t Size()
const {
return first_ ? rest_.size() + 1 : 0; }
142 if (first_) rest_.push_front(first_);
150 rest_.push_back(label);
156 std::list<Label> rest_;
160 template <
class StringWeight_>
164 using Label =
typename Weight::Label;
167 : first_(w.first_), rest_(w.rest_), init_(true), iter_(rest_.begin()) {}
173 return iter_ == rest_.end();
177 const Label &
Value()
const {
return init_ ? first_ : *iter_; }
189 iter_ = rest_.begin();
194 const decltype(Weight::rest_) &rest_;
196 typename decltype(Weight::rest_)::const_iterator iter_;
200 template <
class StringWeight_>
204 using Label =
typename Weight::Label;
209 fin_(first_ ==
Label()),
210 iter_(rest_.rbegin()) {}
212 bool Done()
const {
return fin_; }
214 const Label &
Value()
const {
return iter_ == rest_.rend() ? first_ : *iter_; }
217 if (iter_ == rest_.rend()) {
226 iter_ = rest_.rbegin();
231 const decltype(Weight::rest_) &rest_;
233 typename decltype(Weight::rest_)::const_reverse_iterator iter_;
239 template <
typename Label, StringType S>
244 for (int32_t i = 0; i < size; ++i) {
252 template <
typename Label, StringType S>
254 const int32_t size =
Size();
262 template <
typename Label, StringType S>
268 template <
typename Label, StringType S>
278 template <
typename Label, StringType S>
282 h ^= h << 1 ^ iter.Value();
287 template <
typename Label, StringType S>
290 if (w1.
Size() != w2.
Size())
return false;
294 for (; !iter1.Done(); iter1.Next(), iter2.Next()) {
295 if (iter1.Value() != iter2.Value())
return false;
300 template <
typename Label, StringType S>
306 template <
typename Label, StringType S>
313 template <
typename Label, StringType S>
318 return strm <<
"Epsilon";
319 }
else if (iter.Value() ==
Label(kStringInfinity)) {
320 return strm <<
"Infinity";
321 }
else if (iter.Value() ==
Label(kStringBad)) {
322 return strm <<
"BadString";
324 for (
size_t i = 0; !iter.Done(); ++i, iter.Next()) {
326 strm << iter.Value();
332 template <
typename Label, StringType S>
338 if (str ==
"Infinity") {
339 weight = Weight::Zero();
340 }
else if (str ==
"Epsilon") {
341 weight = Weight::One();
344 for (std::string_view sv :
StrSplit(str, kStringSeparator)) {
346 if (!maybe_label.has_value()) {
347 strm.clear(std::ios::badbit);
359 template <
typename Label, StringType S>
363 if (!w1.
Member() || !w2.
Member())
return Weight::NoWeight();
364 if (w1 == Weight::Zero())
return w2;
365 if (w2 == Weight::Zero())
return w1;
367 FSTERROR() <<
"StringWeight::Plus: Unequal arguments " 368 <<
"(non-functional FST?)" 369 <<
" w1 = " << w1 <<
" w2 = " << w2;
370 return Weight::NoWeight();
376 template <
typename Label>
381 if (!w1.
Member() || !w2.
Member())
return Weight::NoWeight();
382 if (w1 == Weight::Zero())
return w2;
383 if (w2 == Weight::Zero())
return w1;
385 typename Weight::Iterator iter1(w1);
386 typename Weight::Iterator iter2(w2);
387 for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
388 iter1.Next(), iter2.Next()) {
389 sum.PushBack(iter1.Value());
395 template <
typename Label>
400 if (!w1.
Member() || !w2.
Member())
return Weight::NoWeight();
401 if (w1 == Weight::Zero())
return w2;
402 if (w2 == Weight::Zero())
return w1;
404 typename Weight::ReverseIterator iter1(w1);
405 typename Weight::ReverseIterator iter2(w2);
406 for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
407 iter1.Next(), iter2.Next()) {
408 sum.PushFront(iter1.Value());
413 template <
typename Label, StringType S>
417 if (!w1.
Member() || !w2.
Member())
return Weight::NoWeight();
418 if (w1 == Weight::Zero() || w2 == Weight::Zero())
return Weight::Zero();
420 for (
typename Weight::Iterator iter(w2); !iter.Done(); iter.Next()) {
421 product.PushBack(iter.Value());
427 template <
typename Label, StringType S>
431 if (!w1.
Member() || !w2.
Member())
return Weight::NoWeight();
432 if (w2 == Weight::Zero()) {
433 return Weight(
Label(kStringBad));
434 }
else if (w1 == Weight::Zero()) {
435 return Weight::Zero();
438 typename Weight::Iterator iter(w1);
440 for (; !iter.Done() && i < w2.
Size(); iter.Next(), ++i) {
442 for (; !iter.Done(); iter.Next()) result.PushBack(iter.Value());
447 template <
typename Label, StringType S>
451 if (!w1.
Member() || !w2.
Member())
return Weight::NoWeight();
452 if (w2 == Weight::Zero()) {
453 return Weight(
Label(kStringBad));
454 }
else if (w1 == Weight::Zero()) {
455 return Weight::Zero();
458 typename Weight::ReverseIterator iter(w1);
460 for (; !iter.Done() && i < w2.
Size(); iter.Next(), ++i) {
462 for (; !iter.Done(); iter.Next()) result.PushFront(iter.Value());
467 template <
typename Label, StringType S>
477 FSTERROR() <<
"StringWeight::Divide: " 478 <<
"Only explicit left or right division is defined " 479 <<
"for the " << Weight::Type() <<
" semiring";
480 return Weight::NoWeight();
485 template <
typename Label>
490 FSTERROR() <<
"StringWeight::Divide: Only left division is defined " 491 <<
"for the left string semiring";
498 template <
typename Label>
503 FSTERROR() <<
"StringWeight::Divide: Only right division is defined " 504 <<
"for the right string semiring";
513 template <
class Label, StringType S>
519 bool allow_zero =
true,
523 allow_zero_(allow_zero),
524 alphabet_size_(alphabet_size),
525 max_string_length_(max_string_length) {}
528 const int n = std::uniform_int_distribution<>(
529 0, max_string_length_ + allow_zero_)(rand_);
530 if (allow_zero_ && n == max_string_length_)
return Weight::Zero();
531 std::vector<Label> labels;
533 for (
int i = 0; i < n; ++i) {
535 std::uniform_int_distribution<>(1, alphabet_size_)(rand_));
537 return Weight(labels.begin(), labels.end());
541 mutable std::mt19937_64 rand_;
542 const bool allow_zero_;
543 const size_t alphabet_size_;
544 const size_t max_string_length_;
577 template <
class Label,
class W, GallicType G = GALLIC_LEFT>
579 :
public ProductWeight<StringWeight<Label, GallicStringType(G)>, W> {
611 static const std::string &
Type() {
612 static const std::string *
const type =
new std::string(
618 ?
"restricted_gallic" 619 : (G ==
GALLIC_MIN ?
"min_gallic" :
"gallic"))));
633 template <
class Label,
class W, GallicType G>
641 template <
class Label,
class W>
649 template <
class Label,
class W, GallicType G>
656 template <
class Label,
class W, GallicType G>
666 template <
class Label,
class W, GallicType G>
669 ProductWeight<StringWeight<Label, GallicStringType(G)>, W>> {
676 bool allow_zero =
true)
677 : generate_(seed, allow_zero) {}
686 template <
class Label,
class W>
698 if (s1.
Size() < s2.
Size())
return true;
699 if (s1.
Size() > s2.
Size())
return false;
702 while (!iter1.
Done()) {
703 const auto l1 = iter1.
Value();
704 const auto l2 = iter2.
Value();
705 if (l1 < l2)
return true;
706 if (l1 > l2)
return false;
723 template <
class Label,
class W>
725 :
public UnionWeight<GallicWeight<Label, W, GALLIC_RESTRICT>,
726 GallicUnionWeightOptions<Label, W>> {
734 using UW::Properties;
767 static const std::string &
Type() {
768 static const std::string *
const type =
new std::string(
"gallic");
773 return UW::Quantize(delta);
780 template <
class Label,
class W>
786 return Plus(static_cast<UW>(w1), static_cast<UW>(w2));
790 template <
class Label,
class W>
796 return Times(static_cast<UW>(w1), static_cast<UW>(w2));
800 template <
class Label,
class W>
807 return Divide(static_cast<UW>(w1), static_cast<UW>(w2), divide_type);
812 template <
class Label,
class W>
814 :
public WeightGenerate<UnionWeight<GallicWeight<Label, W, GALLIC_RESTRICT>,
815 GallicUnionWeightOptions<Label, W>>> {
823 bool allow_zero =
true)
824 : generate_(seed, allow_zero) {}
834 #endif // FST_STRING_WEIGHT_H_ WeightGenerate(uint64_t seed=std::random_device()(), bool allow_zero=true)
GallicWeight< Label, W, GALLIC > Quantize(float delta=kDelta) const
StringWeightIterator(const Weight &w)
GW operator()(const GW &w1, const GW &w2) const
StringWeight< Label, ReverseStringType(S)> ReverseWeight
Weight operator()() const
static const GallicWeight & Zero()
static const StringWeight & One()
StringWeight< Label, S > DivideLeft(const StringWeight< Label, S > &w1, const StringWeight< Label, S > &w2)
ErrorWeight Plus(const ErrorWeight &, const ErrorWeight &)
void PushBack(Label label)
void Reverse(const FstClass &ifst, const std::vector< std::pair< int64_t, int64_t >> &parens, std::vector< int64_t > *assignments, MutableFstClass *ofst)
Weight operator()() const
const Label & Value() const
StringWeight(const Iterator begin, const Iterator end)
static constexpr uint64_t Properties()
StringWeightIterator< StringWeight > Iterator
constexpr GallicType ReverseGallicType(GallicType g)
ErrorWeight Times(const ErrorWeight &, const ErrorWeight &)
static const GallicWeight< Label, W, GALLIC > & One()
constexpr uint64_t kIdempotent
typename Weight::Label Label
GallicWeight(SW w1, W w2)
GallicWeight(SW w1, W w2)
static const StringWeight & Zero()
constexpr char kStringSeparator
internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim)
const Label & Value() const
static const StringWeight & NoWeight()
constexpr uint64_t kRightSemiring
std::ostream & WriteType(std::ostream &strm, const T t)
static const std::string & Type()
std::optional< int64_t > ParseInt64(std::string_view s, int base=10)
constexpr int kStringInfinity
std::istream & operator>>(std::istream &strm, FloatWeightTpl< T > &w)
GallicWeight(const UW &weight)
bool operator!=(const ErrorWeight &, const ErrorWeight &)
static const std::string & Type()
GallicWeight(std::string_view str, int *nread=nullptr)
constexpr StringType ReverseStringType(StringType s)
std::ostream & operator<<(std::ostream &strm, const ErrorWeight &)
StringWeight(Label label)
static const GallicWeight & One()
StringWeight< Label, S > DivideRight(const StringWeight< Label, S > &w1, const StringWeight< Label, S > &w2)
Weight operator()() const
GallicWeight Quantize(float delta=kDelta) const
WeightGenerate(uint64_t seed=std::random_device()(), bool allow_zero=true)
std::istream & Read(std::istream &strm)
GallicWeight(const ProductWeight< SW, W > &w)
std::ostream & Write(std::ostream &strm) const
GallicWeight(std::string_view s, int *nread=nullptr)
typename Weight::Label Label
bool operator()(const GW &w1, const GW &w2) const
static const GallicWeight< Label, W, GALLIC > & Zero()
ErrorWeight Divide(const ErrorWeight &, const ErrorWeight &)
bool operator==(const ErrorWeight &, const ErrorWeight &)
constexpr size_t kNumRandomWeights
std::istream & ReadType(std::istream &strm, T *t)
WeightGenerate(uint64_t seed=std::random_device()(), bool allow_zero=true, size_t alphabet_size=kNumRandomWeights, size_t max_string_length=kNumRandomWeights)
static const GallicWeight & NoWeight()
ReverseWeight Reverse() const
void PushFront(Label label)
constexpr uint64_t kLeftSemiring
StringWeightReverseIterator(const Weight &w)
static const GallicWeight< Label, W, GALLIC > & NoWeight()
ReverseWeight Reverse() const
bool ApproxEqual(const ErrorWeight &, const ErrorWeight &, float)
StringWeight Quantize(float delta=kDelta) const
ReverseWeight Reverse() const
const StringWeight< Label, GallicStringType(G)> & Value1() const
GallicWeight(const GW &weight)
constexpr StringType GallicStringType(GallicType g)
static const std::string & Type()