FST  openfst-1.7.2
OpenFst Library
pdtreplace.cc
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // Converts an RTN represented by FSTs and non-terminal labels into a PDT.
5 
6 #include <cstring>
7 
8 #include <string>
9 #include <vector>
10 
11 #include <fst/flags.h>
12 
15 #include <fst/util.h>
16 #include <fst/vector-fst.h>
17 
// Command-line flags consumed by main() below.

// File name handed to WriteLabelPairs() for the generated parenthesis pairs;
// when left empty, main() skips writing the pairs.
18 DEFINE_string(pdt_parentheses, "", "PDT parenthesis label pairs");
// Textual parser type; main() converts it with GetPdtParserType() and exits
// with an error when the value is not recognized.
19 DEFINE_string(pdt_parser_type, "left",
20  "Construction method, one of: \"left\", \"left_sr\"");
// Forwarded unchanged to PdtReplace() as its start_paren_labels argument.
21 DEFINE_int64(start_paren_labels, fst::kNoLabel,
22  "Index to use for the first inserted parentheses; if not "
23  "specified, the next available label beyond the highest output "
24  "label is used");
// Forwarded unchanged to PdtReplace() as its left_paren_prefix argument.
25 DEFINE_string(left_paren_prefix, "(_", "Prefix to attach to SymbolTable "
26  "labels for inserted left parentheses");
// Forwarded unchanged to PdtReplace() as its right_paren_prefix argument.
27 DEFINE_string(right_paren_prefix, ")_", "Prefix to attach to SymbolTable "
28  "labels for inserted right parentheses");
29 
30 void Cleanup(std::vector<fst::script::LabelFstClassPair> *pairs) {
31  for (const auto &pair : *pairs) {
32  delete pair.second;
33  }
34  pairs->clear();
35 }
36 
37 int main(int argc, char **argv) {
38  namespace s = fst::script;
41  using fst::PdtParserType;
43 
44  string usage = "Converts an RTN represented by FSTs";
45  usage += " and non-terminal labels into PDT.\n\n Usage: ";
46  usage += argv[0];
47  usage += " root.fst rootlabel [rule1.fst label1 ...] [out.fst]\n";
48 
49  std::set_new_handler(FailedNewHandler);
50  SET_FLAGS(usage.c_str(), &argc, &argv, true);
51  if (argc < 4) {
52  ShowUsage();
53  return 1;
54  }
55 
56  const string in_name = argv[1];
57  const string out_name = argc % 2 == 0 ? argv[argc - 1] : "";
58 
59  auto *ifst = FstClass::Read(in_name);
60  if (!ifst) return 1;
61 
62  PdtParserType parser_type;
63  if (!s::GetPdtParserType(FLAGS_pdt_parser_type, &parser_type)) {
64  LOG(ERROR) << argv[0] << ": Unknown PDT parser type: "
65  << FLAGS_pdt_parser_type;
66  delete ifst;
67  return 1;
68  }
69 
70  std::vector<s::LabelFstClassPair> pairs;
71  // Note that if the root label is beyond the range of the underlying FST's
72  // labels, truncation will occur.
73  const auto root = atoll(argv[2]);
74  pairs.emplace_back(root, ifst);
75 
76  for (auto i = 3; i < argc - 1; i += 2) {
77  ifst = FstClass::Read(argv[i]);
78  if (!ifst) {
79  Cleanup(&pairs);
80  return 1;
81  }
82  // Note that if the root label is beyond the range of the underlying FST's
83  // labels, truncation will occur.
84  const auto label = atoll(argv[i + 1]);
85  pairs.emplace_back(label, ifst);
86  }
87 
88  VectorFstClass ofst(ifst->ArcType());
89  std::vector<s::LabelPair> parens;
90  s::PdtReplace(pairs, &ofst, &parens, root, parser_type,
91  FLAGS_start_paren_labels, FLAGS_left_paren_prefix,
92  FLAGS_right_paren_prefix);
93  Cleanup(&pairs);
94 
95  if (!FLAGS_pdt_parentheses.empty()) {
96  if (!WriteLabelPairs(FLAGS_pdt_parentheses, parens)) return 1;
97  }
98 
99  ofst.Write(out_name);
100 
101  return 0;
102 }
void ShowUsage(bool long_usage=true)
Definition: flags.cc:124
void PdtReplace(const std::vector< LabelFstClassPair > &pairs, MutableFstClass *ofst, std::vector< LabelPair > *parens, int64 root, PdtParserType parser_type, int64 start_paren_labels, const string &left_paren_prefix, const string &right_paren_prefix)
Definition: pdtscript.cc:50
DEFINE_int64(start_paren_labels, fst::kNoLabel,"Index to use for the first inserted parentheses; if not ""specified, the next available label beyond the highest output ""label is used")
constexpr int kNoLabel
Definition: fst.h:179
DEFINE_string(pdt_parentheses,"","PDT parenthesis label pairs")
#define LOG(type)
Definition: log.h:48
void Cleanup(std::vector< fst::script::LabelFstClassPair > *pairs)
Definition: pdtreplace.cc:30
void FailedNewHandler()
Definition: compat.cc:25
#define SET_FLAGS(usage, argc, argv, rmflags)
Definition: flags.h:214
bool GetPdtParserType(const string &str, PdtParserType *pt)
Definition: getters.cc:22
int main(int argc, char **argv)
Definition: pdtreplace.cc:37
PdtParserType
Definition: replace.h:43
bool WriteLabelPairs(const string &filename, const std::vector< std::pair< Label, Label >> &pairs)
Definition: util.h:338