FST  openfst-1.8.3
OpenFst Library
read_write_utils.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // Definition of ReadLabelTriples based on ReadLabelPairs, like that in
19 // nlp/fst/lib/util.h for pairs, and similarly for WriteLabelTriples.
20 
21 #ifndef FST_EXTENSIONS_MPDT_READ_WRITE_UTILS_H_
22 #define FST_EXTENSIONS_MPDT_READ_WRITE_UTILS_H_
23 
24 #include <cstddef>
25 #include <istream>
26 #include <string>
27 #include <utility>
28 #include <vector>
29 
30 #include <fst/log.h>
31 #include <fstream>
32 #include <fst/util.h>
33 #include <string_view>
34 
35 namespace fst {
36 
37 // Returns true on success.
38 template <typename Label>
39 bool ReadLabelTriples(const std::string &source,
40  std::vector<std::pair<Label, Label>> *pairs,
41  std::vector<Label> *assignments) {
42  std::ifstream fstrm(source);
43  if (!fstrm) {
44  LOG(ERROR) << "ReadIntTriples: Can't open file: " << source;
45  return false;
46  }
47  static constexpr auto kLineLen = 8096;
48  char line[kLineLen];
49  size_t nline = 0;
50  pairs->clear();
51  while (fstrm.getline(line, kLineLen)) {
52  ++nline;
53  std::vector<std::string_view> col =
54  StrSplit(line, ByAnyChar("\n\t "), SkipEmpty());
55  // Empty line or comment?
56  if (col.empty() || col[0].empty() || col[0][0] == '#') continue;
57  if (col.size() != 3) {
58  LOG(ERROR) << "ReadLabelTriples: Bad number of columns, "
59  << "file = " << source << ", line = " << nline;
60  return false;
61  }
62  bool err;
63  const Label i1 = StrToInt64(col[0], source, nline, &err);
64  if (err) return false;
65  const Label i2 = StrToInt64(col[1], source, nline, &err);
66  if (err) return false;
67  using Level = Label;
68  const Level i3 = StrToInt64(col[2], source, nline, &err);
69  if (err) return false;
70  pairs->push_back(std::make_pair(i1, i2));
71  assignments->push_back(i3);
72  }
73  return true;
74 }
75 
76 // Returns true on success.
77 template <typename Label>
78 bool WriteLabelTriples(const std::string &source,
79  const std::vector<std::pair<Label, Label>> &pairs,
80  const std::vector<Label> &assignments) {
81  if (pairs.size() != assignments.size()) {
82  LOG(ERROR) << "WriteLabelTriples: Pairs and assignments of different sizes";
83  return false;
84  }
85  std::ofstream fstrm(source);
86  if (!fstrm) {
87  LOG(ERROR) << "WriteLabelTriples: Can't open file: " << source;
88  return false;
89  }
90  for (size_t n = 0; n < pairs.size(); ++n)
91  fstrm << pairs[n].first << "\t" << pairs[n].second << "\t" << assignments[n]
92  << "\n";
93  if (!fstrm) {
94  LOG(ERROR) << "WriteLabelTriples: Write failed: "
95  << (source.empty() ? "standard output" : source);
96  return false;
97  }
98  return true;
99 }
100 
101 } // namespace fst
102 
103 #endif // FST_EXTENSIONS_MPDT_READ_WRITE_UTILS_H_
#define LOG(type)
Definition: log.h:53
constexpr int kLineLen
Definition: symbol-table.h:62
internal::StringSplitter StrSplit(std::string_view full, ByAnyChar delim)
Definition: compat.cc:77
bool ReadLabelTriples(const std::string &source, std::vector< std::pair< Label, Label >> *pairs, std::vector< Label > *assignments)
bool WriteLabelTriples(const std::string &source, const std::vector< std::pair< Label, Label >> &pairs, const std::vector< Label > &assignments)
int64_t StrToInt64(std::string_view s, std::string_view source, size_t nline, bool *error=nullptr)
Definition: util.cc:62