FST  openfst-1.8.3
OpenFst Library
stlist.h
Go to the documentation of this file.
1 // Copyright 2005-2024 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // A generic (string,type) list file format.
19 //
20 // This is a stripped-down version of STTable that does not support the Find()
21 // operation but that does support reading/writing from standard in/out.
22 
23 #ifndef FST_EXTENSIONS_FAR_STLIST_H_
24 #define FST_EXTENSIONS_FAR_STLIST_H_
25 
26 #include <algorithm>
27 #include <cstddef>
28 #include <cstdint>
29 #include <functional>
30 #include <ios>
31 #include <iostream>
32 #include <istream>
33 #include <memory>
34 #include <ostream>
35 #include <queue>
36 #include <string>
37 #include <utility>
38 #include <vector>
39 
40 #include <fst/log.h>
41 #include <fstream>
42 #include <fst/util.h>
43 #include <string_view>
44 
45 namespace fst {
46 
// Magic number that STListWriter writes first to every stream; the readers
// below compare the first int32 they read against it to detect the file type.
47 inline constexpr int32_t kSTListMagicNumber = 5656924;
// Format version written right after the magic number; the readers below
// reject any stream whose version differs from this value.
48 inline constexpr int32_t kSTListFileVersion = 1;
49 
50 // String-type list writing class for object of type T using a functor Writer.
51 // The Writer functor must provide at least the following interface:
52 //
53 // struct Writer {
54 // void operator()(std::ostream &, const T &) const;
55 // };
56 template <class T, class Writer>
57 class STListWriter {
58  public:
59  explicit STListWriter(std::string_view source)
60  : stream_(source.empty()
61  ? &std::cout
62  : new std::ofstream(
63  std::string(source),
64  std::ios_base::out | std::ios_base::binary)),
65  error_(false) {
66  WriteType(*stream_, kSTListMagicNumber);
67  WriteType(*stream_, kSTListFileVersion);
68  if (!stream_) {
69  FSTERROR() << "STListWriter::STListWriter: Error writing to file: "
70  << source;
71  error_ = true;
72  }
73  }
74 
75  static STListWriter<T, Writer> *Create(std::string_view source) {
76  return new STListWriter<T, Writer>(source);
77  }
78 
79  void Add(std::string_view key, const T &t) {
80  if (key.empty()) {
81  FSTERROR() << "STListWriter::Add: Key empty: " << key;
82  error_ = true;
83  } else if (key < last_key_) {
84  FSTERROR() << "STListWriter::Add: Key out of order: " << key;
85  error_ = true;
86  }
87  if (error_) return;
88  // TODO(jrosenstock,glebm): Use assign(key) when C++17 is required
89  last_key_.assign(key.data(), key.size());
90  WriteType(*stream_, key);
91  entry_writer_(*stream_, t);
92  }
93 
94  bool Error() const { return error_; }
95 
97  WriteType(*stream_, std::string());
98  if (stream_ != &std::cout) delete stream_;
99  }
100 
101  private:
102  Writer entry_writer_;
103  std::ostream *stream_; // Output stream.
104  std::string last_key_; // Last key.
105  bool error_;
106 
107  STListWriter(const STListWriter &) = delete;
108  STListWriter &operator=(const STListWriter &) = delete;
109 };
110 
111 // String-type list reading class for object of type T using a functor Reader.
112 // Reader must provide at least the following interface:
113 //
114 // struct Reader {
115 // T *operator()(std::istream &) const;
116 // };
117 template <class T, class Reader>
119  public:
120  explicit STListReader(const std::vector<std::string> &sources)
121  : sources_(sources), error_(false) {
122  streams_.resize(sources.size(), nullptr);
123  bool has_stdin = false;
124  for (size_t i = 0; i < sources.size(); ++i) {
125  if (sources[i].empty()) {
126  if (!has_stdin) {
127  streams_[i] = &std::cin;
128  sources_[i] = "stdin";
129  has_stdin = true;
130  } else {
131  FSTERROR() << "STListReader::STListReader: Cannot read multiple "
132  << "inputs from standard input";
133  error_ = true;
134  return;
135  }
136  } else {
137  streams_[i] = new std::ifstream(
138  sources[i], std::ios_base::in | std::ios_base::binary);
139  if (streams_[i]->fail()) {
140  FSTERROR() << "STListReader::STListReader: Error reading file: "
141  << sources[i];
142  error_ = true;
143  return;
144  }
145  }
146  int32_t magic_number = 0;
147  ReadType(*streams_[i], &magic_number);
148  int32_t file_version = 0;
149  ReadType(*streams_[i], &file_version);
150  if (magic_number != kSTListMagicNumber) {
151  FSTERROR() << "STListReader::STListReader: Wrong file type: "
152  << sources[i];
153  error_ = true;
154  return;
155  }
156  if (file_version != kSTListFileVersion) {
157  FSTERROR() << "STListReader::STListReader: Wrong file version: "
158  << sources[i];
159  error_ = true;
160  return;
161  }
162  std::string key;
163  ReadType(*streams_[i], &key);
164  if (!key.empty()) heap_.push(std::make_pair(key, i));
165  if (!*streams_[i]) {
166  FSTERROR() << "STListReader: Error reading file: " << sources_[i];
167  error_ = true;
168  return;
169  }
170  }
171  if (heap_.empty()) return;
172  const auto current = heap_.top().second;
173  entry_.reset(entry_reader_(*streams_[current]));
174  if (!entry_ || !*streams_[current]) {
175  FSTERROR() << "STListReader: Error reading entry for key "
176  << heap_.top().first << ", file " << sources_[current];
177  error_ = true;
178  }
179  }
180 
182  for (auto &stream : streams_) {
183  if (stream != &std::cin) delete stream;
184  }
185  }
186 
187  static STListReader<T, Reader> *Open(std::string_view source) {
188  std::vector<std::string> sources;
189  sources.push_back(std::string(source));
190  return new STListReader<T, Reader>(sources);
191  }
192 
194  const std::vector<std::string> &sources) {
195  return new STListReader<T, Reader>(sources);
196  }
197 
198  void Reset() {
199  FSTERROR() << "STListReader::Reset: Operation not supported";
200  error_ = true;
201  }
202 
203  bool Find(std::string_view key) {
204  FSTERROR() << "STListReader::Find: Operation not supported";
205  error_ = true;
206  return false;
207  }
208 
209  bool Done() const { return error_ || heap_.empty(); }
210 
211  void Next() {
212  if (error_) return;
213  auto current = heap_.top().second;
214  std::string key;
215  heap_.pop();
216  ReadType(*(streams_[current]), &key);
217  if (!*streams_[current]) {
218  FSTERROR() << "STListReader: Error reading file: " << sources_[current];
219  error_ = true;
220  return;
221  }
222  if (!key.empty()) heap_.push(std::make_pair(key, current));
223  if (!heap_.empty()) {
224  current = heap_.top().second;
225  entry_.reset(entry_reader_(*streams_[current]));
226  if (!entry_ || !*streams_[current]) {
227  FSTERROR() << "STListReader: Error reading entry for key: "
228  << heap_.top().first << ", file: " << sources_[current];
229  error_ = true;
230  }
231  }
232  }
233 
234  const std::string &GetKey() const { return heap_.top().first; }
235 
236  const T *GetEntry() const { return entry_.get(); }
237 
238  bool Error() const { return error_; }
239 
240  private:
241  Reader entry_reader_; // Read functor.
242  std::vector<std::istream *> streams_; // Input streams.
243  std::vector<std::string> sources_; // Corresponding sources.
244  std::priority_queue<std::pair<std::string, size_t>,
245  std::vector<std::pair<std::string, size_t>>,
246  std::greater<std::pair<std::string, size_t>>>
247  heap_; // (Key, stream id) heap
248  mutable std::unique_ptr<T> entry_; // The currently read entry.
249  bool error_;
250 
251  STListReader(const STListReader &) = delete;
252  STListReader &operator=(const STListReader &) = delete;
253 };
254 
255 // String-type list header reading function, templated on the entry header type.
256 // The Header type must provide at least the following interface:
257 //
258 // struct Header {
259 // bool Read(std::istream &strm, const std::string &source);
260 // };
261 template <class Header>
262 bool ReadSTListHeader(const std::string &source, Header *header) {
263  if (source.empty()) {
264  LOG(ERROR) << "ReadSTListHeader: Can't read header from standard input";
265  return false;
266  }
267  std::ifstream strm(source, std::ios_base::in | std::ios_base::binary);
268  if (!strm) {
269  LOG(ERROR) << "ReadSTListHeader: Could not open file: " << source;
270  return false;
271  }
272  int32_t magic_number = 0;
273  ReadType(strm, &magic_number);
274  int32_t file_version = 0;
275  ReadType(strm, &file_version);
276  if (magic_number != kSTListMagicNumber) {
277  LOG(ERROR) << "ReadSTListHeader: Wrong file type: " << source;
278  return false;
279  }
280  if (file_version != kSTListFileVersion) {
281  LOG(ERROR) << "ReadSTListHeader: Wrong file version: " << source;
282  return false;
283  }
284  std::string key;
285  ReadType(strm, &key);
286  if (!strm) {
287  LOG(ERROR) << "ReadSTListHeader: Error reading key: " << source;
288  return false;
289  }
290  // Empty key is written last, so this is an empty STList.
291  if (key.empty()) return true;
292  if (!header->Read(strm, source + ":" + key)) {
293  LOG(ERROR) << "ReadSTListHeader: Error reading FstHeader: " << source;
294  return false;
295  }
296  if (!strm) {
297  LOG(ERROR) << "ReadSTListHeader: Error reading file: " << source;
298  return false;
299  }
300  return true;
301 }
302 
// Declared here and defined out of line (not visible in this header).
// NOTE(review): presumably reports whether `source` names a file in the STList
// format — confirm against the definition.
303 bool IsSTList(std::string_view source);
304 
305 } // namespace fst
306 
307 #endif // FST_EXTENSIONS_FAR_STLIST_H_
bool Find(std::string_view key)
Definition: stlist.h:203
const T * GetEntry() const
Definition: stlist.h:236
constexpr int32_t kSTListFileVersion
Definition: stlist.h:48
static STListReader< T, Reader > * Open(const std::vector< std::string > &sources)
Definition: stlist.h:193
bool Done() const
Definition: stlist.h:209
#define LOG(type)
Definition: log.h:53
STListWriter(std::string_view source)
Definition: stlist.h:59
void Reset()
Definition: stlist.h:198
void Add(std::string_view key, const T &t)
Definition: stlist.h:79
std::ostream & WriteType(std::ostream &strm, const T t)
Definition: util.h:228
#define FSTERROR()
Definition: util.h:56
bool ReadSTListHeader(const std::string &source, Header *header)
Definition: stlist.h:262
const std::string & GetKey() const
Definition: stlist.h:234
bool Error() const
Definition: stlist.h:238
bool Error() const
Definition: stlist.h:94
bool IsSTList(std::string_view source)
Definition: stlist.cc:30
static STListReader< T, Reader > * Open(std::string_view source)
Definition: stlist.h:187
static STListWriter< T, Writer > * Create(std::string_view source)
Definition: stlist.h:75
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:80
STListReader(const std::vector< std::string > &sources)
Definition: stlist.h:120
constexpr int32_t kSTListMagicNumber
Definition: stlist.h:47