FST  openfst-1.8.2
OpenFst Library
stlist.h
Go to the documentation of this file.
1 // Copyright 2005-2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the 'License');
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an 'AS IS' BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // See www.openfst.org for extensive documentation on this weighted
16 // finite-state transducer library.
17 //
18 // A generic (string,type) list file format.
19 //
20 // This is a stripped-down version of STTable that does not support the Find()
21 // operation but that does support reading/writing from standard in/out.
22 
23 #ifndef FST_EXTENSIONS_FAR_STLIST_H_
24 #define FST_EXTENSIONS_FAR_STLIST_H_
25 
26 #include <algorithm>
27 #include <cstdint>
28 #include <functional>
29 #include <iostream>
30 #include <memory>
31 #include <queue>
32 #include <string>
33 #include <utility>
34 #include <vector>
35 
36 #include <fstream>
37 #include <fst/util.h>
38 #include <string_view>
39 
40 namespace fst {
41 
// Magic number written first to every STList stream by STListWriter; the
// readers in this file reject input whose leading int32 differs from it.
42 inline constexpr int32_t kSTListMagicNumber = 5656924;
// File format version written right after the magic number; the readers in
// this file reject any other value.
43 inline constexpr int32_t kSTListFileVersion = 1;
44 
45 // String-type list writing class for object of type T using a functor Writer.
46 // The Writer functor must provide at least the following interface:
47 //
48 // struct Writer {
49 // void operator()(std::ostream &, const T &) const;
50 // };
51 template <class T, class Writer>
52 class STListWriter {
53  public:
54  explicit STListWriter(const std::string &source)
55  : stream_(source.empty()
56  ? &std::cout
57  : new std::ofstream(
58  source, std::ios_base::out | std::ios_base::binary)),
59  error_(false) {
60  WriteType(*stream_, kSTListMagicNumber);
61  WriteType(*stream_, kSTListFileVersion);
62  if (!stream_) {
63  FSTERROR() << "STListWriter::STListWriter: Error writing to file: "
64  << source;
65  error_ = true;
66  }
67  }
68 
69  static STListWriter<T, Writer> *Create(const std::string &source) {
70  return new STListWriter<T, Writer>(source);
71  }
72 
73  void Add(std::string_view key, const T &t) {
74  if (key.empty()) {
75  FSTERROR() << "STListWriter::Add: Key empty: " << key;
76  error_ = true;
77  } else if (key < last_key_) {
78  FSTERROR() << "STListWriter::Add: Key out of order: " << key;
79  error_ = true;
80  }
81  if (error_) return;
82  // TODO(jrosenstock,glebm): Use assign(key) when C++17 is required
83  last_key_.assign(key.data(), key.size());
84  WriteType(*stream_, key);
85  entry_writer_(*stream_, t);
86  }
87 
88  bool Error() const { return error_; }
89 
91  WriteType(*stream_, std::string());
92  if (stream_ != &std::cout) delete stream_;
93  }
94 
95  private:
96  Writer entry_writer_;
97  std::ostream *stream_; // Output stream.
98  std::string last_key_; // Last key.
99  bool error_;
100 
101  STListWriter(const STListWriter &) = delete;
102  STListWriter &operator=(const STListWriter &) = delete;
103 };
104 
105 // String-type list reading class for object of type T using a functor Reader.
106 // Reader must provide at least the following interface:
107 //
108 // struct Reader {
109 // T *operator()(std::istream &) const;
110 // };
111 template <class T, class Reader>
113  public:
114  explicit STListReader(const std::vector<std::string> &sources)
115  : sources_(sources), error_(false) {
116  streams_.resize(sources.size(), nullptr);
117  bool has_stdin = false;
118  for (size_t i = 0; i < sources.size(); ++i) {
119  if (sources[i].empty()) {
120  if (!has_stdin) {
121  streams_[i] = &std::cin;
122  sources_[i] = "stdin";
123  has_stdin = true;
124  } else {
125  FSTERROR() << "STListReader::STListReader: Cannot read multiple "
126  << "inputs from standard input";
127  error_ = true;
128  return;
129  }
130  } else {
131  streams_[i] = new std::ifstream(
132  sources[i], std::ios_base::in | std::ios_base::binary);
133  if (streams_[i]->fail()) {
134  FSTERROR() << "STListReader::STListReader: Error reading file: "
135  << sources[i];
136  error_ = true;
137  return;
138  }
139  }
140  int32_t magic_number = 0;
141  ReadType(*streams_[i], &magic_number);
142  int32_t file_version = 0;
143  ReadType(*streams_[i], &file_version);
144  if (magic_number != kSTListMagicNumber) {
145  FSTERROR() << "STListReader::STListReader: Wrong file type: "
146  << sources[i];
147  error_ = true;
148  return;
149  }
150  if (file_version != kSTListFileVersion) {
151  FSTERROR() << "STListReader::STListReader: Wrong file version: "
152  << sources[i];
153  error_ = true;
154  return;
155  }
156  std::string key;
157  ReadType(*streams_[i], &key);
158  if (!key.empty()) heap_.push(std::make_pair(key, i));
159  if (!*streams_[i]) {
160  FSTERROR() << "STListReader: Error reading file: " << sources_[i];
161  error_ = true;
162  return;
163  }
164  }
165  if (heap_.empty()) return;
166  const auto current = heap_.top().second;
167  entry_.reset(entry_reader_(*streams_[current]));
168  if (!entry_ || !*streams_[current]) {
169  FSTERROR() << "STListReader: Error reading entry for key "
170  << heap_.top().first << ", file " << sources_[current];
171  error_ = true;
172  }
173  }
174 
176  for (auto &stream : streams_) {
177  if (stream != &std::cin) delete stream;
178  }
179  }
180 
181  static STListReader<T, Reader> *Open(const std::string &source) {
182  std::vector<std::string> sources;
183  sources.push_back(source);
184  return new STListReader<T, Reader>(sources);
185  }
186 
188  const std::vector<std::string> &sources) {
189  return new STListReader<T, Reader>(sources);
190  }
191 
192  void Reset() {
193  FSTERROR() << "STListReader::Reset: Operation not supported";
194  error_ = true;
195  }
196 
197  bool Find(std::string_view key) {
198  FSTERROR() << "STListReader::Find: Operation not supported";
199  error_ = true;
200  return false;
201  }
202 
203  bool Done() const { return error_ || heap_.empty(); }
204 
205  void Next() {
206  if (error_) return;
207  auto current = heap_.top().second;
208  std::string key;
209  heap_.pop();
210  ReadType(*(streams_[current]), &key);
211  if (!*streams_[current]) {
212  FSTERROR() << "STListReader: Error reading file: " << sources_[current];
213  error_ = true;
214  return;
215  }
216  if (!key.empty()) heap_.push(std::make_pair(key, current));
217  if (!heap_.empty()) {
218  current = heap_.top().second;
219  entry_.reset(entry_reader_(*streams_[current]));
220  if (!entry_ || !*streams_[current]) {
221  FSTERROR() << "STListReader: Error reading entry for key: "
222  << heap_.top().first << ", file: " << sources_[current];
223  error_ = true;
224  }
225  }
226  }
227 
228  const std::string &GetKey() const { return heap_.top().first; }
229 
230  const T *GetEntry() const { return entry_.get(); }
231 
232  bool Error() const { return error_; }
233 
234  private:
235  Reader entry_reader_; // Read functor.
236  std::vector<std::istream *> streams_; // Input streams.
237  std::vector<std::string> sources_; // Corresponding sources.
238  std::priority_queue<std::pair<std::string, size_t>,
239  std::vector<std::pair<std::string, size_t>>,
240  std::greater<std::pair<std::string, size_t>>>
241  heap_; // (Key, stream id) heap
242  mutable std::unique_ptr<T> entry_; // The currently read entry.
243  bool error_;
244 
245  STListReader(const STListReader &) = delete;
246  STListReader &operator=(const STListReader &) = delete;
247 };
248 
249 // String-type list header reading function, templated on the entry header type.
250 // The Header type must provide at least the following interface:
251 //
252 // struct Header {
253 // bool Read(std::istream &strm, const std::string &source);
254 // };
255 template <class Header>
256 bool ReadSTListHeader(const std::string &source, Header *header) {
257  if (source.empty()) {
258  LOG(ERROR) << "ReadSTListHeader: Can't read header from standard input";
259  return false;
260  }
261  std::ifstream strm(source, std::ios_base::in | std::ios_base::binary);
262  if (!strm) {
263  LOG(ERROR) << "ReadSTListHeader: Could not open file: " << source;
264  return false;
265  }
266  int32_t magic_number = 0;
267  ReadType(strm, &magic_number);
268  int32_t file_version = 0;
269  ReadType(strm, &file_version);
270  if (magic_number != kSTListMagicNumber) {
271  LOG(ERROR) << "ReadSTListHeader: Wrong file type: " << source;
272  return false;
273  }
274  if (file_version != kSTListFileVersion) {
275  LOG(ERROR) << "ReadSTListHeader: Wrong file version: " << source;
276  return false;
277  }
278  std::string key;
279  ReadType(strm, &key);
280  if (!strm) {
281  LOG(ERROR) << "ReadSTListHeader: Error reading key: " << source;
282  return false;
283  }
284  // Empty key is written last, so this is an empty STList.
285  if (key.empty()) return true;
286  if (!header->Read(strm, source + ":" + key)) {
287  LOG(ERROR) << "ReadSTListHeader: Error reading FstHeader: " << source;
288  return false;
289  }
290  if (!strm) {
291  LOG(ERROR) << "ReadSTListHeader: Error reading file: " << source;
292  return false;
293  }
294  return true;
295 }
296 
// Declaration only; the generated cross-reference places the definition in
// stlist.cc. Presumably reports whether `source` names a file in STList
// format -- TODO confirm against the definition.
297 bool IsSTList(const std::string &source);
298 
299 } // namespace fst
300 
301 #endif // FST_EXTENSIONS_FAR_STLIST_H_
bool Find(std::string_view key)
Definition: stlist.h:197
const T * GetEntry() const
Definition: stlist.h:230
constexpr int32_t kSTListFileVersion
Definition: stlist.h:43
static STListReader< T, Reader > * Open(const std::vector< std::string > &sources)
Definition: stlist.h:187
bool Done() const
Definition: stlist.h:203
#define LOG(type)
Definition: log.h:49
void Reset()
Definition: stlist.h:192
void Add(std::string_view key, const T &t)
Definition: stlist.h:73
std::ostream & WriteType(std::ostream &strm, const T t)
Definition: util.h:211
#define FSTERROR()
Definition: util.h:53
static STListReader< T, Reader > * Open(const std::string &source)
Definition: stlist.h:181
bool ReadSTListHeader(const std::string &source, Header *header)
Definition: stlist.h:256
STListWriter(const std::string &source)
Definition: stlist.h:54
const std::string & GetKey() const
Definition: stlist.h:228
bool Error() const
Definition: stlist.h:232
static STListWriter< T, Writer > * Create(const std::string &source)
Definition: stlist.h:69
bool IsSTList(const std::string &source)
Definition: stlist.cc:28
bool Error() const
Definition: stlist.h:88
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:65
STListReader(const std::vector< std::string > &sources)
Definition: stlist.h:114
constexpr int32_t kSTListMagicNumber
Definition: stlist.h:42