FST  openfst-1.7.2
OpenFst Library
stlist.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // A generic (string,type) list file format.
5 //
6 // This is a stripped-down version of STTable that does not support the Find()
7 // operation but that does support reading/writing from standard in/out.
8 
9 #ifndef FST_EXTENSIONS_FAR_STLIST_H_
10 #define FST_EXTENSIONS_FAR_STLIST_H_
11 
12 #include <algorithm>
13 #include <functional>
14 #include <iostream>
15 #include <memory>
16 #include <queue>
17 #include <string>
18 #include <utility>
19 #include <vector>
20 
21 #include <fstream>
22 #include <fst/util.h>
23 
24 namespace fst {
25 
// Magic number that STListWriter emits first in every stream; the readers in
// this file reject input whose leading int32 differs from it.
26 static constexpr int32 kSTListMagicNumber = 5656924;
// File format version emitted right after the magic number; the readers in
// this file reject input whose version differs from it.
27 static constexpr int32 kSTListFileVersion = 1;
28 
29 // String-type list writing class for object of type T using a functor Writer.
30 // The Writer functor must provide at least the following interface:
31 //
32 // struct Writer {
33 // void operator()(std::ostream &, const T &) const;
34 // };
35 template <class T, class Writer>
36 class STListWriter {
37  public:
38  explicit STListWriter(const string &filename)
39  : stream_(filename.empty() ? &std::cout : new std::ofstream(
40  filename,
41  std::ios_base::out |
42  std::ios_base::binary)),
43  error_(false) {
44  WriteType(*stream_, kSTListMagicNumber);
45  WriteType(*stream_, kSTListFileVersion);
46  if (!stream_) {
47  FSTERROR() << "STListWriter::STListWriter: Error writing to file: "
48  << filename;
49  error_ = true;
50  }
51  }
52 
53  static STListWriter<T, Writer> *Create(const string &filename) {
54  return new STListWriter<T, Writer>(filename);
55  }
56 
57  void Add(const string &key, const T &t) {
58  if (key == "") {
59  FSTERROR() << "STListWriter::Add: Key empty: " << key;
60  error_ = true;
61  } else if (key < last_key_) {
62  FSTERROR() << "STListWriter::Add: Key out of order: " << key;
63  error_ = true;
64  }
65  if (error_) return;
66  last_key_ = key;
67  WriteType(*stream_, key);
68  entry_writer_(*stream_, t);
69  }
70 
71  bool Error() const { return error_; }
72 
74  WriteType(*stream_, string());
75  if (stream_ != &std::cout) delete stream_;
76  }
77 
78  private:
79  Writer entry_writer_;
80  std::ostream *stream_; // Output stream.
81  string last_key_; // Last key.
82  bool error_;
83 
84  STListWriter(const STListWriter &) = delete;
85  STListWriter &operator=(const STListWriter &) = delete;
86 };
87 
88 // String-type list reading class for object of type T using a functor Reader.
89 // Reader must provide at least the following interface:
90 //
91 // struct Reader {
92 // T *operator()(std::istream &) const;
93 // };
94 template <class T, class Reader>
95 class STListReader {
96  public:
97  explicit STListReader(const std::vector<string> &filenames)
98  : sources_(filenames), error_(false) {
99  streams_.resize(filenames.size(), 0);
100  bool has_stdin = false;
101  for (size_t i = 0; i < filenames.size(); ++i) {
102  if (filenames[i].empty()) {
103  if (!has_stdin) {
104  streams_[i] = &std::cin;
105  sources_[i] = "stdin";
106  has_stdin = true;
107  } else {
108  FSTERROR() << "STListReader::STListReader: Cannot read multiple "
109  << "inputs from standard input";
110  error_ = true;
111  return;
112  }
113  } else {
114  streams_[i] = new std::ifstream(
115  filenames[i], std::ios_base::in | std::ios_base::binary);
116  if (streams_[i]->fail()) {
117  FSTERROR() << "STListReader::STListReader: Error reading file: "
118  << filenames[i];
119  error_ = true;
120  return;
121  }
122  }
123  int32 magic_number = 0;
124  ReadType(*streams_[i], &magic_number);
125  int32 file_version = 0;
126  ReadType(*streams_[i], &file_version);
127  if (magic_number != kSTListMagicNumber) {
128  FSTERROR() << "STListReader::STListReader: Wrong file type: "
129  << filenames[i];
130  error_ = true;
131  return;
132  }
133  if (file_version != kSTListFileVersion) {
134  FSTERROR() << "STListReader::STListReader: Wrong file version: "
135  << filenames[i];
136  error_ = true;
137  return;
138  }
139  string key;
140  ReadType(*streams_[i], &key);
141  if (!key.empty()) heap_.push(std::make_pair(key, i));
142  if (!*streams_[i]) {
143  FSTERROR() << "STListReader: Error reading file: " << sources_[i];
144  error_ = true;
145  return;
146  }
147  }
148  if (heap_.empty()) return;
149  const auto current = heap_.top().second;
150  entry_.reset(entry_reader_(*streams_[current]));
151  if (!entry_ || !*streams_[current]) {
152  FSTERROR() << "STListReader: Error reading entry for key "
153  << heap_.top().first << ", file " << sources_[current];
154  error_ = true;
155  }
156  }
157 
159  for (auto &stream : streams_) {
160  if (stream != &std::cin) delete stream;
161  }
162  }
163 
164  static STListReader<T, Reader> *Open(const string &filename) {
165  std::vector<string> filenames;
166  filenames.push_back(filename);
167  return new STListReader<T, Reader>(filenames);
168  }
169 
170  static STListReader<T, Reader> *Open(const std::vector<string> &filenames) {
171  return new STListReader<T, Reader>(filenames);
172  }
173 
174  void Reset() {
175  FSTERROR() << "STListReader::Reset: Operation not supported";
176  error_ = true;
177  }
178 
179  bool Find(const string &key) {
180  FSTERROR() << "STListReader::Find: Operation not supported";
181  error_ = true;
182  return false;
183  }
184 
185  bool Done() const { return error_ || heap_.empty(); }
186 
187  void Next() {
188  if (error_) return;
189  auto current = heap_.top().second;
190  string key;
191  heap_.pop();
192  ReadType(*(streams_[current]), &key);
193  if (!*streams_[current]) {
194  FSTERROR() << "STListReader: Error reading file: " << sources_[current];
195  error_ = true;
196  return;
197  }
198  if (!key.empty()) heap_.push(std::make_pair(key, current));
199  if (!heap_.empty()) {
200  current = heap_.top().second;
201  entry_.reset(entry_reader_(*streams_[current]));
202  if (!entry_ || !*streams_[current]) {
203  FSTERROR() << "STListReader: Error reading entry for key: "
204  << heap_.top().first << ", file: " << sources_[current];
205  error_ = true;
206  }
207  }
208  }
209 
210  const string &GetKey() const { return heap_.top().first; }
211 
212  const T *GetEntry() const { return entry_.get(); }
213 
214  bool Error() const { return error_; }
215 
216  private:
217  Reader entry_reader_; // Read functor.
218  std::vector<std::istream *> streams_; // Input streams.
219  std::vector<string> sources_; // Corresponding filenames.
220  std::priority_queue<
221  std::pair<string, size_t>, std::vector<std::pair<string, size_t>>,
222  std::greater<std::pair<string, size_t>>> heap_; // (Key, stream id) heap
223  mutable std::unique_ptr<T> entry_; // The currently read entry.
224  bool error_;
225 
226  STListReader(const STListReader &) = delete;
227  STListReader &operator=(const STListReader &) = delete;
228 };
229 
230 // String-type list header reading function, templated on the entry header type.
231 // The Header type must provide at least the following interface:
232 //
233 // struct Header {
234 // void Read(std::istream &strm, const string &filename);
235 // };
236 template <class Header>
237 bool ReadSTListHeader(const string &filename, Header *header) {
238  if (filename.empty()) {
239  LOG(ERROR) << "ReadSTListHeader: Can't read header from standard input";
240  return false;
241  }
242  std::ifstream strm(filename, std::ios_base::in | std::ios_base::binary);
243  if (!strm) {
244  LOG(ERROR) << "ReadSTListHeader: Could not open file: " << filename;
245  return false;
246  }
247  int32 magic_number = 0;
248  ReadType(strm, &magic_number);
249  int32 file_version = 0;
250  ReadType(strm, &file_version);
251  if (magic_number != kSTListMagicNumber) {
252  LOG(ERROR) << "ReadSTListHeader: Wrong file type: " << filename;
253  return false;
254  }
255  if (file_version != kSTListFileVersion) {
256  LOG(ERROR) << "ReadSTListHeader: Wrong file version: " << filename;
257  return false;
258  }
259  string key;
260  ReadType(strm, &key);
261  header->Read(strm, filename + ":" + key);
262  if (!strm) {
263  LOG(ERROR) << "ReadSTListHeader: Error reading file: " << filename;
264  return false;
265  }
266  return true;
267 }
268 
// NOTE(review): declared here, defined in stlist.cc (not visible in this
// header); presumably reports whether `filename` is an STList file — confirm.
269 bool IsSTList(const string &filename);
270 
271 } // namespace fst
272 
273 #endif // FST_EXTENSIONS_FAR_STLIST_H_
STListReader(const std::vector< string > &filenames)
Definition: stlist.h:97
const T * GetEntry() const
Definition: stlist.h:212
bool Find(const string &key)
Definition: stlist.h:179
bool Done() const
Definition: stlist.h:185
#define LOG(type)
Definition: log.h:48
static STListReader< T, Reader > * Open(const std::vector< string > &filenames)
Definition: stlist.h:170
const string & GetKey() const
Definition: stlist.h:210
bool IsSTList(const string &filename)
Definition: stlist.cc:11
void Reset()
Definition: stlist.h:174
std::ostream & WriteType(std::ostream &strm, const T t)
Definition: util.h:155
#define FSTERROR()
Definition: util.h:35
static STListReader< T, Reader > * Open(const string &filename)
Definition: stlist.h:164
bool Error() const
Definition: stlist.h:214
void Add(const string &key, const T &t)
Definition: stlist.h:57
bool ReadSTListHeader(const string &filename, Header *header)
Definition: stlist.h:237
bool Error() const
Definition: stlist.h:71
int32_t int32
Definition: types.h:26
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:47
static STListWriter< T, Writer > * Create(const string &filename)
Definition: stlist.h:53
STListWriter(const string &filename)
Definition: stlist.h:38