FST  openfst-1.7.1
OpenFst Library
stlist.h
Go to the documentation of this file.
1 // See www.openfst.org for extensive documentation on this weighted
2 // finite-state transducer library.
3 //
4 // A generic (string,type) list file format.
5 //
6 // This is a stripped-down version of STTable that does not support the Find()
7 // operation but that does support reading/writing from standard input/output.
8 
9 #ifndef FST_EXTENSIONS_FAR_STLIST_H_
10 #define FST_EXTENSIONS_FAR_STLIST_H_
11 
12 #include <algorithm>
13 #include <functional>
14 #include <iostream>
15 #include <memory>
16 #include <queue>
17 #include <string>
18 #include <utility>
19 #include <vector>
20 
21 #include <fstream>
22 #include <fst/util.h>
23 
24 namespace fst {
25 
26 static constexpr int32 kSTListMagicNumber = 5656924;  // First value written by STListWriter; readers reject streams that start with anything else.
27 static constexpr int32 kSTListFileVersion = 1;  // Written right after the magic number; readers reject any other version.
28 
29 // String-type list writing class for object of type T using a functor Writer.
30 // The Writer functor must provide at least the following interface:
31 //
32 // struct Writer {
33 // void operator()(std::ostream &, const T &) const;
34 // };
35 template <class T, class Writer>
36 class STListWriter {
37  public:
38  explicit STListWriter(const string &filename)
39  : stream_(filename.empty() ? &std::cout : new std::ofstream(
40  filename,
41  std::ios_base::out |
42  std::ios_base::binary)),
43  error_(false) {
44  WriteType(*stream_, kSTListMagicNumber);
45  WriteType(*stream_, kSTListFileVersion);
46  if (!stream_) {
47  FSTERROR() << "STListWriter::STListWriter: Error writing to file: "
48  << filename;
49  error_ = true;
50  }
51  }
52 
53  static STListWriter<T, Writer> *Create(const string &filename) {
54  return new STListWriter<T, Writer>(filename);
55  }
56 
57  void Add(const string &key, const T &t) {
58  if (key == "") {
59  FSTERROR() << "STListWriter::Add: Key empty: " << key;
60  error_ = true;
61  } else if (key < last_key_) {
62  FSTERROR() << "STListWriter::Add: Key out of order: " << key;
63  error_ = true;
64  }
65  if (error_) return;
66  last_key_ = key;
67  WriteType(*stream_, key);
68  entry_writer_(*stream_, t);
69  }
70 
71  bool Error() const { return error_; }
72 
74  WriteType(*stream_, string());
75  if (stream_ != &std::cout) delete stream_;
76  }
77 
78  private:
79  Writer entry_writer_;
80  std::ostream *stream_; // Output stream.
81  string last_key_; // Last key.
82  bool error_;
83 
84  STListWriter(const STListWriter &) = delete;
85  STListWriter &operator=(const STListWriter &) = delete;
86 };
87 
88 // String-type list reading class for object of type T using a functor Reader.
89 // Reader must provide at least the following interface:
90 //
91 // struct Reader {
92 // T *operator()(std::istream &) const;
93 // };
94 template <class T, class Reader>
95 class STListReader {
96  public:
97  explicit STListReader(const std::vector<string> &filenames)
98  : sources_(filenames), error_(false) {
99  streams_.resize(filenames.size(), 0);
100  bool has_stdin = false;
101  for (size_t i = 0; i < filenames.size(); ++i) {
102  if (filenames[i].empty()) {
103  if (!has_stdin) {
104  streams_[i] = &std::cin;
105  sources_[i] = "stdin";
106  has_stdin = true;
107  } else {
108  FSTERROR() << "STListReader::STListReader: Cannot read multiple "
109  << "inputs from standard input";
110  error_ = true;
111  return;
112  }
113  } else {
114  streams_[i] = new std::ifstream(
115  filenames[i], std::ios_base::in | std::ios_base::binary);
116  }
117  int32 magic_number = 0;
118  ReadType(*streams_[i], &magic_number);
119  int32 file_version = 0;
120  ReadType(*streams_[i], &file_version);
121  if (magic_number != kSTListMagicNumber) {
122  FSTERROR() << "STListReader::STListReader: Wrong file type: "
123  << filenames[i];
124  error_ = true;
125  return;
126  }
127  if (file_version != kSTListFileVersion) {
128  FSTERROR() << "STListReader::STListReader: Wrong file version: "
129  << filenames[i];
130  error_ = true;
131  return;
132  }
133  string key;
134  ReadType(*streams_[i], &key);
135  if (!key.empty()) heap_.push(std::make_pair(key, i));
136  if (!*streams_[i]) {
137  FSTERROR() << "STListReader: Error reading file: " << sources_[i];
138  error_ = true;
139  return;
140  }
141  }
142  if (heap_.empty()) return;
143  const auto current = heap_.top().second;
144  entry_.reset(entry_reader_(*streams_[current]));
145  if (!entry_ || !*streams_[current]) {
146  FSTERROR() << "STListReader: Error reading entry for key "
147  << heap_.top().first << ", file " << sources_[current];
148  error_ = true;
149  }
150  }
151 
153  for (auto &stream : streams_) {
154  if (stream != &std::cin) delete stream;
155  }
156  }
157 
158  static STListReader<T, Reader> *Open(const string &filename) {
159  std::vector<string> filenames;
160  filenames.push_back(filename);
161  return new STListReader<T, Reader>(filenames);
162  }
163 
164  static STListReader<T, Reader> *Open(const std::vector<string> &filenames) {
165  return new STListReader<T, Reader>(filenames);
166  }
167 
168  void Reset() {
169  FSTERROR() << "STListReader::Reset: Operation not supported";
170  error_ = true;
171  }
172 
173  bool Find(const string &key) {
174  FSTERROR() << "STListReader::Find: Operation not supported";
175  error_ = true;
176  return false;
177  }
178 
179  bool Done() const { return error_ || heap_.empty(); }
180 
181  void Next() {
182  if (error_) return;
183  auto current = heap_.top().second;
184  string key;
185  heap_.pop();
186  ReadType(*(streams_[current]), &key);
187  if (!*streams_[current]) {
188  FSTERROR() << "STListReader: Error reading file: " << sources_[current];
189  error_ = true;
190  return;
191  }
192  if (!key.empty()) heap_.push(std::make_pair(key, current));
193  if (!heap_.empty()) {
194  current = heap_.top().second;
195  entry_.reset(entry_reader_(*streams_[current]));
196  if (!entry_ || !*streams_[current]) {
197  FSTERROR() << "STListReader: Error reading entry for key: "
198  << heap_.top().first << ", file: " << sources_[current];
199  error_ = true;
200  }
201  }
202  }
203 
204  const string &GetKey() const { return heap_.top().first; }
205 
206  const T *GetEntry() const { return entry_.get(); }
207 
208  bool Error() const { return error_; }
209 
210  private:
211  Reader entry_reader_; // Read functor.
212  std::vector<std::istream *> streams_; // Input streams.
213  std::vector<string> sources_; // Corresponding filenames.
214  std::priority_queue<
215  std::pair<string, size_t>, std::vector<std::pair<string, size_t>>,
216  std::greater<std::pair<string, size_t>>> heap_; // (Key, stream id) heap
217  mutable std::unique_ptr<T> entry_; // The currently read entry.
218  bool error_;
219 
220  STListReader(const STListReader &) = delete;
221  STListReader &operator=(const STListReader &) = delete;
222 };
223 
224 // String-type list header reading function, templated on the entry header type.
225 // The Header type must provide at least the following interface:
226 //
227 // struct Header {
228 // void Read(std::istream &strm, const string &filename);
229 // };
230 template <class Header>
231 bool ReadSTListHeader(const string &filename, Header *header) {
232  if (filename.empty()) {
233  LOG(ERROR) << "ReadSTListHeader: Can't read header from standard input";
234  return false;
235  }
236  std::ifstream strm(filename, std::ios_base::in | std::ios_base::binary);
237  if (!strm) {
238  LOG(ERROR) << "ReadSTListHeader: Could not open file: " << filename;
239  return false;
240  }
241  int32 magic_number = 0;
242  ReadType(strm, &magic_number);
243  int32 file_version = 0;
244  ReadType(strm, &file_version);
245  if (magic_number != kSTListMagicNumber) {
246  LOG(ERROR) << "ReadSTListHeader: Wrong file type: " << filename;
247  return false;
248  }
249  if (file_version != kSTListFileVersion) {
250  LOG(ERROR) << "ReadSTListHeader: Wrong file version: " << filename;
251  return false;
252  }
253  string key;
254  ReadType(strm, &key);
255  header->Read(strm, filename + ":" + key);
256  if (!strm) {
257  LOG(ERROR) << "ReadSTListHeader: Error reading file: " << filename;
258  return false;
259  }
260  return true;
261 }
262 
263 bool IsSTList(const string &filename);  // Declaration only; defined in stlist.cc. Presumably reports whether `filename` is an STList file -- confirm against the definition.
264 
265 } // namespace fst
266 
267 #endif // FST_EXTENSIONS_FAR_STLIST_H_
STListReader(const std::vector< string > &filenames)
Definition: stlist.h:97
const T * GetEntry() const
Definition: stlist.h:206
bool Find(const string &key)
Definition: stlist.h:173
bool Done() const
Definition: stlist.h:179
#define LOG(type)
Definition: log.h:48
static STListReader< T, Reader > * Open(const std::vector< string > &filenames)
Definition: stlist.h:164
const string & GetKey() const
Definition: stlist.h:204
bool IsSTList(const string &filename)
Definition: stlist.cc:11
void Reset()
Definition: stlist.h:168
std::ostream & WriteType(std::ostream &strm, const T t)
Definition: util.h:155
#define FSTERROR()
Definition: util.h:35
static STListReader< T, Reader > * Open(const string &filename)
Definition: stlist.h:158
bool Error() const
Definition: stlist.h:208
void Add(const string &key, const T &t)
Definition: stlist.h:57
bool ReadSTListHeader(const string &filename, Header *header)
Definition: stlist.h:231
bool Error() const
Definition: stlist.h:71
int32_t int32
Definition: types.h:26
std::istream & ReadType(std::istream &strm, T *t)
Definition: util.h:47
static STListWriter< T, Writer > * Create(const string &filename)
Definition: stlist.h:53
STListWriter(const string &filename)
Definition: stlist.h:38