// Fragment of the STTableWriter<T, Writer> template: include guard, one
// include, the template header, and the visible parts of the constructor's
// initializer list and error reporting.
// NOTE(review): the leading numbers look like line numbers carried over from
// a source listing; the lines between them are not visible here.
24 #ifndef FST_EXTENSIONS_FAR_STTABLE_H_ 25 #define FST_EXTENSIONS_FAR_STTABLE_H_ 39 #include <string_view> 52 template <
class T,
class Writer>
// Member initializer: opens stream_ on a std::string copy of `source`,
// in binary output mode.
56 : stream_(std::string(source),
57 std::ios_base::out | std::ios_base::binary),
// Error reported through FSTERROR; the condition guarding it is not visible.
62 FSTERROR() <<
// The next line fuses the end of the FSTERROR message with the start of a
// separate LOG(ERROR) statement that rejects writing to standard out.
// NOTE(review): presumably that second statement belongs to a factory
// function rather than the constructor; confirm against the full file.
"STTableWriter::STTableWriter: Error writing to file: " 70 LOG(ERROR) <<
"STTableWriter: Writing to standard out unsupported.";
// Appends one (key, entry) pair to the table being written.
//   key: the entry's key; compared against last_key_ to detect misordering.
//   t:   the entry, serialized by entry_writer_.
// NOTE(review): several listing lines are missing from this fragment (the
// condition of the first branch and whatever follows each FSTERROR), so the
// full control flow cannot be confirmed from here.
76 void Add(std::string_view key,
const T &t) {
// Empty-key error; the `if` that guards it is not visible.
78 FSTERROR() <<
"STTableWriter::Add: Key empty: " << key;
80 }
// Only a key strictly less than the previous key is reported as out of
// order; a key equal to last_key_ does not take this branch.
else if (key < last_key_) {
81 FSTERROR() <<
"STTableWriter::Add: Key out of order: " << key;
// Remember this key so the next call can be order-checked against it.
85 last_key_.assign(key.data(), key.size());
// Record the current write offset of stream_ for this entry.
86 positions_.push_back(stream_.tellp());
// Serialize the entry into stream_ via the Writer functor.
88 entry_writer_(stream_, t);
// Returns the writer's error_ flag. Where error_ gets set is not visible
// in this fragment.
91 bool Error()
const {
return error_; }
// Writes the number of recorded positions to stream_ as an int64_t.
// NOTE(review): the enclosing function is not visible; presumably this is
// part of the finalization step that writes the table's trailer. Confirm.
95 WriteType(stream_, static_cast<int64_t>(positions_.size()));
// Output file stream the table is written to.
100 std::ofstream stream_;
// Stream offsets, one pushed per Add() call before the entry is written.
101 std::vector<int64_t> positions_;
// Most recent key passed to Add(); used for the ordering check.
102 std::string last_key_;
// Fragment of the STTableReader<T, Reader> constructor. For each source it
// opens a binary input stream, reads a magic number and file version, reads
// the entry count, loads that many int64_t positions, and seeks to the first
// entry.
// NOTE(review): the leading numbers look like listing line numbers; many
// intermediate lines (closing braces, error_ assignments, returns, the
// declaration of num_entries, the seek origin arguments) are not visible.
116 template <
class T,
class Reader>
120 : sources_(sources), error_(false) {
// The comparator is constructed with the address of keys_.
121 compare_.reset(
new Compare(&keys_));
// Size the per-source vectors: one key, one stream pointer, and one
// position list per source.
122 keys_.resize(sources.size());
123 streams_.resize(sources.size(),
nullptr);
124 positions_.resize(sources.size());
125 for (
size_t i = 0; i < sources.size(); ++i) {
// Streams are held as raw pointers allocated with `new`; the loop at the
// end of this fragment deletes them.
126 streams_[i] =
new std::ifstream(
127 sources[i], std::ios_base::in | std::ios_base::binary);
128 if (streams_[i]->fail()) {
129 FSTERROR() <<
// The next line fuses the end of the error message with the start of the
// header check: magic number first, then file version, both int32_t.
"STTableReader::STTableReader: Error reading file: " 134 int32_t magic_number = 0;
135 ReadType(*streams_[i], &magic_number);
136 int32_t file_version = 0;
137 ReadType(*streams_[i], &file_version);
138 if (magic_number != kSTTableMagicNumber) {
139 FSTERROR() <<
"STTableReader::STTableReader: Wrong file type: " 144 if (file_version != kSTTableFileVersion) {
145 FSTERROR() <<
// After the version check: seek by minus one int64_t (the seek origin
// argument is not visible) and read the entry count.
"STTableReader::STTableReader: Wrong file version: " 151 streams_[i]->seekg(-static_cast<int>(
sizeof(int64_t)),
153 ReadType(*streams_[i], &num_entries);
154 if (num_entries > 0) {
// Offset of (num_entries + 1) int64_t-sized values, negated; the seekg call
// this argument belongs to is not visible.
156 -static_cast<int>(
sizeof(int64_t)) * (num_entries + 1),
158 positions_[i].resize(num_entries);
// Read one position per entry; the loop stops early if the stream fails.
159 for (
size_t j = 0; (j < num_entries) && (!streams_[i]->fail()); ++j) {
160 ReadType(*streams_[i], &(positions_[i][j]));
// Reposition the stream at the first entry.
162 streams_[i]->seekg(positions_[i][0]);
163 if (streams_[i]->fail()) {
164 FSTERROR() <<
// The next line fuses the end of the error message with a separate range-for
// that deletes every stream pointer in streams_.
// NOTE(review): presumably that loop is the destructor body; confirm.
"STTableReader::STTableReader: Error reading file: " 175 for (
auto &stream : streams_)
delete stream;
// Fragment of a single-source open path: an empty `source` is rejected with
// a "standard input" error, otherwise `source` is copied into a one-element
// vector of source names.
// NOTE(review): the function signature, the return statements and what is
// done with `sources` afterwards are not visible in this fragment.
179 if (source.empty()) {
180 LOG(ERROR) <<
"STTableReader: Operation not supported on standard input";
183 std::vector<std::string> sources;
184 sources.push_back(std::string(source));
// Parameter list tail of a separate function taking several sources; its
// name and body are not visible here.
189 const std::vector<std::string> &sources) {
// For every stream that has at least one recorded position, seeks back to
// its first position. Streams with no positions are left untouched.
// NOTE(review): the enclosing function's signature and the statements after
// the loop are not visible; presumably this is the reader's rewind/reset
// step. Confirm against the full file.
195 for (
size_t i = 0; i < streams_.size(); ++i) {
196 if (!positions_[i].empty()) {
197 streams_[i]->seekg(positions_[i].front());
// Positions the reader at `key`.
// Returns false if the reader is in an error state or the heap is empty;
// otherwise returns whether the current key equals `key`.
// NOTE(review): one listing line between the LowerBound loop and the heap
// check is not visible; presumably it rebuilds the heap. Confirm.
203 bool Find(std::string_view key) {
204 if (error_)
return false;
// Run LowerBound on every stream for the requested key.
205 for (
size_t i = 0; i < streams_.size(); ++i) LowerBound(i, key);
207 if (heap_.empty())
return false;
208 return keys_[current_] == key;
// Returns true when the reader is in an error state or the heap is empty.
211 bool Done()
const {
return error_ || heap_.empty(); }
// Advance step for the current stream: if its read offset has not passed its
// last recorded position, read the next key into keys_[current_] and restore
// the heap with push_heap; afterwards, if the heap is non-empty, call
// PopHeap().
// NOTE(review): the enclosing function's signature, the error handling after
// the FSTERROR, and the else branch are not visible in this fragment.
215 if (streams_[current_]->tellg() <= positions_[current_].back()) {
216 ReadType(*(streams_[current_]), &(keys_[current_]));
217 if (streams_[current_]->fail()) {
218 FSTERROR() <<
"STTableReader: Error reading file: " 219 << sources_[current_];
223 std::push_heap(heap_.begin(), heap_.end(), *compare_);
227 if (!heap_.empty()) PopHeap();
// Returns a reference to the key of the current stream (keys_[current_]).
// The reference points into keys_, so it reflects later changes to that slot.
230 const std::string &
GetKey()
const {
return keys_[current_]; }
// Returns the reader's error_ flag.
234 bool Error()
const {
return error_; }
// Comparator over indices into a vector of keys. It stores a pointer to the
// vector, not a copy, so comparisons always see the vector's current
// contents.
// NOTE(review): the enclosing struct/class header and closing braces are not
// visible in this fragment.
239 explicit Compare(
const std::vector<std::string> *keys) : keys(keys) {}
// Returns true when key i is greater than key j. With the standard heap
// algorithms this ordering keeps the index of the smallest key at the front.
241 bool operator()(
size_t i,
size_t j)
const {
242 return (*keys)[i] > (*keys)[j];
// Non-owning pointer to the key vector being compared.
246 const std::vector<std::string> *keys;
// Positions stream `id` relative to `find_key` using the stream's recorded
// entry positions.
//   id:       index into streams_ / positions_.
//   find_key: key being searched for.
// Returns immediately if the stream has no recorded positions.
// NOTE(review): the declaration of `low`, the search loop header, the reads
// of `key`, and the bodies that update low/high are not visible in this
// fragment. The visible shape (midpoint probe, three-way comparison) looks
// like a binary search; confirm against the full file.
251 void LowerBound(
size_t id, std::string_view find_key) {
252 auto *strm = streams_[id];
253 const auto &positions = positions_[id];
254 if (positions.empty())
return;
256 size_t high = positions.size() - 1;
// Probe the entry at the midpoint of [low, high].
258 size_t mid = (low + high) / 2;
259 strm->seekg(positions[mid]);
262 if (key > find_key) {
264 }
else if (key < find_key) {
// Walk backwards from mid towards low, probing the previous entry each time;
// when the probed key differs from find_key, seek to entry i.
// NOTE(review): presumably this runs in the equal-key case to land on the
// first of several equal keys; the branch header is not visible.
267 for (
size_t i = mid; i > low; --i) {
268 strm->seekg(positions[i - 1]);
270 if (key != find_key) {
271 strm->seekg(positions[i]);
// Both remaining visible exits leave the stream at positions[low].
275 strm->seekg(positions[low]);
279 strm->seekg(positions[low]);
// Reads the next key from every stream that has recorded positions, then
// heapifies heap_ using compare_. Streams with no positions are skipped, and
// nothing is heapified when heap_ is empty.
// NOTE(review): the enclosing function's signature, the statements that fill
// heap_, and the error handling after the FSTERROR are not visible.
285 for (
size_t i = 0; i < streams_.size(); ++i) {
286 if (positions_[i].empty())
continue;
287 ReadType(*streams_[i], &(keys_[i]));
288 if (streams_[i]->fail()) {
289 FSTERROR() <<
"STTableReader: Error reading file: " << sources_[i];
295 if (heap_.empty())
return;
296 std::make_heap(heap_.begin(), heap_.end(), *compare_);
// Moves the front heap element to the back, makes it the current stream, and
// reads that stream's entry into entry_.
// NOTE(review): the enclosing function's signature and whatever follows the
// FSTERROR are not visible; presumably this is the body of PopHeap().
304 std::pop_heap(heap_.begin(), heap_.end(), *compare_);
// After pop_heap the selected index sits at the back of heap_.
305 current_ = heap_.back();
// entry_ takes ownership of the pointer returned by the Reader functor.
306 entry_.reset(entry_reader_(*streams_[current_]));
// A null entry marks the reader as failed.
307 if (!entry_) error_ =
true;
308 if (streams_[current_]->fail()) {
309 FSTERROR() <<
"STTableReader: Error reading entry for key: " 310 << keys_[current_] <<
", file: " << sources_[current_];
// Reader data members. Some members used elsewhere (heap_, current_, error_)
// are declared on listing lines that are not visible here.
// Functor that reads one entry from a stream and returns a pointer to it.
315 Reader entry_reader_;
// One input stream per source, held as raw pointers.
316 std::vector<std::istream *> streams_;
// Source names, used in error messages.
317 std::vector<std::string> sources_;
// Per-source list of entry offsets within the stream.
318 std::vector<std::vector<int64_t>> positions_;
// Per-source most recently read key.
319 std::vector<std::string> keys_;
// Heap comparator, constructed with the address of keys_.
323 std::unique_ptr<Compare> compare_;
// Most recently read entry; mutable so const members can reassign it.
324 mutable std::unique_ptr<T> entry_;
// Fragment of ReadSTTableHeader<Header>: opens `source`, validates the magic
// number and file version, reads a trailing int64_t count, returns true
// early when that count is zero, and otherwise has `header` read itself from
// the stream.
// NOTE(review): the function signature, the reads that fill magic_number,
// file_version, `i` and `key`, and the return statements after each error
// are not visible in this fragment.
// NOTE(review): the first log message below is prefixed "ReadSTTable:" while
// every other message uses "ReadSTTableHeader:"; consider aligning them.
334 template <
class Header>
336 if (source.empty()) {
337 LOG(ERROR) <<
"ReadSTTable: Can't read header from standard input";
340 std::ifstream strm(source, std::ios_base::in | std::ios_base::binary);
342 LOG(ERROR) <<
"ReadSTTableHeader: Could not open file: " << source;
345 int32_t magic_number = 0;
347 int32_t file_version = 0;
349 if (magic_number != kSTTableMagicNumber) {
350 LOG(ERROR) <<
"ReadSTTableHeader: Wrong file type: " << source;
353 if (file_version != kSTTableFileVersion) {
354 LOG(ERROR) <<
"ReadSTTableHeader: Wrong file version: " << source;
// Seek to the last int64_t of the file.
358 strm.seekg(-static_cast<int>(
sizeof(int64_t)), std::ios_base::end);
361 LOG(ERROR) <<
"ReadSTTableHeader: Error reading file: " << source;
// When `i` is zero the function succeeds without reading a header.
364 if (i == 0)
return true;
// Seek to the second-to-last int64_t of the file.
365 strm.seekg(-2 * static_cast<int>(
sizeof(int64_t)), std::ios_base::end);
// The header reads itself from the stream; "source:key" is passed as the
// name argument.
370 if (!header->Read(strm, source +
":" + key)) {
371 LOG(ERROR) <<
"ReadSTTableHeader: Error reading FstHeader: " << source;
375 LOG(ERROR) <<
"ReadSTTableHeader: Error reading file: " << source;
385 #endif // FST_EXTENSIONS_FAR_STTABLE_H_ constexpr int32_t kSTTableMagicNumber
constexpr int32_t kSTTableFileVersion
void Add(std::string_view key, const T &t)
bool Find(std::string_view key)
static STTableReader< T, Reader > * Open(const std::vector< std::string > &sources)
std::ostream & WriteType(std::ostream &strm, const T t)
STTableReader(const std::vector< std::string > &sources)
const std::string & GetKey() const
bool ReadSTTableHeader(const std::string &source, Header *header)
STTableWriter(std::string_view source)
static STTableWriter< T, Writer > * Create(std::string_view source)
const T * GetEntry() const
std::istream & ReadType(std::istream &strm, T *t)
bool IsSTTable(std::string_view source)
static STTableReader< T, Reader > * Open(std::string_view source)