// robinsonizer engine by Bob Polis // copyright (c) 1994-2019 // TODO Use correct plurals: adjust numeral file format to include this. // TODO Don't assume 26-letter alphabet, but allow for arbitrary character list. // TODO Switch to get_long_options in main. // C++ #include #include #include #include using namespace std; // libcommon #include // Project #include "engine.hpp" const int s_index = 's' - 'a'; engine::engine(std::string start, int maxiter, int maxseed, std::string numerals_file, std::string characters_file, unsigned int vl, robinsonizer_mode mode, bool easy_parsing, int engine_id) : _start {start}, _maxiter {maxiter}, _verbosity_level {vl}, _mode {mode}, _easy_parsing {easy_parsing}, _engine_id {engine_id} { // read numerals for desired language from text file, init letter frequency table { ifstream file {numerals_file}; string line; while (getline(file, line)) { _numerals.push_back(line); } } // setup random distribution _dist.param(uniform_int_distribution<>::param_type {0, min(abs(maxseed), _numerals.size())}); // now we know how many numerals we have, so we can allocate our efficient buffers int n; for (n = 0; n < _numerals.size(); ++n) { vector vec; vec.resize(26, 0); _frequencies.push_back(vec); _freq.push_back(_frequencies.back().data()); } _start_freq.resize(26, 0); _old.resize(26, 0); _new.resize(26, 0); _used.resize(26, 0); // init numeral letter frequency table for (n = 0; n < _numerals.size(); n++) { for (char c : _numerals[n]) { if (c >= 'a' && c <= 'z') { ++_freq[n][c - 'a']; } } } if (_verbosity_level > 1) { numeral_frequencies(cerr); } // get letter frequencies from sentence start for (char c : start) { // optionally translate upper- to lowercase const char up_lo_dif = 'a' - 'A'; if (c >= 'A' && c <= 'Z') { c += up_lo_dif; } // skip non-alphabetical chars if (c >= 'a' && c <= 'z') { ++_start_freq[c - 'a']; } } // add letters from 'and', reset 'and' vector for (n = 0; n < 26; n++) { _start_freq[n] += _freq[0][n]; _freq[0][n] = 0; } // build 'used' table for correct autogram seeding for (n = 0; n < 26; ++n) { _used[n] = _start_freq[n]; } for (n = 0; n < _numerals.size(); ++n) { for (int i = 0; i < 26; ++i) { _used[i] += _freq[n][i]; } } } void engine::run() { auto prev = _old.data(); auto next = _new.data(); auto freq = _freq.data(); auto start = _start_freq.data(); do { // setup int num_iter = 0; unsigned int k, n; // create random seed vector switch (_mode) { case robinsonizer_mode::pangram: case robinsonizer_mode::strict_autogram: for (n = 0; n < 26; n++) { prev[n] = _dist(_random_engine); } break; case robinsonizer_mode::lax_autogram: for (n = 0; n < 26; ++n) { if (_used[n]) { // only if letter occurs in numerals or sentence start prev[n] = _dist(_random_engine); } else { prev[n] = 0; } } break; default: break; } #if DEBUG // logging, if desired if (_verbosity_level > 1) { frequencies(cerr, const_cast(prev)); } #endif do { // setup num_iter++; _total_iterations++; memcpy(next, start, 26 * sizeof(int)); // count letters in resulting sentence by incrementing result freqmap elements for (n = 0; n < 26; n++) { if (static_cast(prev[n]) < _numerals.size()) { auto p = freq[prev[n]]; for (k = 0; k < 26; k++) { next[k] += p[k]; } } else { char c = 'a' + n; if (_easy_parsing) { cout << "OVFL[" << _engine_id << "] " << c << " (" << prev[n] << ")" << endl; } else { cerr << endl << "overflow: " << c << " (" << prev[n] << ")"; } break; } } // increment frequency for 's' for every letter which occurs more than once, // and increment the count for every letter which is (or should be) mentioned for (n = 0; n < 26; n++) { switch (_mode) { case robinsonizer_mode::pangram: ++next[n]; break; case robinsonizer_mode::strict_autogram: if (next[n]) { ++next[n]; } break; case robinsonizer_mode::lax_autogram: if (prev[n]) { ++next[n]; } break; default: break; } if (next[n] > 1) { ++next[s_index]; } } #if DEBUG // debug output, only if verbosity level is high enough if (_verbosity_level > 1) { write_result(cerr); cerr << endl; } #endif // test if our result equals the previous one (if so, we found a valid sentence) _found = memcmp(next, prev, 26 * sizeof(int)) == 0; if (_found) { break; } if (num_iter == _maxiter) { break; } else { memcpy(prev, next, 26 * sizeof(int)); } } while (true); } while (!_found); } void engine::frequencies(ostream& os, const int fm[]) const { bool output = false; for (unsigned int n = 0; n < 26; n++) { if (fm[n]) { if (output) { os << ", "; } char c = n + 'a'; os << c << " (" << fm[n] << ")"; output = true; } } os << endl; } void engine::numeral_frequencies(ostream& os) const { for (unsigned int i = 0; i < _numerals.size(); i++) { os << _numerals[i] << ": "; frequencies(os, _freq[i]); } } void engine::write_result(ostream& os) const { if (_easy_parsing) { os << "RSLT[" << _engine_id << "] "; } os << _start << " "; unsigned int n; unsigned int first = 0; unsigned int last = 25; bool first_found = false; for (n = 0; n < 26; n++) { // pre-scan if (_new[n]) { if (!first_found) { first = n; first_found = true; } last = n; } } for (n = 0; n < 26; n++) { if (_new[n]) { if (n == last) { os << " " << _numerals[0] << " "; } else if (n > first) { os << ", "; } char c = n + 'a'; os << _numerals[_new[n]] << " " << c; if (_new[n] > 1) { os << "'s"; } } } os << "."; } ostream& operator<<(ostream& os, const engine& engine) { engine.write_result(os); return os; }