259 lines
7.1 KiB
C++
259 lines
7.1 KiB
C++
// robinsonizer engine by Bob Polis
// copyright (c) 1994-2019

// TODO Use correct plurals: adjust numeral file format to include this.

// TODO Don't assume 26-letter alphabet, but allow for arbitrary character list.

|
#include <cstring>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include "engine.hpp"
|
|
|
|
// position of the letter 's' in the a-z frequency tables (used for counting plural s's)
constexpr int s_index {'s' - 'a'};
|
|
|
|
engine::engine(const std::string& start,
|
|
std::vector<std::string>&& numerals,
|
|
int maxiter,
|
|
robinsonizer_mode mode,
|
|
unsigned int verbose) :
|
|
_start {start},
|
|
_numerals {numerals},
|
|
_maxiter {maxiter},
|
|
_mode {mode},
|
|
_verbosity_level {verbose}
|
|
{
|
|
// setup random distribution
|
|
_dist.param(std::uniform_int_distribution<>::param_type {0, static_cast<int>(_numerals.size()) - 1});
|
|
|
|
// now we know how many numerals we have, so we can allocate our efficient buffers
|
|
unsigned int n;
|
|
for (n = 0; n < _numerals.size(); ++n) {
|
|
std::vector<int> vec;
|
|
vec.resize(26, 0);
|
|
_frequencies.push_back(vec);
|
|
_freq.push_back(_frequencies.back().data());
|
|
}
|
|
_start_freq.resize(26, 0);
|
|
_old.resize(26, 0);
|
|
_new.resize(26, 0);
|
|
_used.resize(26, 0);
|
|
|
|
// init numeral letter frequency table
|
|
for (n = 0; n < _numerals.size(); n++) {
|
|
for (char c : _numerals[n]) {
|
|
if (c >= 'a' && c <= 'z') {
|
|
++_freq[n][c - 'a'];
|
|
}
|
|
}
|
|
}
|
|
if (_verbosity_level > 1) {
|
|
numeral_frequencies(std::cerr);
|
|
}
|
|
|
|
// get letter frequencies from sentence start
|
|
for (char c : start) {
|
|
// translate upper- to lowercase
|
|
const char up_lo_dif = 'a' - 'A';
|
|
if (c >= 'A' && c <= 'Z') {
|
|
c += up_lo_dif;
|
|
}
|
|
// skip non-alphabetical chars
|
|
if (c >= 'a' && c <= 'z') {
|
|
++_start_freq[c - 'a'];
|
|
}
|
|
}
|
|
// add letters from 'and', reset 'and' vector
|
|
for (n = 0; n < 26; n++) {
|
|
_start_freq[n] += _freq[0][n];
|
|
_freq[0][n] = 0;
|
|
}
|
|
// build 'used' table for correct autogram seeding
|
|
for (n = 0; n < 26; ++n) {
|
|
_used[n] = _start_freq[n];
|
|
}
|
|
for (n = 0; n < _numerals.size(); ++n) {
|
|
for (int i = 0; i < 26; ++i) {
|
|
_used[i] += _freq[n][i];
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void engine::run()
|
|
{
|
|
auto prev = _old.data();
|
|
auto next = _new.data();
|
|
auto freq = _freq.data();
|
|
auto start = _start_freq.data();
|
|
|
|
do {
|
|
// setup
|
|
int num_iter = 0;
|
|
unsigned int k, n;
|
|
|
|
// create random seed vector
|
|
switch (_mode) {
|
|
case robinsonizer_mode::pangram:
|
|
case robinsonizer_mode::strict_autogram:
|
|
for (n = 0; n < 26; n++) {
|
|
prev[n] = _dist(_random_engine);
|
|
}
|
|
break;
|
|
case robinsonizer_mode::lax_autogram:
|
|
for (n = 0; n < 26; ++n) {
|
|
if (_used[n]) { // only if letter occurs in numerals or sentence start
|
|
prev[n] = _dist(_random_engine);
|
|
} else {
|
|
prev[n] = 0;
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
#if DEBUG
|
|
// logging, if desired
|
|
if (_verbosity_level > 1) {
|
|
frequencies(std::cerr, const_cast<const int*>(prev));
|
|
}
|
|
#endif
|
|
|
|
do {
|
|
// setup
|
|
num_iter++;
|
|
_total_iterations++;
|
|
std::memcpy(next, start, 26 * sizeof(int));
|
|
|
|
// count letters in resulting sentence by incrementing result freqmap elements
|
|
for (n = 0; n < 26; n++) {
|
|
#if DEBUG
|
|
if (static_cast<unsigned int>(prev[n]) < _numerals.size()) {
|
|
#endif
|
|
auto p = freq[prev[n]];
|
|
for (k = 0; k < 26; k++) {
|
|
next[k] += p[k];
|
|
}
|
|
#if DEBUG
|
|
} else {
|
|
char c = 'a' + n;
|
|
std::cerr << std::endl << "overflow: " << c << " (" << prev[n] << ")";
|
|
break;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
// increment frequency for 's' for every letter which occurs more than once,
|
|
// and increment the count for every letter which is (or should be) mentioned
|
|
for (n = 0; n < 26; n++) {
|
|
switch (_mode) {
|
|
case robinsonizer_mode::pangram:
|
|
++next[n];
|
|
break;
|
|
case robinsonizer_mode::strict_autogram:
|
|
if (next[n]) {
|
|
++next[n];
|
|
}
|
|
break;
|
|
case robinsonizer_mode::lax_autogram:
|
|
if (prev[n]) {
|
|
++next[n];
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
if (next[n] > 1) {
|
|
++next[s_index];
|
|
}
|
|
}
|
|
|
|
#if DEBUG
|
|
// debug output, only if verbosity level is high enough
|
|
if (_verbosity_level > 1) {
|
|
write_result(std::cerr);
|
|
std::cerr << std::endl;
|
|
}
|
|
#endif
|
|
|
|
// test if our result equals the previous one (if so, we found a valid sentence)
|
|
_found = std::memcmp(next, prev, 26 * sizeof(int)) == 0;
|
|
|
|
if (_found) {
|
|
break;
|
|
}
|
|
if (num_iter == _maxiter) {
|
|
break;
|
|
} else {
|
|
std::memcpy(prev, next, 26 * sizeof(int));
|
|
}
|
|
|
|
} while (true);
|
|
|
|
} while (!_found);
|
|
}
|
|
|
|
void engine::frequencies(std::ostream& os, const int fm[]) const
|
|
{
|
|
bool output = false;
|
|
for (unsigned int n = 0; n < 26; n++) {
|
|
if (fm[n]) {
|
|
if (output) {
|
|
os << ", ";
|
|
}
|
|
char c = n + 'a';
|
|
os << c << " (" << fm[n] << ")";
|
|
output = true;
|
|
}
|
|
}
|
|
os << std::endl;
|
|
}
|
|
|
|
void engine::numeral_frequencies(std::ostream& os) const
|
|
{
|
|
for (unsigned int i = 0; i < _numerals.size(); i++) {
|
|
os << _numerals[i] << ": ";
|
|
frequencies(os, _freq[i]);
|
|
}
|
|
}
|
|
|
|
void engine::write_result(std::ostream& os) const
|
|
{
|
|
os << _start << " ";
|
|
unsigned int n;
|
|
unsigned int first = 0;
|
|
unsigned int last = 25;
|
|
bool first_found = false;
|
|
for (n = 0; n < 26; n++) { // pre-scan
|
|
if (_new[n]) {
|
|
if (!first_found) {
|
|
first = n;
|
|
first_found = true;
|
|
}
|
|
last = n;
|
|
}
|
|
}
|
|
for (n = 0; n < 26; n++) {
|
|
if (_new[n]) {
|
|
if (n == last) {
|
|
os << " " << _numerals[0] << " ";
|
|
} else if (n > first) {
|
|
os << ", ";
|
|
}
|
|
char c = n + 'a';
|
|
os << _numerals[_new[n]] << " " << c;
|
|
if (_new[n] > 1) {
|
|
os << "'s";
|
|
}
|
|
}
|
|
}
|
|
os << ".";
|
|
}
|
|
|
|
// Stream insertion for engine: emits the engine's result sentence via
// write_result() and hands the stream back so insertions can be chained.
std::ostream& operator<<(std::ostream& os, const engine& engine)
{
	engine.write_result(os);
	return os;
}
|