ナンクロを解く(2) 解答 - のんびり読書日記

前回からちょっと時間が経ってしまいましたが、ナンクロ解きの続きです。前回はナンクロ用の辞書をDBMに保存するところまで作りましたので、今回は実際に問題を解かせたいと思います。

まず入力する問題の形式ですが、以下の書式を使用しています。

numbers: 5
width: 5
height: 5
table: 2 4 1 3 4 4 0 2 4 0 2 2 0 2 5 0 5 3 0 1 1 3 2 5 3
assign: 1:と 2:き

各項目は以下を表しています。

numbers: 問題中の数字の数
width: 問題の横幅
height: 問題の縦幅
table: 各数字の並び一番上の行から行ごとにつなげてあります
assign: すでに分かっているカナ文字の割り当て

今回作成したコードは以下の通りです。制約充足問題を解くときの基本で、一番制約が強く働いている箇所、つまり一番解答候補数が少ないところから調べていくようにしています。もっと効率よくとけそうですが、とりあえず単純に作ってます。

#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include <tchdb.h>

using namespace std;

const string NOT_ASSIGNED = " ";
const string BLOCK        = "■";

typedef vector<vector<int> > Table;
typedef vector<int> Sequence;

int main(int, char **);
string make_pattern(const Sequence &);

class Dictionary {
private:
  TCHDB *hdb_;

public:
  Dictionary() { }

  void open_dbm(const string &filename) {
    hdb_ = tchdbnew();
    if (!tchdbopen(hdb_, filename.c_str(), HDBOREADER)) {
      int ecode = tchdbecode(hdb_);
      cerr << "[error] open error:%s" << tchdberrmsg(ecode) << endl;
      exit(1);
    }
  }

  void lookup(const string &pattern, vector<string> &words) {
    char *value = tchdbget2(hdb_, pattern.c_str()); 
    if (value == NULL) return;
    string word;
    stringstream ss(value);
    while (ss >> word) {
      words.push_back(word);
    }
    free(value);
  }
};

class Numcro {
private:
  int width_;
  int height_;
  int nkana_;
  Table table_;
  Dictionary dict_;
  vector<string> assign_;

  void _init_table() {
    table_.clear();
    for (int y = 0; y < height_; y++) {
      vector<int> vec;
      for (int x = 0; x < width_; x++) {
        vec.push_back(0);
      }
      table_.push_back(vec);
    }
  }

  void _init_assign() {
    assign_.push_back(BLOCK);
    for (int i =0; i < nkana_; i++) {
      assign_.push_back(NOT_ASSIGNED);
    }
  }

  void _get_sequences(vector<Sequence> &sequences) {
    // row
    for (int y = 0; y < height_; y++) {
      Sequence seq;
      for (int x = 0; x < width_; x++) {
        if (table_[y][x] != 0) {
          seq.push_back(table_[y][x]);
          if (x < width_-1) continue;
        }
        if (seq.size() > 1) {
          Sequence newseq(seq);
          sequences.push_back(newseq);
        }
        seq.clear();
      }
    }
    // column
    for (int x = 0; x < width_; x++) {
      Sequence seq;
      for (int y = 0; y < height_; y++) {
        if (table_[y][x] != 0) {
          seq.push_back(table_[y][x]);
          if (y < height_-1) continue;
        }
        if (seq.size() > 1) {
          Sequence newseq(seq);
          sequences.push_back(newseq);
        }
        seq.clear();
      }
    }
  }

  bool _solve_impl(const vector<Sequence> &sequences) {
    if (sequences.size() == 0) {
      print_table();
      return true;
    } else {
      vector<vector<string> > words_list(sequences.size());
      for (unsigned int i = 0; i < sequences.size(); i++) {
        string pattern = make_pattern(sequences[i]);
        dict_.lookup(pattern, words_list[i]);
      }

      int min_index = -1;
      int min_words = -1;
      for (unsigned int i = 0; i < sequences.size(); i++) {
        if (min_words < 0 || static_cast<int>(words_list[i].size()) < min_words) {
          min_words = words_list[i].size();
          min_index = i;
        }
      }
      Sequence seq = sequences[min_index];
      vector<Sequence> newsequences;
      for (unsigned int i = 0; i < sequences.size(); i++) {
        if (static_cast<int>(i) != min_index) newsequences.push_back(sequences[i]);
      }
      vector<string> words = words_list[min_index];

      for (unsigned int i = 0; i < words.size(); i++) {
        string word = words[i];
        vector<string> oldassign(assign_);
        bool not_match = false;
        for (unsigned int j = 0; j < word.size()/3; j++) {
          string kana = word.substr(j*3, 3);
          if (assign_[seq[j]] == NOT_ASSIGNED) {
            assign_[seq[j]] = kana;
          } else if (assign_[seq[j]] != kana) {
            not_match = true;
            break;
          }
        }
        if (!not_match && _solve_impl(newsequences)) {
          return true;
        }
        assign_ = oldassign;
      }
    }  
    return false;
  }

public:
  Numcro() {
    nkana_ = 0; 
    width_ = 0; 
    height_ = 0; 
  }

  void set_dictionary(const Dictionary &dict) {
    this->dict_ = dict; 
  }

  void read_question(const string &filename) {
    ifstream ifs(filename.c_str());
    string line;
    vector<int> table;
    map<int, string> pairs;
    while (getline(ifs, line)) {
      stringstream ss(line);
      string head;
      ss >> head;
      if (head == "numbers:") {
        ss >> nkana_;
      } else if (head == "width:") {
        ss >> width_;
      } else if (head == "height:") {
        ss >> height_;
      } else if (head == "table:") {
        int n;
        while (ss >> n) table.push_back(n);
      } else if (head == "assign:") {
        string pair;
        while (ss >> pair) {
          size_t pos = pair.find(":", 0);
          string number = pair.substr(0, pos);
          string kana = pair.substr(pos+1, pair.size()-pos);
          pairs[atoi(number.c_str())] = kana;
        }
      }
    } 
    if (nkana_ < 1 || width_ < 1 || height_ < 1
        || (int)table.size() != width_ * height_) {
      cerr << "Illegal format" << endl;
    }

    _init_table();
    _init_assign();
    for (int y = 0; y < height_; y++) {
      for (int x = 0; x < width_; x++) {
        table_[y][x] = table[x + y * width_];
      }
    }
    map<int, string>::iterator it = pairs.begin();
    while (it != pairs.end()) {
      assign_[it->first] = it->second;
      it++;
    }
  }

  bool solve() {
    vector<Sequence> sequences;
    _get_sequences(sequences);
    return _solve_impl(sequences);
  }

  void print_table() {
    stringstream ss;
    // table
    for (int y = 0; y < height_; y++) {
      for (int x = 0; x < width_; x++) {
        int number = table_[y][x];
        if (assign_[number] != NOT_ASSIGNED) {
          ss << assign_[number];
        } else {
          ss.width(2);
          ss << number;
        }
        if (x != width_-1) ss << " ";
      }
      ss << endl;
    }

    // assign
    for (unsigned int i = 1;  i < assign_.size(); i++) {
      ss << i << ":" << assign_[i];
      if (i % 10 == 0) {
        ss << endl;
      } else {
        ss << " ";
      }
    }
    ss << endl;
    cout << ss.str();
  }

};


string make_pattern(const Sequence &seq) {
  map<int, int> seqmap;
  map<int, int>::iterator it;
  stringstream ss;
  int cnt = 1;
  for (unsigned int i = 0; i < seq.size(); i++) {
    it = seqmap.find(seq[i]);
    if (it != seqmap.end()) {
      ss << it->second;
    } else {
      seqmap[seq[i]] = cnt;
      ss << cnt;
      cnt++;
    }
  }
  return ss.str();
}


int main(int argc, char **argv) {
  if (argc != 2) {
    cerr << "Usage: numcro question" << endl;
    exit(1);
  }
  Dictionary dict;
  dict.open_dbm("dic.hdb");
  Numcro numcro;
  numcro.set_dictionary(dict);
  numcro.read_question(argv[1]);
  cout << "Question:" << endl;
  numcro.print_table();
  cout << endl << "Answer:" << endl;
  numcro.solve();

  return 0;
}

実際に動かすとこんな感じ。問題はwikipediaのナンクロ説明ページに掲載されていたものを使用しています。

% g++ `tcucodec conf -l` numcro.cpp -o numcro
% ./numcro q.txt
Question:
き  4 と  3  4
 4 ■ き  4 ■
き き ■ き  5
■  5  3 ■ と
と  3 き  5  3
1:と 2:き 3:  4:  5:  

Answer:
き ん と う ん
ん ■ き ん ■
き き ■ き よ
■ よ う ■ と
と う き よ う
1:と 2:き 3:う 4:ん 5:よ

上記以外にも少し大きめの問題で解いてみたのですが、ちょっと遅いかな…。気が向いたらもう少し効率的な方法に改良したいと思います。

この手のパズルのプログラムを作っていていつも困るのが、世の中にはフリーのパズル問題がほとんど存在しないことです。大抵のパズルはパズル作家さんが作っているので、著作権の問題で勝手に問題を転載するわけにはいきません。

なので、問題を自動で作れるのが一番いいと思うのですが、どうすれば面白い問題が作れるのか？というのは結構難しい課題です。まず解くときのバックトラックの数を数えて難易度を判定したりする必要はありそうです。それ以外にも例えば、使用するワードを特定のジャンルに絞ることで楽しさを与えたり、あまり使用されないワードを使うことで難易度を上げたり、などなど。そもそも単純に問題を作るだけでも結構大変だと思いますが。

あと、人の日記で使用されているワードを使用して、ある文書集合に対して問題を作るっていうのはサービスとして面白くないかなぁ。解くときに分からなかったら、日記を読み返してワードを見つけたり。ただ問題を見て考えるだけじゃなくて、調べる楽しみも加わって面白くならないですかね。