Implementation
Let’s create an implementation of the MENACE player algorithm in Go. First we’ll create some code to represent the playing board/noughts and crosses grid:
package menace
import (
"fmt"
"strconv"
"strings"
)
const X = 'X'
const O = 'O'
const EMPTY = ' '
const DRAW = 'Z'
const NOBODY = EMPTY
type board struct {
board [9]rune
}
func newBoard() board {
return board{
board: [9]rune{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
}
}
// Get board string for displaying
func (b *board) rawString() string {
return fmt.Sprintf("\n 0 | 1 | 2 %c | %c | %c\n"+
"---+---+--- ---+---+---\n"+
" 3 | 4 | 5 %c | %c | %c\n"+
"---+---+--- ---+---+---\n"+
" 6 | 7 | 8 %c | %c | %c", b.board[0], b.board[1], b.board[2],
b.board[3], b.board[4], b.board[5],
b.board[6], b.board[7], b.board[8])
}
func (b *board) validMove(move string) bool {
m, err := strconv.ParseUint(move, 10, 0)
if err != nil {
return false
}
if m < 9 && b.board[m] == ' ' {
return true
}
return false
}
// Check if game is a draw
func (b *board) draw() bool {
for _, e := range b.board {
if e == ' ' {
return false
}
}
return true
}
func (b *board) playMove(position int, marker rune) {
b.board[position] = marker
}
func (b *board) toString() string {
strBoard := make([]string, len(b.board))
for i, r := range b.board {
strBoard[i] = string(r)
}
return strings.Join(strBoard, "")
}
func (b *board) winning(symbol rune) bool {
tl, c, br := false, false, false
if b.board[0] == symbol {
tl = (b.board[0] == b.board[1] && b.board[1] == b.board[2]) || (b.board[0] == b.board[3] && b.board[3] == b.board[6]) || (b.board[0] == b.board[4] && b.board[4] == b.board[8])
}
if b.board[4] == symbol {
c = (b.board[1] == b.board[4] && b.board[4] == b.board[7]) || (b.board[3] == b.board[4] && b.board[4] == b.board[5]) || (b.board[2] == b.board[4] && b.board[4] == b.board[6])
}
if b.board[8] == symbol {
br = (b.board[2] == b.board[5] && b.board[5] == b.board[8]) || (b.board[6] == b.board[7] && b.board[7] == b.board[8])
}
return tl || c || br
}
func (b *board) winner() (rune, bool) {
tl, c, br := false, false, false
winner := DRAW
if b.board[0] != EMPTY {
tl = (b.board[0] == b.board[1] && b.board[1] == b.board[2]) || (b.board[0] == b.board[3] && b.board[3] == b.board[6]) || (b.board[0] == b.board[4] && b.board[4] == b.board[8])
if tl {
winner = b.board[0]
}
}
if b.board[4] != EMPTY {
c = (b.board[1] == b.board[4] && b.board[4] == b.board[7]) || (b.board[3] == b.board[4] && b.board[4] == b.board[5]) || (b.board[2] == b.board[4] && b.board[4] == b.board[6])
if c {
winner = b.board[4]
}
}
if b.board[8] != EMPTY {
br = (b.board[2] == b.board[5] && b.board[5] == b.board[8]) || (b.board[6] == b.board[7] && b.board[7] == b.board[8])
if br {
winner = b.board[8]
}
}
if winner == DRAW {
for _, symbol := range b.board {
if symbol == EMPTY {
winner = NOBODY
break
}
}
}
return winner, (tl || c || br || winner == DRAW)
}
Next lets define an interface for a Player:
package menace
type player interface {
getName() string
getSymbol() rune
startGame()
getMove(board) int
winGame()
drawGame()
loseGame()
printProbability(board)
}
Now we’ll create an implementation of the algorithm
package menace
import (
"fmt"
"math/rand"
"strings"
"time"
)
/*******************/
/** MENACE Player **/
/*******************/
type move_played struct {
board string
bead int
}
type menace_player struct {
matchboxes map[string][]int
moves_played []move_played
wins int
draws int
losses int
name string
symbol rune
r *rand.Rand
debug bool
}
func newMenacePlayer(name string, symbol rune, debug bool) menace_player {
s1 := rand.NewSource(time.Now().UnixNano())
r := rand.New(s1)
return menace_player{
matchboxes: make(map[string][]int),
wins: 0,
draws: 0,
losses: 0,
moves_played: []move_played{},
name: name,
r: r,
debug: debug,
symbol: symbol,
}
}
func makeNewMatchbox(board string) []int {
matchbox := []int{}
for i, c := range strings.Split(board, "") {
if c == " " {
matchbox = append(matchbox, i)
}
}
// Add more beads to initial boards
var amount int = (len(matchbox) + 2) / 2
filledMatchbox := []int{}
for i := 0; i < amount; i++ {
filledMatchbox = append(filledMatchbox, matchbox...)
}
return filledMatchbox
}
func (player *menace_player) getName() string {
return player.name
}
func (player *menace_player) getSymbol() rune {
return player.symbol
}
func (player *menace_player) startGame() {
player.moves_played = []move_played{}
}
/*
Find board in matchboxes and choose a bead.
If the matchbox is empty, return -1 (resign)
*/
func (player *menace_player) getMove(b board) int {
board := b.toString()
if player.debug {
fmt.Printf("[%s] Contemplating board [%s]\n", player.name, board)
}
// Check if we have this board and initialise if not
if player.matchboxes[board] == nil {
if player.debug {
fmt.Printf("[%s] Make new matchbox for board [%s]\n", player.name, board)
}
player.matchboxes[board] = makeNewMatchbox(board)
}
beads := player.matchboxes[board]
var bead int
if len(beads) > 0 {
bead = beads[player.r.Intn(len(beads))]
if player.debug {
fmt.Printf("[%s] Chose bead %d from %v\n", player.name, bead, beads)
}
player.moves_played = append(player.moves_played, move_played{board: board, bead: bead})
} else {
bead = RESIGN
}
return bead
}
func (player *menace_player) winGame() {
// Menace won so add 3 beads to each move played
for _, move := range player.moves_played {
player.matchboxes[move.board] = append(player.matchboxes[move.board], move.bead, move.bead, move.bead)
if player.debug {
fmt.Printf("[%s] Win - I added 3 beads of %d to [%s]\n", player.name, move.bead, move.board)
}
}
player.wins++
if player.debug {
fmt.Printf("[%s] I won\n", player.name)
}
}
func (player *menace_player) drawGame() {
// Menace drew so add 1 bead to each move played
for _, move := range player.moves_played {
player.matchboxes[move.board] = append(player.matchboxes[move.board], move.bead)
if player.debug {
fmt.Printf("[%s] Draw - I added 1 bead of %d to [%s]\n", player.name, move.bead, move.board)
}
}
player.draws++
}
func (player *menace_player) loseGame() {
// Menace lost so remove a bead from each move played
for _, move := range player.moves_played {
found := 0
for i, bead := range player.matchboxes[move.board] {
if bead == move.bead {
found = i
break
}
}
matchbox := player.matchboxes[move.board]
// Copy last element over element to remove
matchbox[found] = matchbox[len(matchbox)-1]
// Truncate
matchbox = matchbox[:len(matchbox)-1]
// Don't remove all beads
if len(matchbox) > 0 {
player.matchboxes[move.board] = matchbox
}
if player.debug {
fmt.Printf("[%s] Lose - I removed 1 bead of %d from [%s]\n", player.name, move.bead, move.board)
}
}
player.losses++
if player.debug {
fmt.Printf("[%s] I lost\n", player.name)
}
}
func (player *menace_player) printStats() {
fmt.Printf("[%s] Have learnt %d boards\n", player.name, len(player.matchboxes))
fmt.Printf("[%s] Won: %d Drawn: %d Lost %d\n", player.name, player.wins, player.draws, player.losses)
}
func (player *menace_player) printProbability(b board) {
board := b.toString()
matchbox := player.matchboxes[board]
if matchbox == nil {
fmt.Printf("[%s] Never seen this board before [%s]\n", player.name, board)
} else {
freq := make(map[int]int)
depiction := [9]int{}
for i := 0; i < len(matchbox); i++ {
num := matchbox[i]
freq[num] = freq[num] + 1
depiction[num] = depiction[num] + 1
}
fmt.Printf("[%s] Stats for this board: %v\n", player.name, freq)
fmt.Printf(
"\n\t %3d | %3d | %3d \n"+
"\t-----+-----+-----\n"+
"\t %3d | %3d | %3d \n"+
"\t-----+-----+-----\n"+
"\t %3d | %3d | %3d \n\n",
depiction[0], depiction[1], depiction[2], depiction[3], depiction[4], depiction[5], depiction[6], depiction[7], depiction[8],
)
}
}
First results
Using this implementation we can actually pit 2 instances of menace_player
against each other. We’ll simply create a game loop which gets and plays a move from each player in turn until the game is ended. We can then do this multiple times to see if what happens. Let’s try 1000 iterations first.
[Menace 1] Have learnt 1167 boards
[Menace 1] Won: 598 Drawn: 133 Lost 269
[Menace 2] Have learnt 1012 boards
[Menace 2] Won: 269 Drawn: 133 Lost 598
This clearly shows that Menace 1 who goes first has a clear advantage over the Menace 2. If we look at the graph of this plotting the cumulative of wins – losses and draws for Menace 1 we can see a clear trend.
Problems
To see if Menace really was learning I created an implementation of player
to allow human interaction.
package menace
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
)
const RESIGN = -1
const NONE = -2
type player interface {
getName() string
getSymbol() rune
startGame()
getMove(board) int
winGame()
drawGame()
loseGame()
printProbability(board)
}
/*******************/
/** Human Player **/
/*******************/
type human_player struct {
moves_played []move_played
wins int
draws int
losses int
symbol rune
}
func newHumanPlayer(symbol rune) human_player {
return human_player{symbol: symbol}
}
func (player *human_player) getName() string {
return "Human"
}
func (player *human_player) getSymbol() rune {
return player.symbol
}
func (p *human_player) startGame() {
p.moves_played = []move_played{}
fmt.Print("Get ready!\n\n\n")
}
func (p *human_player) getMove(b board) int {
reader := bufio.NewReader(os.Stdin)
var move string
var err error
for {
fmt.Print("Make a move: ")
move, err = reader.ReadString('\n')
if err == nil {
move = strings.TrimSuffix(move, "\n")
if b.validMove(move) {
break
}
}
fmt.Println("Not a valid move")
}
moveInt, _ := strconv.ParseInt(move, 10, 0)
return int(moveInt)
}
func (p *human_player) winGame() {
fmt.Println("You won!")
p.wins++
}
func (p *human_player) drawGame() {
fmt.Println("It's a draw")
p.draws++
}
func (p *human_player) loseGame() {
fmt.Println("You lost")
p.losses++
}
func (player *human_player) printProbability(b board) {
// Not required
}
func (player *human_player) printStats() {
fmt.Printf("[%s] Won: %d Drawn: %d Lost %d\n", "Human", player.wins, player.draws, player.losses)
}
I then re-ran the program with 1000 iterations of Menace 1 against Menace 2 and then started a loop with Menace 1 against a human player (me). For the empty grid the top 3 opening positions Menace 1 had were 764 “beads” for the centre square, 384 for the top left, and 228 for the bottom right. Since its choices are random it decided to open with the top left corner. The play proceeded as follows:
- I play the centre
- Menace plays top middle
- I play top right to block
- Menace chooses middle left
- I play bottom left to win!
In Michie’s original tournament against MENACE the games began to draw consistently after 20 games. However, Michie was not also learning the game – he used the optimal strategy and so MENACE was learning much faster. Clearly pitting 2 MENACEs against each other is going to produce non-optimal results.
Playing Optimally
In order to recreate Michie’s findings I decided to play against my Menace code for several games. What I found was not what I expected and was disappointing. Out of 30 games I played Menace only managed to draw against me 4 times and it was never consistent. Something wasn’t quite right with how Menace was supposed to learn. In the next article we’ll have a look at what might be wrong and how we can make Menace a better learner.