Machine Learning with MENACE – part 2

Implementation

Let’s create an implementation of the MENACE player algorithm in Go. First we’ll create some code to represent the playing board/noughts and crosses grid:

package menace

import (
	"fmt"
	"strconv"
	"strings"
)

const X = 'X'
const O = 'O'
const EMPTY = ' '
const DRAW = 'Z'
const NOBODY = EMPTY

type board struct {
	board [9]rune
}

func newBoard() board {
	return board{
		board: [9]rune{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '},
	}
}

// Get board string for displaying
func (b *board) rawString() string {
	return fmt.Sprintf("\n 0 | 1 | 2     %c | %c | %c\n"+
		"---+---+---   ---+---+---\n"+
		" 3 | 4 | 5     %c | %c | %c\n"+
		"---+---+---   ---+---+---\n"+
		" 6 | 7 | 8     %c | %c | %c", b.board[0], b.board[1], b.board[2],
		b.board[3], b.board[4], b.board[5],
		b.board[6], b.board[7], b.board[8])
}

func (b *board) validMove(move string) bool {
	m, err := strconv.ParseUint(move, 10, 0)
	if err != nil {
		return false
	}
	if m < 9 && b.board[m] == ' ' {
		return true
	}

	return false
}

// Check if game is a draw
func (b *board) draw() bool {
	for _, e := range b.board {
		if e == ' ' {
			return false
		}
	}
	return true
}

func (b *board) playMove(position int, marker rune) {
	b.board[position] = marker
}

func (b *board) toString() string {
	strBoard := make([]string, len(b.board))
	for i, r := range b.board {
		strBoard[i] = string(r)
	}
	return strings.Join(strBoard, "")
}

func (b *board) winning(symbol rune) bool {
	tl, c, br := false, false, false

	if b.board[0] == symbol {
		tl = (b.board[0] == b.board[1] && b.board[1] == b.board[2]) || (b.board[0] == b.board[3] && b.board[3] == b.board[6]) || (b.board[0] == b.board[4] && b.board[4] == b.board[8])
	}
	if b.board[4] == symbol {
		c = (b.board[1] == b.board[4] && b.board[4] == b.board[7]) || (b.board[3] == b.board[4] && b.board[4] == b.board[5]) || (b.board[2] == b.board[4] && b.board[4] == b.board[6])
	}
	if b.board[8] == symbol {
		br = (b.board[2] == b.board[5] && b.board[5] == b.board[8]) || (b.board[6] == b.board[7] && b.board[7] == b.board[8])
	}

	return tl || c || br
}

func (b *board) winner() (rune, bool) {
	tl, c, br := false, false, false
	winner := DRAW

	if b.board[0] != EMPTY {
		tl = (b.board[0] == b.board[1] && b.board[1] == b.board[2]) || (b.board[0] == b.board[3] && b.board[3] == b.board[6]) || (b.board[0] == b.board[4] && b.board[4] == b.board[8])
		if tl {
			winner = b.board[0]
		}
	}
	if b.board[4] != EMPTY {
		c = (b.board[1] == b.board[4] && b.board[4] == b.board[7]) || (b.board[3] == b.board[4] && b.board[4] == b.board[5]) || (b.board[2] == b.board[4] && b.board[4] == b.board[6])
		if c {
			winner = b.board[4]
		}
	}
	if b.board[8] != EMPTY {
		br = (b.board[2] == b.board[5] && b.board[5] == b.board[8]) || (b.board[6] == b.board[7] && b.board[7] == b.board[8])
		if br {
			winner = b.board[8]
		}
	}

	if winner == DRAW {
		for _, symbol := range b.board {
			if symbol == EMPTY {
				winner = NOBODY
				break
			}
		}
	}

	return winner, (tl || c || br || winner == DRAW)
}

Next lets define an interface for a Player:

package menace

type player interface {
	getName() string
	getSymbol() rune
	startGame()
	getMove(board) int
	winGame()
	drawGame()
	loseGame()
	printProbability(board)
}

Now we’ll create an implementation of the algorithm

package menace

import (
	"fmt"
	"math/rand"
	"strings"
	"time"
)

/*******************/
/** MENACE Player **/
/*******************/
type move_played struct {
	board string
	bead  int
}

type menace_player struct {
	matchboxes   map[string][]int
	moves_played []move_played
	wins         int
	draws        int
	losses       int
	name         string
	symbol rune
	r            *rand.Rand
	debug        bool
}

func newMenacePlayer(name string, symbol rune, debug bool) menace_player {
	s1 := rand.NewSource(time.Now().UnixNano())
	r := rand.New(s1)

	return menace_player{
		matchboxes:   make(map[string][]int),
		wins:         0,
		draws:        0,
		losses:       0,
		moves_played: []move_played{},
		name:         name,
		r:            r,
		debug:        debug,
		symbol: symbol,
	}
}

func makeNewMatchbox(board string) []int {
	matchbox := []int{}
	for i, c := range strings.Split(board, "") {
		if c == " " {
			matchbox = append(matchbox, i)
		}
	}
	// Add more beads to initial boards
	var amount int = (len(matchbox) + 2) / 2
	filledMatchbox := []int{}
	for i := 0; i < amount; i++ {
		filledMatchbox = append(filledMatchbox, matchbox...)
	}
	return filledMatchbox
}

func (player *menace_player) getName() string {
	return player.name
}

func (player *menace_player) getSymbol() rune {
	return player.symbol
}

func (player *menace_player) startGame() {
	player.moves_played = []move_played{}
}

/*
Find board in matchboxes and choose a bead.
If the matchbox is empty, return -1 (resign)
*/
func (player *menace_player) getMove(b board) int {
	board := b.toString()

	if player.debug {
		fmt.Printf("[%s] Contemplating board [%s]\n", player.name, board)
	}

	// Check if we have this board and initialise if not
	if player.matchboxes[board] == nil {
		if player.debug {
			fmt.Printf("[%s] Make new matchbox for board [%s]\n", player.name, board)
		}
		player.matchboxes[board] = makeNewMatchbox(board)
	}

	beads := player.matchboxes[board]

	var bead int

	if len(beads) > 0 {
		bead = beads[player.r.Intn(len(beads))]
		if player.debug {
			fmt.Printf("[%s] Chose bead %d from %v\n", player.name, bead, beads)
		}
		player.moves_played = append(player.moves_played, move_played{board: board, bead: bead})
	} else {
		bead = RESIGN
	}

	return bead
}

func (player *menace_player) winGame() {
	// Menace won so add 3 beads to each move played
	for _, move := range player.moves_played {
		player.matchboxes[move.board] = append(player.matchboxes[move.board], move.bead, move.bead, move.bead)
		if player.debug {
			fmt.Printf("[%s] Win - I added 3 beads of %d to [%s]\n", player.name, move.bead, move.board)
		}
	}
	player.wins++

	if player.debug {
		fmt.Printf("[%s] I won\n", player.name)
	}
}

func (player *menace_player) drawGame() {
	// Menace drew so add 1 bead to each move played
	for _, move := range player.moves_played {
		player.matchboxes[move.board] = append(player.matchboxes[move.board], move.bead)
		if player.debug {
			fmt.Printf("[%s] Draw - I added 1 bead of %d to [%s]\n", player.name, move.bead, move.board)
		}
	}
	player.draws++

}

func (player *menace_player) loseGame() {
	// Menace lost so remove a bead from each move played
	for _, move := range player.moves_played {
		found := 0
		for i, bead := range player.matchboxes[move.board] {
			if bead == move.bead {
				found = i
				break
			}
		}

		matchbox := player.matchboxes[move.board]
		// Copy last element over element to remove
		matchbox[found] = matchbox[len(matchbox)-1]
		// Truncate
		matchbox = matchbox[:len(matchbox)-1]

		// Don't remove all beads
		if len(matchbox) > 0 {
			player.matchboxes[move.board] = matchbox
		}
		if player.debug {
			fmt.Printf("[%s] Lose - I removed 1 bead of %d from [%s]\n", player.name, move.bead, move.board)
		}
	}
	player.losses++

	if player.debug {
		fmt.Printf("[%s] I lost\n", player.name)
	}
}

func (player *menace_player) printStats() {
	fmt.Printf("[%s] Have learnt %d boards\n", player.name, len(player.matchboxes))
	fmt.Printf("[%s] Won: %d Drawn: %d Lost %d\n", player.name, player.wins, player.draws, player.losses)
}

func (player *menace_player) printProbability(b board) {
	board := b.toString()
	matchbox := player.matchboxes[board]
	if matchbox == nil {
		fmt.Printf("[%s] Never seen this board before [%s]\n", player.name, board)
	} else {
		freq := make(map[int]int)

		depiction := [9]int{}

		for i := 0; i < len(matchbox); i++ {
			num := matchbox[i]
			freq[num] = freq[num] + 1
			depiction[num] = depiction[num] + 1
		}
		fmt.Printf("[%s] Stats for this board: %v\n", player.name, freq)
		fmt.Printf(
			"\n\t %3d | %3d | %3d \n"+
				"\t-----+-----+-----\n"+
				"\t %3d | %3d | %3d \n"+
				"\t-----+-----+-----\n"+
				"\t %3d | %3d | %3d \n\n",
			depiction[0], depiction[1], depiction[2], depiction[3], depiction[4], depiction[5], depiction[6], depiction[7], depiction[8],
		)
	}
}

First results

Using this implementation we can actually pit 2 instances of menace_player against each other. We’ll simply create a game loop which gets and plays a move from each player in turn until the game is ended. We can then do this multiple times to see if what happens. Let’s try 1000 iterations first.

[Menace 1] Have learnt 1167 boards
[Menace 1] Won: 598 Drawn: 133 Lost 269
[Menace 2] Have learnt 1012 boards
[Menace 2] Won: 269 Drawn: 133 Lost 598

This clearly shows that Menace 1 who goes first has a clear advantage over the Menace 2. If we look at the graph of this plotting the cumulative of wins – losses and draws for Menace 1 we can see a clear trend.

Problems

To see if Menace really was learning I created an implementation of player to allow human interaction.

package menace

import (
	"bufio"
	"fmt"
	"os"
	"strconv"
	"strings"
)

const RESIGN = -1
const NONE = -2

type player interface {
	getName() string
	getSymbol() rune
	startGame()
	getMove(board) int
	winGame()
	drawGame()
	loseGame()
	printProbability(board)
}

/*******************/
/** Human Player  **/
/*******************/

type human_player struct {
	moves_played []move_played
	wins         int
	draws        int
	losses       int
	symbol rune
}

func newHumanPlayer(symbol rune) human_player {
	return human_player{symbol: symbol}
}

func (player *human_player) getName() string {
	return "Human"
}

func (player *human_player) getSymbol() rune {
	return player.symbol
}

func (p *human_player) startGame() {
	p.moves_played = []move_played{}
	fmt.Print("Get ready!\n\n\n")
}

func (p *human_player) getMove(b board) int {
	reader := bufio.NewReader(os.Stdin)

	var move string
	var err error
	for {
		fmt.Print("Make a move: ")
		move, err = reader.ReadString('\n')
		if err == nil {
			move = strings.TrimSuffix(move, "\n")
			if b.validMove(move) {
				break
			}
		}
		fmt.Println("Not a valid move")
	}

	moveInt, _ := strconv.ParseInt(move, 10, 0)
	return int(moveInt)
}

func (p *human_player) winGame() {
	fmt.Println("You won!")
	p.wins++
}

func (p *human_player) drawGame() {
	fmt.Println("It's a draw")
	p.draws++
}

func (p *human_player) loseGame() {
	fmt.Println("You lost")
	p.losses++
}

func (player *human_player) printProbability(b board) {
	// Not required
}

func (player *human_player) printStats() {
	fmt.Printf("[%s] Won: %d Drawn: %d Lost %d\n", "Human", player.wins, player.draws, player.losses)
}

I then re-ran the program with 1000 iterations of Menace 1 against Menace 2 and then started a loop with Menace 1 against a human player (me). For the empty grid the top 3 opening positions Menace 1 had were 764 “beads” for the centre square, 384 for the top left, and 228 for the bottom right. Since its choices are random it decided to open with the top left corner. The play proceeded as follows:

  1. I play the centre
  2. Menace plays top middle
  3. I play top right to block
  4. Menace chooses middle left
  5. I play bottom left to win!

In Michie’s original tournament against MENACE the games began to draw consistently after 20 games. However, Michie was not also learning the game – he used the optimal strategy and so MENACE was learning much faster. Clearly pitting 2 MENACEs against each other is going to produce non-optimal results.

Playing Optimally

In order to recreate Michie’s findings I decided to play against my Menace code for several games. What I found was not what I expected and was disappointing. Out of 30 games I played Menace only managed to draw against me 4 times and it was never consistent. Something wasn’t quite right with how Menace was supposed to learn. In the next article we’ll have a look at what might be wrong and how we can make Menace a better learner.

Leave a Reply

Your email address will not be published. Required fields are marked *

WordPress Cookie Notice by Real Cookie Banner