Hello community,
I’ve been interested in the fabulous Monte Carlo Tree Search (MCTS) algorithm for a few months now.
I have read several documents such as Medium MCTS and Wiki MCTS, and in particular this GitHub project: GitHub MCTS.
The one that seems the easiest to tackle directly with MCTS is the Ultimate TicTacToe challenge.
The Monte Carlo Tree Search method has 4 key steps:
- The best-child (selection)
- The expand (expansion)
- The rollout (simulation)
- The back-propagate (backpropagation)
In the best-child step, we choose either the best child so far or an under-explored child (exploitation vs. exploration).
In the expand step, a new child is created from the selected node (see the sketch just below).
Here are the 4 graphics.
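As far as I understand it, selection and expansion usually fit together roughly like this. This is only a minimal sketch with a generic Node holding placeholder untried_moves, children, visits and wins fields and a hypothetical add_child helper, not my actual code:

import math, random

def ucb1(parent, child, c=1.4):
    # standard UCB1 score: exploitation term + exploration term
    return child.wins / child.visits + c * math.sqrt(2 * math.log(parent.visits) / child.visits)

def select_and_expand(root):
    node = root
    # selection: walk down only through nodes whose moves have all been expanded
    while not node.untried_moves and node.children:
        node = max(node.children, key=lambda ch: ucb1(node, ch))
    # expansion: create ONE child for ONE untried move of the selected node;
    # the remaining untried moves become its siblings on later iterations
    if node.untried_moves:
        move = node.untried_moves.pop(random.randrange(len(node.untried_moves)))
        node = node.add_child(move)   # hypothetical helper that appends to node.children
    return node

In other words, if I read the references correctly, siblings are not created all at once: each iteration expands one more untried move of the same selected node.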
My current problem is in the best-child step. I don’t understand how the siblings get created: the diagrams always show a node with one or more children, yet in the expand step we only ever create one child of the selected node.
(For example, here is the output I currently get:)
child._parent <__main__.MonteCarloTreeSearch object at 0x7f9caf3809d0>
child <__main__.MonteCarloTreeSearch object at 0x7f9caf380990>
child._parent <__main__.MonteCarloTreeSearch object at 0x7f9caf380990>
child <__main__.MonteCarloTreeSearch object at 0x7f9caf380f90>
child._parent <__main__.MonteCarloTreeSearch object at 0x7f9caf380f90>
child <__main__.MonteCarloTreeSearch object at 0x7f9caf380b90>
child._parent <__main__.MonteCarloTreeSearch object at 0x7f9caf380b90>
child <__main__.MonteCarloTreeSearch object at 0x7f9caf380e90>
child._parent <__main__.MonteCarloTreeSearch object at 0x7f9caf380e90>
child <__main__.MonteCarloTreeSearch object at 0x7f9caf380cd0>
As you can see, each new child becomes the parent of the next one, so I don’t see how siblings ever get created.
In the picture from the Wiki article, during the next best_child step, how is the 3/4 node selected as the best child, and how does a sibling of the 0/1 node get created?
Another difficulty I can see: when creating the children, I will also have to play a (random) move for my opponent. How do I keep best-child from simply favouring lines where my opponent plays badly?
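From what I have read, one common way to handle this in MCTS (I am not sure it is the right fix for my code, and the parent / wins / visits fields here are just placeholders) is to alternate the point of view of each tree level and flip the reward at every step while backpropagating:

def backpropagate(node, reward):
    # reward is 1 for a win and 0 for a loss, seen from the player who just moved into `node`
    while node is not None:
        node.visits += 1
        node.wins += reward
        reward = 1 - reward   # my win is my opponent's loss one level up
        node = node.parent

If I understand correctly, best-child at an opponent level then maximises the opponent's score, so the search does not rely on the opponent playing badly.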
I am sharing my code even though I know something is already wrong near the beginning. I would really like some help with this best-child step and with how to generate siblings.
import sys
import math
import random, copy
import numpy as np
NEITHER, MINE, OPP = -1 , 0 , 1
TYPE_TURN = { 0 : 'MINE' , 1 : 'OPP' }
f_row = lambda obj : obj.param[0]
f_col = lambda obj : obj.param[1]
f_prop = lambda obj : obj.param[2]
f_win = lambda obj : obj[0]
f_all = lambda obj : obj[1]
TYPE_STR = {
(0,NEITHER) : ' ' , (1,NEITHER) : ' ', (2,NEITHER) : ' ' ,
(3,NEITHER) : ' ' , (4,NEITHER) : ' ', (5,NEITHER) : ' ' ,
(6,NEITHER) : ' ' , (7,NEITHER) : ' ', (8,NEITHER) : ' ' ,
(0,MINE) : ' X X ' , (1,MINE) : ' X ', (2,MINE) : ' X X ' ,
(3,MINE) : ' X X ' , (4,MINE) : ' X ', (5,MINE) : ' X X ' ,
(6,MINE) : ' X X ' , (7,MINE) : ' X ', (8,MINE) : ' X X ' ,
(0,OPP) : ' O ' , (1,OPP) : ' O O ', (2,OPP) : ' O ' ,
(3,OPP) : ' O ' , (4,OPP) : ' O O ', (5,OPP) : ' O ' ,
(6,OPP) : ' O ' , (7,OPP) : ' O O ', (8,OPP) : ' O ' ,
}
class Neuron():
    # one cell of a board: param = [row, col, owner]
    def __init__(self,clone):
        if clone is not None:
            self.param = clone.param[:]
        else:
            self.param = [ NEITHER ] * 3
    def __str__(self):
        return f'{f_row(self)} {f_col(self)}'
    @property
    def owner(self):
        return f_prop(self)
    @owner.setter
    def owner(self,owner):
        self.param[2] = owner
def victory_cond( obj , inrow , incol ):
    # yields the 8 winning lines (3 rows, 3 columns, 2 diagonals) of a 3x3 grid
    for r in range(3):
        yield [ (r , c) for c in range(3) ]
    for c in range(3):
        yield [ (r , c) for r in range(3) ]
    yield [ (i,i) for i in range(3) ]
    yield [ (i, 2 - i) for i in range(3) ]

def victory_check( obj , inrow , incol ):
    # check each line for three identical owners
    for _ in victory_cond( obj , inrow , incol ):
        if obj[ _[0] ].param[2] == obj[ _[1] ].param[2] == obj[ _[2] ].param[2] :
            return obj[ _[0] ].param[2]
    return NEITHER

def init_dict( obj , row , col ):
    _ = Neuron(None)
    _.param = [ row , col , NEITHER ]
    obj[ ( row , col ) ] = _

def coord_bigToSmall( inrow , incol ):
    row = ( inrow // 3 )
    col = ( incol // 3 )
    return ( row , col )

def coord_upAndLeft( inrow , incol ):
    row = ( inrow // 3 ) * 3
    col = ( incol // 3 ) * 3
    return ( row , col )
class TicTacToe():
    # _secret holds the 3x3 board of small-board owners, _state the 9x9 board of cells
    def __init__(self,clone):
        if clone is None :
            self._secret, self._state, self._predict = {}, {}, []
            for y_row in range(3):
                for x_col in range(3):
                    init_dict( self._secret , y_row , x_col )
            for y_row in range(9):
                for x_col in range(9):
                    init_dict( self._state , y_row , x_col )
        elif clone is not None :
            self._secret, self._state, self._predict = {}, {}, []
            for k1, _ in clone._secret.items():
                self._secret[ k1 ] = Neuron(_)
            for k1, _ in clone._state.items():
                self._state[ k1 ] = Neuron(_)
    def __str__(self):
        out = ' 0 1 2 3 4 5 6 7 8 0 1 2 3 4 5 6 7 8\n0'
        for dr in range(9):
            for dc in range(3):
                _ = f_prop(self._secret[(dr // 3),dc])
                out = f'{out} {TYPE_STR[dr,_]}'
            for dc in range(9):
                _ = f_prop(self._state[dr,dc])
                if _ == NEITHER : out = f'{out} '
                if _ == MINE : out = f'{out} X'
                if _ == OPP : out = f'{out} O'
            out = f'{out}\n{ dr + 1}'
        return out
    def update( self , row , col , owner ):
        # place a mark, then update the small-board owner and check the overall winner
        self._state[ ( row , col ) ].param[2] = owner
        small_row , small_col = coord_bigToSmall( row , col )
        corner_row , corner_col = coord_upAndLeft( row , col )
        if f_prop(self._secret[ small_row , small_col ]) != NEITHER: return NEITHER
        _ = victory_check( self._state , corner_row , corner_col )
        if _ == NEITHER: return NEITHER
        self._secret[ small_row , small_col ].owner = _
        _ = victory_check( self._secret , 0 , 0 )
        return _
    def is_game_over( self ):
        _ = victory_check( self._secret , 0 , 0 )
        return _
    def possibility( self ):
        # list of all still-empty cells
        _ = []
        for k1, n1 in self._state.items():
            if f_prop(n1) == NEITHER :
                _.append( tuple(k1) )
        return _
class MonteCarloTreeSearch():
    # a tree node: _result = [wins, visits]
    def __init__(self,clone):
        if clone is not None:
            self._parent = clone
            self._children = []
            clone._children.append(self)
            self._result = [ 0 , 0 ]
            self._state = copy.copy(clone._state)
            print(f'MonteCarloTreeSearch init from previous',file=sys.stderr)
        else:
            self._parent = None
            self._children = []
            self._result = [ 0 , 0 ]
            self._state = None
    @property
    def n(self):
        return f_all(self._result)
    @property
    def w(self):
        return f_win(self._result)
    def best_child(self, c_parameter ):
        # Upper Confidence Bounds (UCB1) formula
        # descend the tree, at each level taking the child with the highest UCB1 score
        _ = self
        while len(_._children) > 0 :
            weight = [
                (c.w / c.n) + c_parameter * math.sqrt( (2 * math.log2(_.n) / c.n ) )
                for c in _._children
            ]
            print(f'weight {list(map(str,weight))}',file=sys.stderr)
            _ = _._children[ np.argmax( weight ) ]
        return _
    def expand(self):
        # SELECT NEXT/RANDOM ACTION
        # COMPUTE STATE
        child = MonteCarloTreeSearch(self)
        print(f'child._parent {child._parent} \nchild {child} ',file=sys.stderr)
        _ = child._state.possibility()
        sel_row , sel_col = random.choice(_)
        print(f'expand: row {sel_row} col {sel_col}',file=sys.stderr)
        child._state.update( sel_row , sel_col , MINE )
        print(f'{child._state}',file=sys.stderr)
        child._state._predict.append( child._state._state[ sel_row, sel_col ] )
        return child
    def randomseq(self):
        # rollout: play random moves until the game is over
        #print("START >>> randomseq",file=sys.stderr)
        #print(f'{self._state}',file=sys.stderr)
        rollout_state = TicTacToe(self._state)
        ROLLOUT_TURN = OPP
        while rollout_state.is_game_over() == NEITHER :
            _ = rollout_state.possibility()
            sel_row , sel_col = random.choice(_)
            #print(f'expand: row {sel_row} col {sel_col} turn {TYPE_TURN[ROLLOUT_TURN]}',file=sys.stderr)
            rollout_state.update( sel_row , sel_col , ROLLOUT_TURN )
            #print(f'{rollout_state}',file=sys.stderr)
            ROLLOUT_TURN = MINE if ROLLOUT_TURN == OPP else OPP
        #print(" END >>> randomseq",file=sys.stderr)
        #print(f'{self._state}',file=sys.stderr)
        if rollout_state.is_game_over() == MINE:
            return [ 1 , 1 ]
        else :
            return [ 0 , 1 ]
    def backpropagate(self, result):
        # add the rollout result to this node and to all of its ancestors
        self._result[0] = f_win(self._result) + f_win(result)
        self._result[1] = f_all(self._result) + f_all(result)
        _ = self._parent
        while _ is not None :
            _._result[0] = f_win(_._result) + f_win(result)
            _._result[1] = f_all(_._result) + f_all(result)
            _ = _._parent
__mine__ = TicTacToe(None)
# FIRST TURN
_ = [int(i) for i in input().split()] + [OPP]
if _[0] != -1 :
    __mine__.update( _[0] , _[1] , _[2] )
# MONTE-CARLO TREE SEARCH INITIALIZATION
__mine__mcts__ = MonteCarloTreeSearch(None)
__mine__mcts__._state = __mine__
for i in range(5):
    __children__ = __mine__mcts__.best_child( 1.4 )
    __children__ = __children__.expand()
    _ = __children__.randomseq()
    print(f'result randomseq {list(map(str,_))}',file=sys.stderr)
    __children__.backpropagate( _ )
possibility = []
for i in range(int(input())):
    _ = [int(j) for j in input().split()]
    possibility.append( tuple(_) )
if (4, 4) in possibility:
    __mine__.update( 4 , 4 , MINE )
    print( __mine__._state[ (4, 4) ] )
else:
    sel_row , sel_col = random.choice(possibility)
    __mine__.update( sel_row , sel_col , MINE )
    print( __mine__._state[ (sel_row, sel_col) ] )
# AFTER FIRST TURN
while True:
    _ = [int(i) for i in input().split()] + [OPP]
    __mine__.update( _[0] , _[1] , _[2] )
    possibility = []
    for i in range(int(input())):
        _ = [int(j) for j in input().split()]
        possibility.append( tuple(_) )
    sel_row , sel_col = -1 , -1
    sel1_row , sel1_col = random.choice(possibility)
    m1 = copy.copy( __mine__ )
    sel2_row , sel2_col = random.choice(possibility)
    m2 = copy.copy( __mine__ )
    if m1.update( 4 , 4 , MINE ) == MINE :
        sel_row , sel_col = sel1_row , sel1_col
    elif m2.update( 4 , 4 , MINE ) == MINE :
        sel_row , sel_col = sel2_row , sel2_col
    else:
        sel_row , sel_col = random.choice(possibility)
    __mine__.update( sel_row , sel_col , MINE )
    print( __mine__._state[ (sel_row, sel_col) ] )
Thank you for your answers