//Uct::doPlayout ... if (! tree_->actNode()->hasChildren()) { if (tree_->actNode()->getDepth() < UCT_MAX_DEPTH - 1) { if (tree_->actNode()->isMature()) { //actNode is about to be expanded ...
if step is not expanded then normal Tree::uctDescend is performed
...
tree_->uctDescend();
uctDescends_++;
Step step = tree_->actNode()->getStep();
//perform the step and try commit
if (playBoard->makeStepTryCommit(step) ) {
//commit was successful - check whether winning criteria are reached already
if (playBoard->getWinner() != NO_PLAYER ) {
tree_->updateHistory(WINNER_TO_VALUE(playBoard->getWinner()));
break; //winner is known already in the uct tree -> no need to go deeper
}
}
...
//end Uct::doPlayout
Following snippets are connected to node expansion.
tactics stuff: goal check is performed and move advisor filled
...
//actNode is about to be expanded
Move move;
//static check for goal
if (playBoard->getStepCount() == 0 && playBoard->goalCheck(&move)){
assert(playBoard->getWinner() == NO_PLAYER );
float value = WINNER_TO_VALUE(playBoard->getPlayerToMove());
//limited expansion
tree_->expandNodeLimited(tree_->actNode(), move);
//descend to the expanded area
while (tree_->actNode() && tree_->actNode()->hasChildren()){
tree_->firstChildDescend();
}
tree_->updateHistory(value);
break;
}
//tactics in playouts extension
if (cfg.moveAdvisor()){
fill_advisor(playBoard);
}
steps generation and filtering for expansion and expansion itself
...
//steps generation for expansion
stepsNum = playBoard->genStepsNoPass(playBoard->getPlayerToMove(), steps);
//repetitions filters
stepsNum = playBoard->filterRepetitions(steps, stepsNum);
//add pass if possible
if (playBoard->canPass()) {
steps[stepsNum++] = Step(STEP_PASS, playBoard->getPlayerToMove());
}
//add heuristic values for step (will be used in node initialization)
if (stepsNum > 0) {
if (cfg.knowledgeInTree()){
HeurArray heurs;
playBoard->getHeuristics(steps, stepsNum, heurs);
//expansion itself
tree_->expandNode(tree_->actNode(), steps, stepsNum, &heurs);
}
else{
//expansion itself - without heuristic knowledge
tree_->expandNode(tree_->actNode(), steps, stepsNum);
}
//transposition tables update
if (cfg.uct_tt()){
tree_->updateTT(tree_->actNode(), playBoard);
}
//end of step expansion
Lets have a look on lower abstraction level - particularly the Node class. Interesting part is a mechanism for selecting the best child according to uct. This is used for Tree::uctDescend and is handled by function Node::findUctChild.
//Node::findUctChild ... //exploration coeffiecient float exploreCoeff = exploreRate * log(realFather->visits_); if (cCache_ && visits_ > CCACHE_START_THRESHOLD){ //if cCache is active, select best child only from cached children assert(cCache_); cCacheUpdate(exploreCoeff); for (int i = 0; i < CHILDREN_CACHE_SIZE && cCache_[i]; i++){ act = cCache_[i]; uctOneChild(act, best, bestUrgency, exploreCoeff); } }else{ //loop children and select best chil while (act != NULL) { uctOneChild(act, best, bestUrgency, exploreCoeff); act = act->sibling_; } } //end Node::findUctChild
Node::uctOneChild ensures occasional syncing (for parallelized mode only) and then gets value of exploration formula for given node via Node::exploreFormula function. Exploration formula incorporates heuristic domain knowledge as well as values from history heursistic.
//Node::exploreFormula ... //first play urgency if not visited yet (usually bypassed by virtual visits) if (visits_ == 0) return FPU; return (cfg.ucbTuned() ? ucbTuned(exploreCoeff) : ucb(exploreCoeff)) + heur_/visits_ + (cfg.historyHeuristic() ? ((getNodeType() == NODE_MAX ? twStep_->value : - twStep_->value) * 1.1 /mysqrt(visits_)) : 0) ; //end Node::exploreFormula ...
1.5.7.1