Source code for src.modules.backend.solver.stateless_weighted_gain_strategy

# src/modules/backend/solver/stateless_weighted_gain_strategy.py
"""
Stateless weighted information-gain solver strategy for Wordle that combines multiple metrics.
"""
import math
from collections import defaultdict
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple

from .solver_utils import calculate_pattern, calculate_position_frequencies
from .stateless_solver_strategy import StatelessSolverStrategy

if TYPE_CHECKING:
    from ..legacy_word_manager import WordManager
    from ..stateless_word_manager import StatelessWordManager


class StatelessWeightedGainStrategy(StatelessSolverStrategy):
    """
    Stateless strategy that combines multiple information metrics for better word suggestions.

    This strategy uses a weighted combination of:
    - Shannon entropy (information gain)
    - Positional information (value of exact position matches)
    - Word frequency (likelihood of being the answer)
    """
    def __init__(
        self,
        entropy_weight: float = 0.5,
        positional_weight: float = 0.3,
        frequency_weight: float = 0.2,
    ):
        """
        Initialize the weighted information gain strategy with customizable weights.

        Args:
            entropy_weight: Weight for Shannon entropy (0-1)
            positional_weight: Weight for positional information (0-1)
            frequency_weight: Weight for word frequency information (0-1)
        """
        # Normalize weights to ensure they sum to 1.0
        total = entropy_weight + positional_weight + frequency_weight
        self.entropy_weight = entropy_weight / total
        self.positional_weight = positional_weight / total
        self.frequency_weight = frequency_weight / total
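    # Worked example of the normalization above (hypothetical arguments):
    # StatelessWeightedGainStrategy(entropy_weight=0.6, positional_weight=0.3,
    # frequency_weight=0.3) gives total = 1.2, so the stored weights become
    # 0.6/1.2 = 0.5, 0.3/1.2 = 0.25, and 0.3/1.2 = 0.25, which sum to 1.0.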
    def get_top_suggestions(
        self,
        constraints: List[Tuple[str, str]],
        count: int = 10,
        word_manager: Optional["WordManager"] = None,
        stateless_word_manager: Optional["StatelessWordManager"] = None,
        prefer_common: bool = True,
        word_set: Optional[Set[str]] = None,
    ) -> List[str]:
        """Get top N suggestions based on weighted information gain using stateless filtering."""
        # Get filtered words using stateless methods
        possible_words, common_words = self._get_filtered_words(
            constraints, word_manager, stateless_word_manager, word_set
        )

        if not possible_words:
            return []

        # Handle cases with few words
        if len(possible_words) <= count:
            return self._sort_by_weighted_score(
                possible_words,
                common_words,
                word_manager,
                stateless_word_manager,
                prefer_common,
            )

        # For large word lists, use efficient candidate selection
        if len(possible_words) > 100:
            candidates = self._get_limited_candidates(
                possible_words, common_words, max_candidates=50
            )
        else:
            candidates = possible_words

        # Calculate weighted scores for candidates
        word_scores = self._calculate_weighted_scores(
            candidates,
            possible_words,
            constraints,
            word_manager,
            stateless_word_manager,
        )

        # Sort by weighted score
        sorted_candidates = sorted(
            word_scores.items(), key=lambda x: x[1], reverse=True
        )
        top_candidates = [word for word, _ in sorted_candidates[: count * 2]]

        # Apply balanced selection if prefer_common is True
        if prefer_common:
            return self._balance_common_and_other(top_candidates, common_words, count)
        else:
            return top_candidates[:count]
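    # Minimal usage sketch (hypothetical names; assumes a StatelessWordManager
    # instance `swm` built elsewhere, and that the feedback string in each
    # constraint tuple uses the encoding produced by solver_utils.calculate_pattern):
    #
    #     strategy = StatelessWeightedGainStrategy()
    #     suggestions = strategy.get_top_suggestions(
    #         constraints=[("CRANE", feedback)],
    #         count=5,
    #         stateless_word_manager=swm,
    #     )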
    def _calculate_weighted_scores(
        self,
        candidates: List[str],
        possible_words: List[str],
        constraints: List[Tuple[str, str]],
        word_manager: Optional["WordManager"] = None,
        stateless_word_manager: Optional["StatelessWordManager"] = None,
    ) -> Dict[str, float]:
        """Calculate weighted scores for candidate words."""
        word_scores = {}

        # Calculate positional frequencies for possible words
        position_frequencies = calculate_position_frequencies(possible_words)

        for candidate in candidates:
            # 1. Entropy component
            entropy_score = self._calculate_entropy_score(candidate, possible_words)

            # 2. Positional information component
            positional_score = self._calculate_positional_score(
                candidate, position_frequencies
            )

            # 3. Frequency component
            frequency_score = self._calculate_frequency_score(
                candidate, word_manager, stateless_word_manager
            )

            # 4. Uniqueness bonus
            uniqueness_bonus = self._calculate_uniqueness_bonus(candidate, constraints)

            # Combine weighted scores
            weighted_score = (
                self.entropy_weight * entropy_score
                + self.positional_weight * positional_score
                + self.frequency_weight * frequency_score
                + 0.1 * uniqueness_bonus  # Small uniqueness bonus
            )

            word_scores[candidate] = weighted_score

        return word_scores

    def _calculate_entropy_score(
        self, candidate: str, possible_words: List[str]
    ) -> float:
        """Calculate normalized entropy score for a candidate."""
        if len(possible_words) <= 1:
            return 1.0

        # Calculate pattern distribution
        pattern_counts = defaultdict(int)
        for target in possible_words:
            pattern = calculate_pattern(candidate, target)
            pattern_counts[pattern] += 1

        # Calculate Shannon entropy
        total_words = len(possible_words)
        entropy = 0.0
        for count in pattern_counts.values():
            if count > 0:
                prob = count / total_words
                entropy -= prob * math.log2(prob)

        # Normalize entropy (max entropy for 5-letter words is ~log2(243) ≈ 7.9)
        max_entropy = min(math.log2(len(possible_words)), 7.9)
        return entropy / max_entropy if max_entropy > 0 else 0.0

    def _calculate_positional_score(
        self, candidate: str, position_frequencies: Dict
    ) -> float:
        """Calculate positional information score for a candidate."""
        positional_score = 0.0

        for i, letter in enumerate(candidate.upper()):
            if i in position_frequencies and letter in position_frequencies[i]:
                # Score based on how informative this letter is at this position
                letter_freq = position_frequencies[i][letter]
                total_words = sum(position_frequencies[i].values())

                if total_words > 0:
                    # Use inverse frequency - rare letters at positions are more informative
                    info_value = 1.0 - (letter_freq / total_words)
                    positional_score += info_value

        # Normalize by word length
        return positional_score / 5.0

    def _calculate_frequency_score(
        self,
        candidate: str,
        word_manager: Optional["WordManager"] = None,
        stateless_word_manager: Optional["StatelessWordManager"] = None,
    ) -> float:
        """Calculate normalized frequency score for a candidate."""
        frequency = self._get_word_frequency(
            candidate, word_manager, stateless_word_manager
        )

        if frequency <= 0:
            return 0.0

        # Use log transformation and normalize
        return min(1.0, math.log10(frequency + 1) / 6.0)

    def _calculate_uniqueness_bonus(
        self, candidate: str, constraints: List[Tuple[str, str]]
    ) -> float:
        """Calculate bonus for words different from previous guesses."""
        if not constraints:
            return 0.0

        # Get letters from previous guesses
        previous_letters = set()
        for guess, _ in constraints:
            previous_letters.update(guess.upper())

        candidate_letters = set(candidate.upper())
        shared_letters = len(candidate_letters.intersection(previous_letters))
        total_letters = len(candidate_letters)

        # Return uniqueness ratio
        return 1.0 - (shared_letters / total_letters) if total_letters > 0 else 0.0
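    # Worked example for _calculate_entropy_score (illustrative numbers): if a
    # candidate splits 8 remaining words into feedback-pattern groups of sizes
    # 4, 2, 1, 1, then
    #     entropy = -(0.5*log2(0.5) + 0.25*log2(0.25) + 2 * 0.125*log2(0.125))
    #             = 1.75 bits,
    # and the normalized score is 1.75 / min(log2(8), 7.9) = 1.75 / 3 ≈ 0.583.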
    def _get_limited_candidates(
        self,
        possible_words: List[str],
        common_words: List[str],
        max_candidates: int = 50,
    ) -> List[str]:
        """Get a limited set of candidates for evaluation."""
        # Include possible words (limited)
        candidates = possible_words[: max_candidates // 2]

        # Add common words not already included
        existing_candidates = set(candidates)
        additional_common = [
            word for word in common_words if word not in existing_candidates
        ][: max_candidates // 2]

        return candidates + additional_common

    def _sort_by_weighted_score(
        self,
        words: List[str],
        common_words: List[str],
        word_manager: Optional["WordManager"] = None,
        stateless_word_manager: Optional["StatelessWordManager"] = None,
        prefer_common: bool = True,
    ) -> List[str]:
        """Sort words by weighted score, optionally prioritizing common words."""
        # Calculate weighted scores for all words
        word_scores = self._calculate_weighted_scores(
            words, words, [], word_manager, stateless_word_manager
        )

        if not prefer_common:
            return sorted(words, key=lambda w: word_scores.get(w, 0.0), reverse=True)

        # Separate common and other words, sort each by score
        common_set = set(common_words)
        common_words_filtered = [w for w in words if w in common_set]
        other_words = [w for w in words if w not in common_set]

        common_sorted = sorted(
            common_words_filtered, key=lambda w: word_scores.get(w, 0.0), reverse=True
        )
        other_sorted = sorted(
            other_words, key=lambda w: word_scores.get(w, 0.0), reverse=True
        )

        return common_sorted + other_sorted

    def _balance_common_and_other(
        self, candidates: List[str], common_words: List[str], count: int
    ) -> List[str]:
        """Balance common and other words in final suggestions."""
        common_set = set(common_words)
        common_candidates = [w for w in candidates if w in common_set]
        other_candidates = [w for w in candidates if w not in common_set]

        # For weighted gain strategy, prefer 65% common words
        common_target = max(1, int(count * 0.65))
        other_target = count - common_target

        # Build result
        result = []
        result.extend(common_candidates[:common_target])
        result.extend(other_candidates[:other_target])

        # Fill remaining slots
        remaining_slots = count - len(result)
        if remaining_slots > 0:
            remaining_candidates = [w for w in candidates if w not in result]
            result.extend(remaining_candidates[:remaining_slots])

        return result[:count]
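# Worked example for _balance_common_and_other (illustrative): with count=10,
# common_target = max(1, int(10 * 0.65)) = 6 and other_target = 4, so up to six
# common candidates are taken first, then four others; any shortfall is filled
# from the remaining candidates in score order before truncating to `count`.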