Source code for src.modules.backend.solver.stateless_hybrid_strategy

# src/modules/backend/solver/stateless_hybrid_strategy.py
"""
Stateless hybrid frequency-entropy strategy combining corpus frequency with information theory.
"""
import math
from typing import TYPE_CHECKING, List, Optional, Set, Tuple

from .stateless_solver_strategy import StatelessSolverStrategy

if TYPE_CHECKING:
    from ..legacy_word_manager import WordManager
    from ..stateless_word_manager import StatelessWordManager


class StatelessHybridStrategy(StatelessSolverStrategy):
    """
    Stateless strategy that combines frequency-based scoring with entropy
    for optimal word suggestions.
    """

    def __init__(self, frequency_weight: float = 0.4, entropy_weight: float = 0.6):
        """
        Initialize the hybrid strategy with customizable weights.

        Args:
            frequency_weight: Weight for word frequency (0-1)
            entropy_weight: Weight for entropy (0-1)
        """
        # Normalize weights to ensure they sum to 1.0
        total = frequency_weight + entropy_weight
        self.frequency_weight = frequency_weight / total
        self.entropy_weight = entropy_weight / total

    def get_top_suggestions(
        self,
        constraints: List[Tuple[str, str]],
        count: int = 10,
        word_manager: Optional["WordManager"] = None,
        stateless_word_manager: Optional["StatelessWordManager"] = None,
        prefer_common: bool = True,
        word_set: Optional[Set[str]] = None,
    ) -> List[str]:
        """Get top N suggestions based on hybrid frequency-entropy scoring using stateless filtering."""
        # Get filtered words using stateless methods
        possible_words, common_words = self._get_filtered_words(
            constraints, word_manager, stateless_word_manager, word_set
        )

        if not possible_words:
            return []

        if len(possible_words) <= count:
            # If we have few words left, sort by combined score
            return self._sort_by_hybrid_score(
                possible_words,
                common_words,
                word_manager,
                stateless_word_manager,
                prefer_common,
            )

        # For the first guess, use high-entropy words with reasonable frequency
        if not constraints:
            return self._get_optimal_starters(
                possible_words,
                common_words,
                count,
                word_manager,
                stateless_word_manager,
            )

        # Score words based on the hybrid frequency-entropy metric
        word_scores = self._score_words_hybrid(
            possible_words, constraints, word_manager, stateless_word_manager
        )

        # Sort by score (highest combined score first)
        sorted_words = [
            word
            for word, score in sorted(
                word_scores.items(), key=lambda x: x[1], reverse=True
            )
        ]

        # Apply balanced selection if prefer_common is True
        if prefer_common:
            return self._build_balanced_result(sorted_words, common_words, count)
        else:
            return sorted_words[:count]

    def _sort_by_hybrid_score(
        self,
        words: List[str],
        common_words: List[str],
        word_manager: Optional["WordManager"] = None,
        stateless_word_manager: Optional["StatelessWordManager"] = None,
        prefer_common: bool = True,
    ) -> List[str]:
        """Sort words by hybrid score, optionally prioritizing common words."""
        # Calculate hybrid scores for all words
        word_scores = {}
        for word in words:
            frequency = self._get_word_frequency(
                word, word_manager, stateless_word_manager
            )
            entropy = self._get_word_entropy(word, word_manager, stateless_word_manager)

            # Normalize scores
            freq_score = self._normalize_frequency(frequency)
            entropy_score = self._normalize_entropy(entropy)

            # Combine scores
            hybrid_score = (
                self.frequency_weight * freq_score
                + self.entropy_weight * entropy_score
            )

            # Boost common words slightly if prefer_common is True
            if prefer_common and word in common_words:
                hybrid_score *= 1.1

            word_scores[word] = hybrid_score

        # Sort by hybrid score
        return sorted(words, key=lambda w: word_scores[w], reverse=True)

    def _get_optimal_starters(
        self,
        possible_words: List[str],
        common_words: List[str],
        count: int,
        word_manager: Optional["WordManager"] = None,
        stateless_word_manager: Optional["StatelessWordManager"] = None,
    ) -> List[str]:
        """Get optimal starting words based on hybrid scoring."""
        # Predefined high-value starters
        optimal_starters = ["SLATE", "CRANE", "ADIEU", "AUDIO", "RAISE", "TEARS"]

        # Filter available starters
        available_starters = [
            word for word in optimal_starters if word in possible_words
        ]

        if len(available_starters) >= count:
            return available_starters[:count]

        # Score remaining words for hybrid quality (limit to 100 for performance)
        remaining_words = [w for w in possible_words if w not in available_starters]
        word_scores = self._score_words_hybrid(
            remaining_words[:100], [], word_manager, stateless_word_manager
        )

        # Add best scoring words
        additional_words = sorted(
            word_scores.items(), key=lambda x: x[1], reverse=True
        )[: count - len(available_starters)]

        return available_starters + [word for word, _ in additional_words]

    def _score_words_hybrid(
        self,
        words: List[str],
        constraints: List[Tuple[str, str]],
        word_manager: Optional["WordManager"] = None,
        stateless_word_manager: Optional["StatelessWordManager"] = None,
    ) -> dict:
        """Score words using the hybrid frequency-entropy metric."""
        word_scores = {}
        for word in words:
            frequency = self._get_word_frequency(
                word, word_manager, stateless_word_manager
            )
            entropy = self._get_word_entropy(word, word_manager, stateless_word_manager)

            # Normalize scores
            freq_score = self._normalize_frequency(frequency)
            entropy_score = self._normalize_entropy(entropy)

            # Combine scores
            hybrid_score = (
                self.frequency_weight * freq_score
                + self.entropy_weight * entropy_score
            )

            # Apply uniqueness bonus
            uniqueness_bonus = self._calculate_uniqueness_bonus(word, constraints)
            hybrid_score += uniqueness_bonus * 0.1

            word_scores[word] = hybrid_score

        return word_scores

    def _normalize_frequency(self, frequency: int) -> float:
        """Normalize frequency to a 0-1 scale using a log transformation."""
        if frequency <= 0:
            return 0.0
        # Use log10 to compress high frequencies; assume max log10(freq) ~ 6
        return min(1.0, math.log10(frequency + 1) / 6.0)

    def _normalize_entropy(self, entropy: float) -> float:
        """Normalize entropy to a 0-1 scale."""
        if entropy <= 0:
            return 0.0
        # Assume an entropy range of 0-10
        return min(1.0, entropy / 10.0)

    def _calculate_uniqueness_bonus(
        self, word: str, constraints: List[Tuple[str, str]]
    ) -> float:
        """Calculate a bonus for words whose letters differ from previous guesses."""
        if not constraints:
            return 0.0

        # Collect letters from previous guesses
        previous_letters = set()
        for guess, _ in constraints:
            previous_letters.update(guess.upper())

        word_letters = set(word.upper())
        shared_letters = len(word_letters.intersection(previous_letters))
        total_letters = len(word_letters)

        # Return the uniqueness ratio (0-1)
        return (1.0 - shared_letters / total_letters) if total_letters > 0 else 0.0

    def _build_balanced_result(
        self, sorted_words: List[str], common_words: List[str], count: int
    ) -> List[str]:
        """Build a balanced result favoring common words."""
        common_set = set(common_words)
        common_candidates = [w for w in sorted_words if w in common_set]
        other_candidates = [w for w in sorted_words if w not in common_set]

        # Aim for 70% common words in the hybrid strategy
        common_target = max(1, int(count * 0.7))
        other_target = count - common_target

        # Take up to the target from each group
        result = []
        result.extend(common_candidates[:common_target])
        result.extend(other_candidates[:other_target])

        # Fill remaining slots with the best candidates overall
        remaining_slots = count - len(result)
        if remaining_slots > 0:
            remaining_candidates = [w for w in sorted_words if w not in result]
            result.extend(remaining_candidates[:remaining_slots])

        return result[:count]
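
Example usage (a minimal sketch, not part of the module above; it assumes StatelessWordManager can be constructed without arguments and that constraints are (guess, feedback) tuples whose feedback encoding is defined elsewhere in the package):

from src.modules.backend.solver.stateless_hybrid_strategy import StatelessHybridStrategy
from src.modules.backend.stateless_word_manager import StatelessWordManager

# Weights are normalized in __init__, so 1.0 and 3.0 become 0.25 and 0.75.
strategy = StatelessHybridStrategy(frequency_weight=1.0, entropy_weight=3.0)

# Assumed no-argument construction; check the StatelessWordManager module for the real signature.
swm = StatelessWordManager()

# First guess: no constraints yet, so the predefined starters are preferred.
print(strategy.get_top_suggestions([], count=5, stateless_word_manager=swm))

# Later guesses: pass the accumulated (guess, feedback) constraints; the feedback
# string below is a placeholder for whatever encoding the package actually uses.
constraints = [("SLATE", "feedback-for-slate")]
print(strategy.get_top_suggestions(constraints, count=5, stateless_word_manager=swm))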