# src/modules/backend/solver/stateless_frequency_strategy.py
"""
Stateless frequency-based solver strategy for Wordle using corpus frequency data.
"""
import math
from typing import TYPE_CHECKING, List, Optional, Set, Tuple

from .stateless_solver_strategy import StatelessSolverStrategy

if TYPE_CHECKING:
    from ..legacy_word_manager import WordManager
    from ..stateless_word_manager import StatelessWordManager

class StatelessFrequencyStrategy(StatelessSolverStrategy):
    """Stateless strategy that uses actual word frequency data from the corpus to suggest words."""
    def get_top_suggestions(
        self,
        constraints: List[Tuple[str, str]],
        count: int = 10,
        word_manager: Optional["WordManager"] = None,
        stateless_word_manager: Optional["StatelessWordManager"] = None,
        prefer_common: bool = True,
        word_set: Optional[Set[str]] = None,
    ) -> List[str]:
        """Get the top N suggestions, ranked by corpus word frequency, using stateless filtering."""
        # Get filtered words using stateless methods
        possible_words, common_words = self._get_filtered_words(
            constraints, word_manager, stateless_word_manager, word_set
        )
        if not possible_words:
            return []
        if len(possible_words) <= count:
            # Few enough candidates: just sort by frequency, common words first
            return self._sort_by_frequency_and_commonness(
                possible_words,
                common_words,
                word_manager,
                stateless_word_manager,
                prefer_common,
            )

        def frequency_scoring_func(word: str) -> float:
            return self._get_frequency_score(
                word, constraints, word_manager, stateless_word_manager
            )

        # Score every remaining candidate and sort by descending score
        word_scores = [(word, frequency_scoring_func(word)) for word in possible_words]
        word_scores.sort(key=lambda x: x[1], reverse=True)
        # Oversample 2x so the balancing step below has enough words in each group
        top_candidates = [word for word, _ in word_scores[: count * 2]]
        # Balance common and other words if prefer_common is True
        if prefer_common:
            return self._balance_common_and_other(top_candidates, common_words, count)
        return top_candidates[:count]
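
    # Illustrative flow (example numbers, not from the source): with 40
    # possible words and count=10, all 40 are scored, the top 20 (count * 2)
    # are kept as candidates, and _balance_common_and_other trims them back to
    # 10 with a roughly 60/40 common/other split.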
    def _get_frequency_score(
        self,
        word: str,
        constraints: List[Tuple[str, str]],
        word_manager: Optional["WordManager"] = None,
        stateless_word_manager: Optional["StatelessWordManager"] = None,
    ) -> float:
        """Get the frequency score for a word, with adjustments for previous guesses."""
        # Get base frequency score
        base_score = float(
            self._get_word_frequency(word, word_manager, stateless_word_manager)
        )
        # Normalize to a reasonable range (log scale for very high frequencies)
        if base_score > 0:
            # Log scale prevents extremely high frequencies from dominating
            normalized_score = math.log10(base_score + 1)
        else:
            normalized_score = 0.0
        # Apply a small bonus for words not similar to previous guesses
        uniqueness_bonus = self._calculate_uniqueness_bonus(word, constraints)
        return normalized_score + uniqueness_bonus
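
    # Illustrative numbers (not from the source): with base_score = 1_000_000,
    # log10(base_score + 1) is about 6.0; with base_score = 100 it is about
    # 2.0. A 10,000x gap in raw frequency therefore collapses to ~4 score
    # points, which keeps the uniqueness bonus (capped at 0.5) relevant.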
    def _calculate_uniqueness_bonus(
        self, word: str, constraints: List[Tuple[str, str]]
    ) -> float:
        """Calculate a small bonus for words that differ from previous guesses."""
        if not constraints:
            return 0.0
        # Reward words that share fewer letters with previous guesses
        previous_letters: Set[str] = set()
        for guess, _ in constraints:
            previous_letters.update(guess.upper())
        word_letters = set(word.upper())
        shared_letters = len(word_letters.intersection(previous_letters))
        total_letters = len(word_letters)
        # Small bonus (max 0.5) for words with fewer shared letters
        uniqueness_ratio = (
            1.0 - (shared_letters / total_letters) if total_letters > 0 else 0.0
        )
        return uniqueness_ratio * 0.5
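
    # Worked example (hypothetical words, not from the source): with a single
    # previous guess "CRANE", the candidate "SLOTH" shares 0 of its 5 distinct
    # letters, so the bonus is 1.0 * 0.5 = 0.5; the candidate "TRACE" shares
    # 4 of 5 ({R, A, C, E}), so the bonus is (1 - 4/5) * 0.5 = 0.1.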
    def _sort_by_frequency_and_commonness(
        self,
        words: List[str],
        common_words: List[str],
        word_manager: Optional["WordManager"] = None,
        stateless_word_manager: Optional["StatelessWordManager"] = None,
        prefer_common: bool = True,
    ) -> List[str]:
        """Sort words by frequency, optionally giving common words priority."""
        if not prefer_common:
            # Simple frequency sort
            return sorted(
                words,
                key=lambda w: self._get_word_frequency(
                    w, word_manager, stateless_word_manager
                ),
                reverse=True,
            )
        # Separate common and non-common words
        common_set = set(common_words)
        common_words_filtered = [w for w in words if w in common_set]
        other_words = [w for w in words if w not in common_set]
        # Sort each group by frequency
        common_sorted = sorted(
            common_words_filtered,
            key=lambda w: self._get_word_frequency(
                w, word_manager, stateless_word_manager
            ),
            reverse=True,
        )
        other_sorted = sorted(
            other_words,
            key=lambda w: self._get_word_frequency(
                w, word_manager, stateless_word_manager
            ),
            reverse=True,
        )
        # Combine with common words first
        return common_sorted + other_sorted
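
    # Note (describing the behavior above): with prefer_common=True the
    # grouping is absolute, so a common word always precedes a non-common one
    # even if the non-common word has a higher raw frequency; frequency only
    # orders words within each group.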
    def _balance_common_and_other(
        self, candidates: List[str], common_words: List[str], count: int
    ) -> List[str]:
        """Balance common and other words in the final suggestions."""
        common_set = set(common_words)
        common_candidates = [w for w in candidates if w in common_set]
        other_candidates = [w for w in candidates if w not in common_set]
        # Aim for roughly 60% common words, 40% other words
        common_target = max(1, int(count * 0.6))
        other_target = count - common_target
        # Take up to target from each group
        result: List[str] = []
        result.extend(common_candidates[:common_target])
        result.extend(other_candidates[:other_target])
        # Fill remaining slots with the best remaining candidates overall
        remaining_slots = count - len(result)
        if remaining_slots > 0:
            remaining_candidates = [w for w in candidates if w not in result]
            result.extend(remaining_candidates[:remaining_slots])
        return result[:count]
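
    # Worked example (illustrative numbers): for count=10, common_target is
    # max(1, int(10 * 0.6)) = 6 and other_target is 4. If only 3 common
    # candidates survived filtering, the first pass yields 3 + 4 = 7 words and
    # the fill step tops the list back up to 10 from the leftover candidates,
    # preserving their score order.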
    def _fallback_frequency_score(self, word: str) -> float:
        """Fallback score based on word characteristics when no frequency data is available."""
        # Simple heuristic: prefer words built from the most common English
        # letters. Only the top half of the frequency-ordered alphabet counts
        # as "common"; using all 26 letters would make every word score 1.0.
        # (The cutoff of 13 letters is a judgment call.)
        common_letters = set("ETAOINSHRDLCU")
        word_letters = set(word.upper())
        # Score is the fraction of the word's distinct letters that are common
        common_count = len(word_letters.intersection(common_letters))
        return float(common_count) / len(word_letters) if word_letters else 0.0
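
# Minimal usage sketch (illustrative, not from this module): assumes a
# StatelessWordManager instance `swm` constructed elsewhere, and that each
# constraint pairs a guess with its feedback string in whatever encoding
# _get_filtered_words expects.
#
#     strategy = StatelessFrequencyStrategy()
#     constraints = [("CRANE", "__Y_G")]  # hypothetical feedback encoding
#     suggestions = strategy.get_top_suggestions(
#         constraints,
#         count=5,
#         stateless_word_manager=swm,
#         prefer_common=True,
#     )
#     # -> up to 5 words, roughly 60% drawn from the common-word list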