# This script defines the `PromptRefiner` class, which provides utility
# functions for processing and cleaning text generated by Large Language Models (LLMs).
# It includes methods for combining prompts, ensuring content is formatted as Markdown,
# and removing extraneous meta-elements or code block formatting that LLMs might
# include in their responses. This helps in preparing LLM outputs for further use.

import re
from typing import Optional

from markdownify import markdownify as md

from .colors import Colors

class PromptRefiner:
    """
    A utility class for refining and cleaning LLM-generated text.
    Provides methods to combine prompts, format content, and remove unwanted
    meta-elements or markdown formatting from responses.
    """

    # Paired meta blocks whose whole content is dropped (matched with
    # DOTALL | IGNORECASE so they may span lines and use any letter case).
    _META_BLOCK_PATTERNS = (
        r'<think>.*?</think>',  # Think blocks
        r'<thinking>.*?</thinking>',  # Thinking blocks
        r'<reasoning>.*?</reasoning>',  # Reasoning blocks
        r'<meta>.*?</meta>',  # Meta blocks
        r'<reflection>.*?</reflection>',  # Reflection blocks
    )

    # Boilerplate lead-ins removed from the very start of a response.
    _PREFIX_PATTERNS = (
        r'^Here\'s the refined prompt:\s*',
        r'^Refined prompt:\s*',
        r'^The refined prompt is:\s*',
        r'^Here is the refined version:\s*',
        r'^Refined version:\s*',
    )

    # Unpaired opening/closing tags: only the tag itself is removed.
    _STRAY_TAG_PATTERN = r'</?(think|tool_code|execute_result|response|answer)>'

    # Opening code fence with an optional language tag, up to the line break.
    _FENCE_OPEN_PATTERN = r'```[A-Za-z0-9_+#.-]*[ \t]*\r?\n'

    def __init__(self, config: Optional[dict] = None, verbose: bool = False):
        """
        Initializes the PromptRefiner.

        Args:
            config (dict | None): Configuration dictionary (currently not used).
                When omitted, a fresh empty dict is created for this instance.
            verbose (bool): If True, enables verbose output for cleaning operations. Defaults to False.
        """
        # A `{}` default would be shared by every instance created without a
        # config, so mutations on one instance would leak into the others.
        self.config = config if config is not None else {}
        self.verbose = verbose

    def combine_refinement_prompt(self, input_prompt: str, refinement_prompt: str) -> str:
        """
        Combines an initial input prompt with a refinement prompt.
        The refinement prompt is placed first, followed by a blank line and
        the input prompt.

        Args:
            input_prompt (str): The initial text prompt that needs refinement.
            refinement_prompt (str): The prompt containing instructions for refinement
                                     and a call to action to refine the input_prompt.

        Returns:
            str: The combined prompt string, ready to be sent to an LLM for refinement.

        Raises:
            ValueError: If the input_prompt is empty.
        """
        if not input_prompt:
            raise ValueError("Input prompt cannot be empty")
        if self.verbose:
            separator = "-" * 50
            print(separator)
            # NOTE(review): the color code and "═" are printed as separate
            # arguments with no reset; kept as-is, but it looks unintended.
            print("📝 INPUT PROMPT", Colors.BRIGHT_MAGENTA, "═")
            print(f"{Colors.DIM}{input_prompt}{Colors.RESET}\n")
            print(separator)
            print("🔧 REFINEMENT PROMPT", Colors.BRIGHT_YELLOW, "═")
            print(f"{Colors.DIM}{refinement_prompt}{Colors.RESET}\n")
            print(separator)
        return f"{refinement_prompt}\n\n{input_prompt}"

    def format_as_markdown(self, content: str) -> str:
        """
        Ensures the given content is formatted as Markdown. If the content
        contains any character commonly used by Markdown syntax ('#', '*' or
        a backtick), it is returned as is. Otherwise it is passed through
        markdownify.

        Args:
            content (str): The raw content string from an LLM.

        Returns:
            str: The Markdown formatted content.
        """
        # '**' and '```' are covered by the single-character checks.
        if any(marker in content for marker in ('#', '*', '`')):
            return content

        return md(content)

    def clean_json_response(self, response: str) -> str:
        """
        Cleans an LLM response with `clean_response` and then removes every
        newline character so the result is a single line.

        Args:
            response (str): The raw string response from an LLM.

        Returns:
            str: The cleaned response with all newlines removed.
        """
        return self.clean_response(response).replace('\n', '')

    def clean_response(self, response: str) -> str:
        """
        Cleans an LLM-generated response by removing common meta-elements
        (like <think> blocks), stray tags, boilerplate prefixes and
        extraneous whitespace.

        Args:
            response (str): The raw string response from an LLM.

        Returns:
            str: The cleaned response string.
        """
        if self.verbose:
            print(f"{Colors.MAGENTA}Cleaning refined response...{Colors.RESET}")

        original_length = len(response)
        cleaned = response
        flags = re.DOTALL | re.IGNORECASE

        for pattern in self._META_BLOCK_PATTERNS:
            # subn replaces and counts in a single pass.
            cleaned, removed = re.subn(pattern, '', cleaned, flags=flags)
            if removed and self.verbose:
                print(f"{Colors.BLUE}Removing {removed} instances of pattern: {pattern}{Colors.RESET}")

        # Drop stray tags before looking for prefixes and collapsing blank
        # lines, so a tag in front of a prefix (or alone on a line) does not
        # hide the prefix or leave extra blank lines behind.
        cleaned = re.sub(self._STRAY_TAG_PATTERN, '', cleaned, flags=re.IGNORECASE)

        # The prefix patterns are anchored at the start of the text; removing
        # a leading meta block usually leaves newlines there, so strip them
        # first or the prefixes would never match.
        cleaned = cleaned.lstrip()

        for prefix in self._PREFIX_PATTERNS:
            cleaned, removed = re.subn(prefix, '', cleaned, flags=re.IGNORECASE)
            if removed and self.verbose:
                print(f"{Colors.BLUE}Removed prefix pattern: {prefix}{Colors.RESET}")

        # Collapse runs of three or more line breaks into one blank line.
        cleaned = re.sub(r'\n\s*\n\s*\n', '\n\n', cleaned)
        cleaned = cleaned.strip()

        if self.verbose:
            chars_removed = original_length - len(cleaned)
            if chars_removed > 0:
                print(f"{Colors.GREEN}Cleaned response: removed {chars_removed} characters{Colors.RESET}")
            else:
                print(f"{Colors.BLUE}No cleaning needed - response was already clean{Colors.RESET}")
            print(f"{Colors.BLUE}Final cleaned length: {len(cleaned)} characters{Colors.RESET}")
        return cleaned

    def clean_response_from_markdown(self, content: str) -> str:
        """
        Removes Markdown code block formatting (e.g., ```python) from the content.
        Opening fences are removed together with their language tag and the
        following line break; any remaining triple backticks are removed too.
        The text inside the code block is kept.

        Args:
            content (str): The string content potentially containing Markdown code blocks.

        Returns:
            str: The content with Markdown code block formatting removed.
        """
        # Remove fences with any language tag first; otherwise the tag (for
        # example "python") would be left behind in the output.
        cleaned = re.sub(self._FENCE_OPEN_PATTERN, '', content)
        cleaned = cleaned.replace('```', '')
        if self.verbose:
            print(f"{Colors.BLUE}Final cleaned length: {len(cleaned)} characters{Colors.RESET}")

        return cleaned
