import os
import unittest
from collections import Counter
from typing import List, Tuple

import httpretty

from RigorEvaluation.TFIDFEvaluation import TFIDFEvaluation
from TokenSatisfaction.KeywordQualification import KeywordQualification


class test_keyword_qualification(unittest.TestCase):
    def test_keyword_qualification(self):
        # Mock JSON response
        # Adapted from request made to https://youtube.googleapis.com/youtube/v3/videos?part=contentDetails&id=T9xsTO6ujqM&key=[YOUR_API_KEY]
        # on 6/22/2023.
        mock_video_duration_response = """
        {
            "kind": "youtube#videoListResponse",
            "etag": "jRM3UkS0H97J0JE_RooCw1CNA0c",
            "items": [
                {
                    "kind": "youtube#video",
                    "etag": "2_4jVYm_8tYAVsj-Vg_P8HJ6qkc",
                    "id": "T9xsTO6ujqM",
                    "contentDetails": {
                        "duration": "PT24M20S",
                        "dimension": "2d",
                        "definition": "hd",
                        "caption": "false",
                        "licensedContent": true,
                        "contentRating": {},
                        "projection": "rectangular"
                    }
                }
            ],
            "pageInfo": {
                "totalResults": 1,
                "resultsPerPage": 1
            }
        }
        """
        videoID = "example_videoID"
        api_key = "example_api_key"
        os.environ["api_key"] = api_key
        expected_score = 2.9120317139990197
        httpretty.enable()
        httpretty.register_uri(
            httpretty.GET,
            f"https://youtube.googleapis.com/youtube/v3/videos?part=contentDetails&id={videoID}&key={api_key}&alt=json",
            body=mock_video_duration_response,
        )
        with open("tests/sample_data/example_youtube_video.html", "r") as f:
            mock_youtube_html_response = f.read()
        with open("tests/sample_data/example_xml_youtube_transcript.xml", "r") as f:
            mock_youtube_xml_response = f.read()
        httpretty.register_uri(
            httpretty.GET,
            f"https://www.youtube.com/watch?v={videoID}",
            body=mock_youtube_html_response,
        )
        # This is the request that is made to get the final transcript
        httpretty.register_uri(
            httpretty.GET,
            "https://www.youtube.com/api/timedtext?v=T9xsTO6ujqM&caps=asr&opi=112496729&xoaf=5&hl=en&ip=0.0.0.0&ipbits=0&expire=1687325993&sparams=ip,ipbits,expire,v,caps,opi,xoaf&signature=82EC42B531F868F24D3EF4C6765BFA55A735C121.851DCD4FDCD046D8793E98EFEDC7F451B8156CFE&key=yt8&kind=asr&lang=en",
            body=mock_youtube_xml_response,
        )
        ir_evaluator = TFIDFEvaluation(videoID)

        # Perform text pre-processing
        ir_evaluator.lower_case_conversion()
        ir_evaluator.evaluate_term_freq()
        ir_evaluator.evaluate_inverse_doc_freq()
        ir_evaluator.initialize_json_scores()
        ir_evaluator.strip_non_ascii()
        ir_evaluator.filter_words()

        # Perform keyword qualification
        ir_evaluator.fallback_evaluation()
        k_qualifier = KeywordQualification(
            ir_evaluator.tokens, ir_evaluator.tf_idf_score
        )
        (freq_dist, qualification, qualified_terms) = k_qualifier.show()
        expected_freq_dist = {
            "this": 36,
            "is": 34,
            "me": 16,
            "pitching": 5,
            "a": 95,
            "ball": 34,
            "that": 53,
            "'s": 40,
            "impossible": 4,
            "to": 122,
            "hit": 5,
            "and": 127,
            "i": 82,
            "engineered": 1,
            "it": 65,
            "out": 29,
            "of": 87,
            "necessity": 1,
            "because": 14,
            "'m": 13,
            "facing": 1,
            "off": 4,
            "against": 5,
            "the": 226,
            "world": 7,
            "greatest": 2,
            "wiffle": 13,
            "players": 6,
            "now": 21,
            "popular": 1,
            "backyard": 2,
            "version": 1,
            "baseball": 9,
            "here": 16,
            "in": 35,
            "america": 1,
            "what": 10,
            "makes": 4,
            "really": 9,
            "interesting": 1,
            "balls": 10,
            "have": 16,
            "these": 6,
            "holes": 4,
            "on": 44,
            "one": 24,
            "side": 6,
            "which": 13,
            "means": 4,
            "they": 28,
            "can": 9,
            "curve": 7,
            "like": 24,
            "crazy": 1,
            "but": 33,
            "how": 18,
            "does": 1,
            "work": 4,
            "do": 19,
            "baseballs": 2,
            "or": 8,
            "from": 12,
            "any": 3,
            "sport": 1,
            "actually": 4,
            "through": 4,
            "air": 15,
            "for": 29,
            "matter": 1,
            "so": 37,
            "today": 2,
            "we": 24,
            "'re": 15,
            "going": 6,
            "test": 2,
            "answer": 3,
            "question": 2,
            "since": 2,
            "grew": 2,
            "up": 14,
            "playing": 7,
            "countless": 1,
            "hours": 1,
            "all": 22,
            "myself": 2,
            "revive": 1,
            "ble": 1,
            "pros": 3,
            "n't": 17,
            "want": 5,
            "give": 4,
            "too": 3,
            "much": 6,
            "away": 4,
            "might": 4,
            "had": 18,
            "rely": 2,
            "my": 36,
            "engineering": 2,
            "skills": 2,
            "level": 3,
            "field": 4,
            "[": 18,
            "music": 14,
            "]": 18,
            "whole": 4,
            "idea": 1,
            "journey": 2,
            "discovery": 2,
            "started": 2,
            "with": 48,
            "an": 16,
            "impromptu": 1,
            "visit": 2,
            "childhood": 3,
            "home": 8,
            "brea": 1,
            "california": 2,
            "last": 5,
            "time": 20,
            "lived": 1,
            "was": 62,
            "over": 9,
            "two": 14,
            "decades": 1,
            "ago": 1,
            "just": 29,
            "gon": 6,
            "na": 6,
            "go": 11,
            "ambush": 1,
            "them": 12,
            "tour": 1,
            "hey": 1,
            "mark": 3,
            "are": 7,
            "you": 78,
            "good": 7,
            "used": 4,
            "live": 1,
            "oh": 10,
            "word": 1,
            "got": 12,
            "after": 10,
            "meeting": 1,
            "family": 1,
            "were": 11,
            "kind": 2,
            "enough": 5,
            "let": 8,
            "take": 6,
            "walk": 1,
            "down": 7,
            "memory": 1,
            "lane": 1,
            "gosh": 2,
            "if": 16,
            "walls": 1,
            "could": 7,
            "talk": 1,
            "know": 8,
            "keep": 1,
            "micro": 1,
            "machines": 2,
            "right": 9,
            "little": 8,
            "cabinet": 1,
            "there": 9,
            "roof": 2,
            "where": 14,
            "scott": 1,
            "glacier": 1,
            "throw": 7,
            "water": 4,
            "balloons": 1,
            "sister": 1,
            "her": 1,
            "friends": 1,
            "sunbathing": 1,
            "option": 1,
            "kids": 2,
            "also": 7,
            "took": 5,
            "chance": 4,
            "confess": 1,
            "their": 9,
            "metal": 3,
            "shed": 1,
            "aaron": 1,
            "shots": 1,
            "homemade": 1,
            "crossbow": 1,
            "fashioned": 1,
            "eighth": 1,
            "grade": 1,
            "target": 1,
            "very": 5,
            "at": 16,
            "apparently": 1,
            "made": 2,
            "most": 5,
            "happy": 2,
            "hear": 1,
            "neighborhood": 2,
            "still": 4,
            "played": 1,
            "front": 3,
            "while": 3,
            "may": 1,
            "look": 3,
            "street": 4,
            "some": 5,
            "us": 4,
            "soccer": 2,
            "hockey": 1,
            "basketball": 1,
            "stadium": 2,
            "long": 6,
            "forgotten": 1,
            "legends": 2,
            "born": 1,
            "each": 2,
            "game": 9,
            "always": 4,
            "felt": 4,
            "big": 6,
            "only": 5,
            "thing": 2,
            "stop": 1,
            "when": 7,
            "lights": 5,
            "came": 3,
            "signaling": 2,
            "dinner": 3,
            "absolute": 4,
            "delight": 1,
            "applause": 2,
            "hummers": 1,
            "legend": 2,
            "rober": 1,
            "returned": 2,
            "not": 4,
            "deny": 1,
            "old": 3,
            "times": 1,
            "great": 4,
            "even": 5,
            "mound": 3,
            "slinging": 1,
            "lasers": 1,
            "as": 33,
            "invigorating": 1,
            "did": 6,
            "make": 6,
            "missed": 2,
            "true": 1,
            "calling": 2,
            "professional": 4,
            "player": 3,
            "should": 6,
            "share": 1,
            "similar": 2,
            "origin": 1,
            "story": 1,
            "our": 6,
            "history": 1,
            "split": 2,
            "paths": 1,
            "kept": 4,
            "dream": 2,
            "alive": 2,
            "turned": 1,
            "those": 3,
            "adolescent": 1,
            "visions": 1,
            "grandeur": 1,
            "into": 8,
            "first": 12,
            "pro": 1,
            "football": 1,
            "league": 7,
            "fact": 1,
            "kyle": 9,
            "11": 1,
            "year": 3,
            "14": 2,
            "years": 2,
            "later": 1,
            "founder": 1,
            "commissioner": 1,
            "he": 7,
            "told": 2,
            "every": 8,
            "starting": 1,
            "spring": 3,
            "18s": 1,
            "compete": 1,
            "four-month": 1,
            "season": 1,
            "traveling": 2,
            "country": 1,
            "draft": 1,
            "playoff": 1,
            "series": 3,
            "course": 2,
            "'ve": 2,
            "tracked": 1,
            "possible": 3,
            "stat": 1,
            "possibly": 1,
            "trackable": 1,
            "'d": 8,
            "expect": 2,
            "heated": 1,
            "team": 6,
            "rivalries": 1,
            "jimmy": 11,
            "norp": 1,
            "aka": 3,
            "norpedo": 4,
            "who": 3,
            "considered": 1,
            "by": 11,
            "many": 2,
            "be": 6,
            "best": 6,
            "hitting": 2,
            "dual": 1,
            "threat": 1,
            "rain": 1,
            "back-to-back": 2,
            "champ": 2,
            "easy": 1,
            "see": 9,
            "why": 2,
            "none": 1,
            "get": 10,
            "paid": 1,
            "play": 5,
            "truest": 1,
            "compensation": 1,
            "sheer": 1,
            "thrill": 1,
            "victory": 2,
            "has": 4,
            "united": 1,
            "oklahoma": 2,
            "around": 8,
            "finally": 4,
            "under": 2,
            "actual": 2,
            "crowd": 3,
            "meant": 4,
            "refer": 1,
            "himself": 2,
            "third": 3,
            "person": 1,
            "his": 8,
            "debut": 1,
            "cool": 2,
            "gate": 2,
            "spotted": 1,
            "unmistakable": 1,
            "flowing": 2,
            "hair": 1,
            "hoster": 1,
            "captain": 5,
            "clutch": 4,
            "north": 1,
            "definitely": 1,
            "leagues": 1,
            "per": 3,
            "3v3": 1,
            "divided": 1,
            "teams": 1,
            "connected": 1,
            "early": 1,
            "man": 1,
            "red": 2,
            "baron": 8,
            "yeah": 3,
            "nice": 1,
            "turn": 1,
            "waste": 1,
            "getting": 4,
            "basis": 1,
            "ahead": 2,
            "shot": 1,
            "green": 2,
            "monster": 2,
            "youth": 1,
            "being": 2,
            "replaced": 1,
            "dreamed": 1,
            "uh-oh": 1,
            "feels": 3,
            "four": 1,
            "three": 4,
            "strikes": 3,
            "strike": 7,
            "hits": 2,
            "anywhere": 2,
            "rectangular": 1,
            "pipes": 1,
            "more": 14,
            "embarrassingly": 1,
            "plate": 3,
            "middle": 1,
            "sorry": 1,
            "visibly": 1,
            "unimpressed": 1,
            "swapped": 1,
            "pinch": 1,
            "run": 7,
            "second": 11,
            "base": 10,
            "everywhere": 1,
            "outs": 2,
            "cleared": 1,
            "cobwebs": 1,
            "sense": 2,
            "things": 3,
            "about": 5,
            "change": 1,
            "ah": 1,
            "well": 7,
            "think": 6,
            "pitcher": 4,
            "anyways": 1,
            "boys": 1,
            "taste": 3,
            "school": 2,
            "suburban": 1,
            "streetball": 1,
            "norm": 1,
            "reign": 1,
            "mount": 1,
            "olympus": 1,
            "come": 7,
            "end": 1,
            "laughter": 2,
            "dinger": 1,
            "center": 3,
            "struck": 4,
            "again": 4,
            "witchcraft": 1,
            "proud": 2,
            "performance": 1,
            "defense": 1,
            "unlike": 1,
            "west": 1,
            "coast": 1,
            "pitches": 2,
            "moving": 1,
            "10": 2,
            "feet": 1,
            "left": 2,
            "propellers": 1,
            "nearly": 1,
            "figure": 1,
            "pitch": 5,
            "swing": 2,
            "duck": 1,
            "perhaps": 1,
            "both": 1,
            "rest": 1,
            "unfortunately": 4,
            "same": 3,
            "although": 1,
            "am": 1,
            "say": 3,
            "inning": 5,
            "okay": 4,
            "despite": 1,
            "efforts": 1,
            "narrowly": 1,
            "edged": 1,
            "win": 3,
            "supposed": 1,
            "happen": 1,
            "plane": 1,
            "ride": 1,
            "truthfully": 1,
            "plan": 3,
            "already": 3,
            "forming": 1,
            "head": 3,
            "science": 2,
            "credit": 1,
            "use": 4,
            "abilities": 1,
            "step": 1,
            "fly": 2,
            "crunch": 8,
            "labs": 2,
            "study": 1,
            "exactly": 5,
            "making": 4,
            "bunch": 2,
            "observations": 1,
            "gathering": 1,
            "data": 2,
            "then": 10,
            "marched": 1,
            "straight": 3,
            "san": 1,
            "francisco": 1,
            "giants": 1,
            "regarded": 1,
            "advanced": 1,
            "development": 1,
            "program": 1,
            "major": 2,
            "wow": 1,
            "pretty": 2,
            "incredibly": 1,
            "relieved": 1,
            "elite": 1,
            "coaches": 1,
            "struggling": 1,
            "clubhouse": 1,
            "real": 2,
            "purpose": 1,
            "trip": 1,
            "meet": 1,
            "brian": 1,
            "bannister": 1,
            "whose": 1,
            "deep": 2,
            "understanding": 1,
            "behind": 1,
            "spinning": 1,
            "sort": 5,
            "secret": 1,
            "sauce": 1,
            "developing": 1,
            "pictures": 1,
            "hardware": 1,
            "show": 3,
            "concepts": 2,
            "feeling": 2,
            "handle": 2,
            "final": 6,
            "confirmation": 1,
            "suspicion": 1,
            "she": 1,
            "met": 1,
            "stanford": 1,
            "normal": 3,
            "versus": 4,
            "identical": 1,
            "missing": 1,
            "scene": 1,
            "further": 1,
            "point": 1,
            "indication": 1,
            "report": 1,
            "nationally": 1,
            "ranked": 1,
            "batters": 1,
            "fell": 1,
            "victim": 1,
            "extensive": 1,
            "collection": 1,
            "research": 1,
            "interviews": 1,
            "curves": 4,
            "starts": 1,
            "riddle": 1,
            "astronaut": 6,
            "space": 3,
            "station": 2,
            "spacewalk": 1,
            "fix": 1,
            "solar": 1,
            "panel": 1,
            "your": 9,
            "wrench": 3,
            "heading": 2,
            "distracted": 1,
            "view": 1,
            "miss": 1,
            "grabbing": 1,
            "slowly": 2,
            "drifting": 1,
            "save": 1,
            "yourself": 3,
            "swim": 1,
            "way": 3,
            "back": 9,
            "zero": 2,
            "gravity": 2,
            "demonstrated": 2,
            "flail": 1,
            "arms": 1,
            "mass": 1,
            "wo": 1,
            "move": 5,
            "alright": 1,
            "trick": 1,
            "fast": 2,
            "opposite": 2,
            "direction": 1,
            "will": 4,
            "push": 1,
            "intuitive": 1,
            "skateboard": 1,
            "something": 3,
            "heavy": 1,
            "nate": 1,
            "job": 1,
            "rockets": 1,
            "no": 3,
            "create": 1,
            "chemical": 1,
            "reaction": 1,
            "fuel": 1,
            "lot": 2,
            "tiny": 2,
            "wrenches": 8,
            "rocket": 1,
            "naturally": 1,
            "other": 3,
            "need": 2,
            "kiwanda": 1,
            "effect": 4,
            "states": 1,
            "fluids": 1,
            "flow": 2,
            "smooth": 6,
            "surface": 3,
            "ever": 5,
            "noticed": 2,
            "curved": 1,
            "spoon": 2,
            "fluid": 1,
            "strings": 1,
            "following": 1,
            "airflow": 2,
            "turns": 1,
            "round": 2,
            "corner": 1,
            "frisbee": 1,
            "put": 3,
            "together": 3,
            "perspective": 1,
            "rushing": 2,
            "past": 1,
            "top": 5,
            "curving": 5,
            "nicely": 1,
            "rotation": 1,
            "matches": 1,
            "bottom": 2,
            "head-on": 1,
            "collision": 1,
            "creating": 1,
            "turbulence": 1,
            "thrown": 2,
            "diagonally": 2,
            "result": 1,
            "spun": 1,
            "moves": 1,
            "causing": 2,
            "fancy": 1,
            "term": 1,
            "magnus": 1,
            "light": 2,
            "beach": 1,
            "demonstrating": 1,
            "lots": 2,
            "areas": 1,
            "causes": 1,
            "tennis": 1,
            "ping-pong": 1,
            "record-breaking": 1,
            "basketballs": 1,
            "golf": 1,
            "seams": 2,
            "help": 1,
            "grab": 1,
            "spins": 2,
            "provide": 1,
            "pitchers": 1,
            "better": 2,
            "grip": 1,
            "spin": 1,
            "faster": 1,
            "tests": 1,
            "confirmed": 1,
            "principle": 1,
            "frisbees": 1,
            "seem": 1,
            "defy": 1,
            "edge": 1,
            "throwing": 2,
            "turning": 1,
            "freaking": 1,
            "jet": 1,
            "pack": 1,
            "having": 1,
            "said": 2,
            "yet": 4,
            "slightly": 1,
            "different": 1,
            "than": 4,
            "scuff": 1,
            "serious": 1,
            "comparison": 1,
            "smoother": 1,
            "easier": 1,
            "lift": 1,
            "towards": 2,
            "sure": 2,
            "check": 1,
            "high": 2,
            "speed": 4,
            "footage": 1,
            "case": 1,
            "whenever": 1,
            "flying": 1,
            "knew": 3,
            "secrets": 1,
            "curvature": 1,
            "knowledge": 1,
            "engineer": 3,
            "sweet": 1,
            "revenge": 1,
            "18": 1,
            "build": 5,
            "montage": 1,
            "duffel": 1,
            "bag": 2,
            "full": 3,
            "surprises": 1,
            "addressed": 1,
            "specifically": 1,
            "noor": 1,
            "before": 2,
            "slabs": 4,
            "new": 2,
            "addition": 1,
            "fight": 1,
            "summer": 12,
            "brain": 2,
            "drain": 1,
            "least": 3,
            "boring": 2,
            "created": 1,
            "camp": 14,
            "12-week": 3,
            "virtual": 2,
            "featuring": 2,
            "weekly": 8,
            "videos": 4,
            "mega": 4,
            "experiments": 2,
            "super": 3,
            "challenges": 4,
            "challenge": 3,
            "submission": 1,
            "platinum": 2,
            "ticket": 2,
            "biggest": 1,
            "experiment": 2,
            "video": 3,
            "usually": 2,
            "crunchlab": 2,
            "boxes": 2,
            "fun": 2,
            "toy": 2,
            "teach": 2,
            "juicy": 2,
            "physics": 2,
            "comes": 2,
            "month": 2,
            "coincide": 2,
            "lab": 1,
            "ship": 2,
            "week": 2,
            "learn": 3,
            "percent": 1,
            "bug": 1,
            "bites": 1,
            "poison": 1,
            "ivy": 1,
            "crunchlabs.com": 2,
            "reserve": 2,
            "limited": 2,
            "spots": 2,
            "'ll": 3,
            "number": 1,
            "opportunity": 1,
            "admiring": 1,
            "gift": 1,
            "avoid": 1,
            "eye": 1,
            "contact": 3,
            "tactics": 1,
            "intimidation": 1,
            "stuff": 1,
            "bit": 3,
            "rematch": 1,
            "officially": 1,
            "edward": 1,
            "must": 1,
            "spread": 1,
            "larger": 1,
            "gathered": 1,
            "watch": 1,
            "been": 1,
            "outing": 1,
            "closer": 1,
            "nightmare": 1,
            "decided": 2,
            "small": 1,
            "wound": 1,
            "deliver": 1,
            "hot": 1,
            "steamy": 1,
            "appetizer": 1,
            "served": 1,
            "retrieve": 1,
            "reload": 1,
            "brass": 2,
            "slug": 1,
            "preload": 1,
            "cylinder": 1,
            "3d": 2,
            "printed": 2,
            "apart": 1,
            "dollar": 1,
            "kitchen": 2,
            "timer": 2,
            "harvest": 1,
            "geared": 2,
            "mechanism": 2,
            "wind": 1,
            "hemispheres": 2,
            "half": 2,
            "delay": 2,
            "until": 1,
            "screw": 1,
            "rotates": 1,
            "releases": 1,
            "spring-loaded": 1,
            "plug": 1,
            "alters": 1,
            "trajectory": 1,
            "midair": 1,
            "predict": 1,
            "unpredictability": 1,
            "friend": 1,
            "found": 2,
            "achilles": 1,
            "heel": 1,
            "rendered": 1,
            "useless": 1,
            "backup": 3,
            "strikeout": 3,
            "pre-loaded": 1,
            "options": 1,
            "went": 1,
            "bad": 2,
            "ones": 1,
            "secure": 1,
            "crafting": 1,
            "rapido": 1,
            "deduced": 1,
            "correctly": 1,
            "simply": 1,
            "stepping": 1,
            "forward": 1,
            "copter": 1,
            "cheeky": 1,
            "thorough": 1,
            "reading": 1,
            "rules": 1,
            "pipe": 1,
            "plates": 1,
            "above": 1,
            "legs": 2,
            "mention": 1,
            "exploding": 1,
            "loopholes": 1,
            "smugness": 1,
            "short-lived": 1,
            "become": 1,
            "without": 1,
            "tremendous": 1,
            "amount": 1,
            "hand-eye": 1,
            "coordination": 1,
            "runners": 1,
            "triple": 1,
            "000": 1,
            "rpm": 1,
            "wheeled": 1,
            "monstrosity": 1,
            "personal": 2,
            "machine": 1,
            "demogorgon": 2,
            "calibrated": 1,
            "launch": 1,
            "cruise": 1,
            "missiles": 1,
            "74": 1,
            "mile": 1,
            "hour": 2,
            "limit": 1,
            "problem": 2,
            "requires": 1,
            "electrons": 1,
            "battery": 1,
            "power": 2,
            "soon": 1,
            "became": 1,
            "concern": 1,
            "position": 1,
            "find": 1,
            "sadly": 1,
            "giving": 1,
            "lead": 2,
            "concerned": 1,
            "would": 8,
            "start": 1,
            "thankfully": 1,
            "ca": 1,
            "at-bat": 2,
            "pop-up": 1,
            "routine": 1,
            "catch": 1,
            "survived": 1,
            "allowed": 1,
            "chances": 1,
            "another": 1,
            "followed": 1,
            "bond": 1,
            "suddenly": 1,
            "rate": 1,
            "winning": 1,
            "capitalized": 1,
            "potential": 1,
            "inside": 1,
            "park": 2,
            "rule": 1,
            "zone": 2,
            "doubles": 1,
            "catcher": 1,
            "beat": 1,
            "fresh": 1,
            "batteries": 1,
            "rogue": 1,
            "freedom": 1,
            "free": 2,
            "range": 1,
            "50": 1,
            "miles": 1,
            "bring": 1,
            "him": 3,
            "seeing": 1,
            "torpedo": 2,
            "next": 2,
            "plenty": 1,
            "dreams": 1,
            "crushed": 1,
            "northed": 1,
            "tied": 2,
            "clearly": 1,
            "sore": 1,
            "winner": 1,
            "threatening": 1,
            "score": 1,
            "short": 1,
            "circuited": 1,
            "needed": 2,
            "avenge": 1,
            "headed": 3,
            "perfect": 2,
            "lucky": 2,
            "bat": 2,
            "hoped": 1,
            "increased": 1,
            "area": 1,
            "teamwork": 1,
            "scored": 1,
            "someone": 1,
            "fine": 1,
            "though": 1,
            "arm": 2,
            "strength": 1,
            "advantage": 1,
            "volunteered": 1,
            "duties": 1,
            "gimmick": 3,
            "thanks": 1,
            "pneumatics": 1,
            "zones": 1,
            "gave": 1,
            "floater": 1,
            "room": 1,
            "generous": 1,
            "old-fashioned": 1,
            "hustle": 1,
            "knowing": 1,
            "stake": 1,
            "disconnected": 1,
            "airline": 1,
            "honestly": 1,
            "cheating": 1,
            "road": 1,
            "slide": 1,
            "tricks": 2,
            "clutchnorp": 1,
            "mono": 1,
            "imano": 1,
            "delayed": 1,
            "riser": 1,
            "modified": 1,
            "slider": 1,
            "everything": 2,
            "learned": 2,
            "inerting": 1,
            "subtle": 1,
            "thought": 1,
            "mutual": 1,
            "respect": 1,
            "michael": 1,
            "passing": 1,
            "torch": 1,
            "kobe": 1,
            "type": 1,
            "moment": 2,
            "lasted": 1,
            "immediately": 1,
            "instead": 2,
            "runs": 1,
            "quickly": 1,
            "hopes": 1,
            "miracle": 1,
            "fair": 1,
            "square": 1,
            "foreign": 2,
            "saw": 1,
            "clear": 1,
            "wall": 1,
            "mad": 1,
            "dash": 1,
            "sight": 2,
            "set": 1,
            "risky": 1,
            "mean": 1,
            "taking": 1,
            "cut": 1,
            "gauntlet": 1,
            "maybe": 1,
            "blast": 2,
            "100": 1,
            "psi": 1,
            "either": 1,
            "triumphant": 1,
            "beautiful": 1,
            "white": 1,
            "rubber": 1,
            "pentagons": 1,
            "almost": 1,
            "reminded": 1,
            "truth": 1,
            "buddies": 1,
            "never": 1,
            "lost": 1,
            "finding": 1,
            "truly": 1,
            "passionate": 1,
            "brings": 1,
            "joy": 1,
            "life": 1,
            "lovely": 1,
            "gifts": 1,
            "protect": 1,
            "cherish": 1,
            "prized": 1,
            "possession": 1,
            "thank": 1,
            "seen": 1,
            "firewood": 2,
            "shopping": 1,
            "robot": 1,
            "shop": 1,
            "jenga": 1,
            "severely": 1,
            "lacking": 1,
            "welcome": 1,
            "heck": 1,
            "crunchlabs": 2,
            "ask": 1,
            "film": 1,
            "20": 1,
            "survive": 1,
            "12": 1,
            "weeks": 1,
            "spend": 1,
            "bored": 1,
            "growing": 1,
            "delightful": 1,
            "ways": 1,
            "slab": 1,
            "subscriber": 1,
            "worry": 1,
            "somehow": 1,
            "subscribed": 1,
            "link": 1,
            "description": 1,
        }
        expected_qualification = 326
        expected_qualified_terms = {
            "ball": 3.541858830019222,
            "just": 3.0209972373693366,
            "like": 2.5001356447194514,
            "time": 2.0834463705995425,
            "air": 1.562584777949657,
            "music": 1.45841245941968,
            "camp": 1.45841245941968,
            "wiffle": 1.3542401408897025,
            "got": 1.2500678223597257,
            "summer": 1.2500678223597257,
            "oh": 1.1458955038297485,
            "jimmy": 1.1458955038297485,
            "second": 1.1458955038297485,
            "balls": 1.0417231852997713,
            "base": 1.0417231852997713,
            "right": 0.9375508667697942,
            "really": 0.9375508667697942,
            "game": 0.9375508667697942,
            "baseball": 0.9375508667697942,
            "kyle": 0.9375508667697942,
            "crunch": 0.833378548239817,
            "baron": 0.833378548239817,
            "weekly": 0.833378548239817,
            "home": 0.833378548239817,
            "wrenches": 0.833378548239817,
            "little": 0.833378548239817,
            "know": 0.833378548239817,
            "let": 0.833378548239817,
            "strike": 0.72920622970984,
            "curve": 0.72920622970984,
            "run": 0.72920622970984,
            "playing": 0.72920622970984,
            "good": 0.72920622970984,
            "come": 0.72920622970984,
            "league": 0.72920622970984,
            "world": 0.72920622970984,
            "throw": 0.72920622970984,
            "wasn": 0.6250339111798628,
            "big": 0.6250339111798628,
            "make": 0.6250339111798628,
            "going": 0.6250339111798628,
            "players": 0.6250339111798628,
            "think": 0.6250339111798628,
            "smooth": 0.6250339111798628,
            "astronaut": 0.6250339111798628,
            "gonna": 0.6250339111798628,
            "team": 0.6250339111798628,
            "best": 0.6250339111798628,
            "final": 0.6250339111798628,
            "long": 0.6250339111798628,
            "captain": 0.5208615926498856,
            "sort": 0.5208615926498856,
            "inning": 0.5208615926498856,
            "took": 0.5208615926498856,
            "hit": 0.5208615926498856,
            "build": 0.5208615926498856,
            "curving": 0.5208615926498856,
            "did": 0.5208615926498856,
            "play": 0.5208615926498856,
            "week": 0.5208615926498856,
            "lights": 0.5208615926498856,
            "want": 0.5208615926498856,
            "exactly": 0.5208615926498856,
            "pitch": 0.5208615926498856,
            "pitching": 0.5208615926498856,
            "don": 0.5208615926498856,
            "effect": 0.4166892741199085,
            "field": 0.4166892741199085,
            "making": 0.4166892741199085,
            "professional": 0.4166892741199085,
            "impossible": 0.4166892741199085,
            "means": 0.4166892741199085,
            "head": 0.4166892741199085,
            "meant": 0.4166892741199085,
            "mega": 0.4166892741199085,
            "bat": 0.4166892741199085,
            "getting": 0.4166892741199085,
            "finally": 0.4166892741199085,
            "curves": 0.4166892741199085,
            "norpedo": 0.4166892741199085,
            "struck": 0.4166892741199085,
            "street": 0.4166892741199085,
            "great": 0.4166892741199085,
            "felt": 0.4166892741199085,
            "kept": 0.4166892741199085,
            "okay": 0.4166892741199085,
            "holes": 0.4166892741199085,
            "spring": 0.4166892741199085,
            "challenges": 0.4166892741199085,
            "chance": 0.4166892741199085,
            "speed": 0.4166892741199085,
            "crunchlabs": 0.4166892741199085,
            "slabs": 0.4166892741199085,
            "clutch": 0.4166892741199085,
            "pitcher": 0.4166892741199085,
            "old": 0.4166892741199085,
            "makes": 0.4166892741199085,
            "water": 0.4166892741199085,
            "12": 0.4166892741199085,
            "use": 0.4166892741199085,
            "absolute": 0.4166892741199085,
            "videos": 0.4166892741199085,
            "away": 0.4166892741199085,
            "actually": 0.4166892741199085,
            "used": 0.4166892741199085,
            "versus": 0.4166892741199085,
            "work": 0.4166892741199085,
            "unfortunately": 0.4166892741199085,
            "backup": 0.3125169555899314,
            "ll": 0.3125169555899314,
            "strikes": 0.3125169555899314,
            "straight": 0.3125169555899314,
            "strikeout": 0.3125169555899314,
            "win": 0.3125169555899314,
            "level": 0.3125169555899314,
            "player": 0.3125169555899314,
            "possible": 0.3125169555899314,
            "look": 0.3125169555899314,
            "gimmick": 0.3125169555899314,
            "challenge": 0.3125169555899314,
            "video": 0.3125169555899314,
            "plate": 0.3125169555899314,
            "plan": 0.3125169555899314,
            "super": 0.3125169555899314,
            "came": 0.3125169555899314,
            "mark": 0.3125169555899314,
            "engineer": 0.3125169555899314,
            "way": 0.3125169555899314,
            "metal": 0.3125169555899314,
            "center": 0.3125169555899314,
            "crowd": 0.3125169555899314,
            "month": 0.3125169555899314,
            "mound": 0.3125169555899314,
            "space": 0.3125169555899314,
            "normal": 0.3125169555899314,
            "feels": 0.3125169555899314,
            "answer": 0.3125169555899314,
            "series": 0.3125169555899314,
            "learn": 0.3125169555899314,
            "contact": 0.3125169555899314,
            "aka": 0.3125169555899314,
            "headed": 0.3125169555899314,
            "things": 0.3125169555899314,
            "year": 0.3125169555899314,
            "yeah": 0.3125169555899314,
            "wrench": 0.3125169555899314,
            "say": 0.3125169555899314,
            "taste": 0.3125169555899314,
            "knew": 0.3125169555899314,
            "dinner": 0.3125169555899314,
            "childhood": 0.3125169555899314,
            "bit": 0.3125169555899314,
            "pros": 0.3125169555899314,
            "surface": 0.3125169555899314,
            "eye": 0.20834463705995426,
            "deep": 0.20834463705995426,
            "personal": 0.20834463705995426,
            "reserve": 0.20834463705995426,
            "expect": 0.20834463705995426,
            "experiment": 0.20834463705995426,
            "perfect": 0.20834463705995426,
            "decided": 0.20834463705995426,
            "tied": 0.20834463705995426,
            "experiments": 0.20834463705995426,
            "proud": 0.20834463705995426,
            "virtual": 0.20834463705995426,
            "outs": 0.20834463705995426,
            "data": 0.20834463705995426,
            "fashioned": 0.20834463705995426,
            "fast": 0.20834463705995426,
            "featuring": 0.20834463705995426,
            "opposite": 0.20834463705995426,
            "alive": 0.20834463705995426,
            "feeling": 0.20834463705995426,
            "crunchlab": 0.20834463705995426,
            "park": 0.20834463705995426,
            "delay": 0.20834463705995426,
            "dream": 0.20834463705995426,
            "physics": 0.20834463705995426,
            "discovery": 0.20834463705995426,
            "question": 0.20834463705995426,
            "oklahoma": 0.20834463705995426,
            "problem": 0.20834463705995426,
            "airflow": 0.20834463705995426,
            "victory": 0.20834463705995426,
            "platinum": 0.20834463705995426,
            "diagonally": 0.20834463705995426,
            "ve": 0.20834463705995426,
            "real": 0.20834463705995426,
            "usually": 0.20834463705995426,
            "pitches": 0.20834463705995426,
            "red": 0.20834463705995426,
            "demonstrated": 0.20834463705995426,
            "printed": 0.20834463705995426,
            "ahead": 0.20834463705995426,
            "rely": 0.20834463705995426,
            "power": 0.20834463705995426,
            "engineering": 0.20834463705995426,
            "demogorgon": 0.20834463705995426,
            "timer": 0.20834463705995426,
            "visit": 0.20834463705995426,
            "noticed": 0.20834463705995426,
            "left": 0.20834463705995426,
            "learned": 0.20834463705995426,
            "lead": 0.20834463705995426,
            "gosh": 0.20834463705995426,
            "laughter": 0.20834463705995426,
            "gravity": 0.20834463705995426,
            "labs": 0.20834463705995426,
            "14": 0.20834463705995426,
            "greatest": 0.20834463705995426,
            "green": 0.20834463705995426,
            "grew": 0.20834463705995426,
            "kitchen": 0.20834463705995426,
            "kind": 0.20834463705995426,
            "kids": 0.20834463705995426,
            "juicy": 0.20834463705995426,
            "journey": 0.20834463705995426,
            "half": 0.20834463705995426,
            "handle": 0.20834463705995426,
            "happy": 0.20834463705995426,
            "instead": 0.20834463705995426,
            "years": 0.20834463705995426,
            "heading": 0.20834463705995426,
            "10": 0.20834463705995426,
            "hemispheres": 0.20834463705995426,
            "hour": 0.20834463705995426,
            "high": 0.20834463705995426,
            "zero": 0.20834463705995426,
            "hits": 0.20834463705995426,
            "hitting": 0.20834463705995426,
            "legend": 0.20834463705995426,
            "legends": 0.20834463705995426,
            "legs": 0.20834463705995426,
            "fun": 0.20834463705995426,
            "firewood": 0.20834463705995426,
            "new": 0.20834463705995426,
            "neighborhood": 0.20834463705995426,
            "needed": 0.20834463705995426,
            "need": 0.20834463705995426,
            "flow": 0.20834463705995426,
            "flowing": 0.20834463705995426,
            "fly": 0.20834463705995426,
            "monster": 0.20834463705995426,
            "moment": 0.20834463705995426,
            "foreign": 0.20834463705995426,
            "missed": 0.20834463705995426,
            "free": 0.20834463705995426,
            "mechanism": 0.20834463705995426,
            "3d": 0.20834463705995426,
            "actual": 0.20834463705995426,
            "gate": 0.20834463705995426,
            "geared": 0.20834463705995426,
            "zone": 0.20834463705995426,
            "major": 0.20834463705995426,
            "machines": 0.20834463705995426,
            "lucky": 0.20834463705995426,
            "lots": 0.20834463705995426,
            "lot": 0.20834463705995426,
            "loaded": 0.20834463705995426,
            "lived": 0.20834463705995426,
            "limited": 0.20834463705995426,
            "light": 0.20834463705995426,
            "roof": 0.20834463705995426,
            "pretty": 0.20834463705995426,
            "applause": 0.20834463705995426,
            "bunch": 0.20834463705995426,
            "station": 0.20834463705995426,
            "calling": 0.20834463705995426,
            "started": 0.20834463705995426,
            "stadium": 0.20834463705995426,
            "causing": 0.20834463705995426,
            "spots": 0.20834463705995426,
            "spoon": 0.20834463705995426,
            "split": 0.20834463705995426,
            "spins": 0.20834463705995426,
            "arm": 0.20834463705995426,
            "champ": 0.20834463705995426,
            "soccer": 0.20834463705995426,
            "slowly": 0.20834463705995426,
            "skills": 0.20834463705995426,
            "similar": 0.20834463705995426,
            "signaling": 0.20834463705995426,
            "sight": 0.20834463705995426,
            "coincide": 0.20834463705995426,
            "short": 0.20834463705995426,
            "ship": 0.20834463705995426,
            "com": 0.20834463705995426,
            "comes": 0.20834463705995426,
            "sense": 0.20834463705995426,
            "california": 0.20834463705995426,
            "tricks": 0.20834463705995426,
            "seams": 0.20834463705995426,
            "brass": 0.20834463705995426,
            "ticket": 0.20834463705995426,
            "thrown": 0.20834463705995426,
            "throwing": 0.20834463705995426,
            "tiny": 0.20834463705995426,
            "today": 0.20834463705995426,
            "told": 0.20834463705995426,
            "baseballs": 0.20834463705995426,
            "torpedo": 0.20834463705995426,
            "thing": 0.20834463705995426,
            "toy": 0.20834463705995426,
            "test": 0.20834463705995426,
            "bag": 0.20834463705995426,
            "teach": 0.20834463705995426,
            "bad": 0.20834463705995426,
            "better": 0.20834463705995426,
            "swing": 0.20834463705995426,
            "backyard": 0.20834463705995426,
            "blast": 0.20834463705995426,
            "sure": 0.20834463705995426,
            "boring": 0.20834463705995426,
            "traveling": 0.20834463705995426,
            "boxes": 0.20834463705995426,
            "brain": 0.20834463705995426,
            "concepts": 0.20834463705995426,
            "returned": 0.20834463705995426,
            "said": 0.20834463705995426,
            "round": 0.20834463705995426,
            "school": 0.20834463705995426,
            "science": 0.20834463705995426,
            "rushing": 0.20834463705995426,
            "course": 0.20834463705995426,
            "cool": 0.20834463705995426,
        }
        self.assertEqual(freq_dist, expected_freq_dist)
        self.assertEqual(qualification, expected_qualification)
        self.assertEqual(qualified_terms, expected_qualified_terms)


# Script entry point: hand control to unittest's command-line runner only when
# this file is executed directly, so importing the module has no side effect.
if __name__ == "__main__":
    unittest.main()
