# Connector phrases placed in front of the weakness text: " has|contains "
# with an optional "an "/"a " article, or " was|were found|discovered to
# contain a ".
WEAKNESS_BEFORE_STOP_WORDS = (
    r" ("
    r"(has|contains) (an |a |)"
    r"|(was|were) (found|discovered) to contain a "
    r")"
)
# Lazily captures (possibly empty) text into the named group `weakness`.
WEAKNESS_PATTERN = r"(?P<weakness>.*?)"
# Optional trailing noun (" vulnerability"/" vulnerabilities", either case of
# the first letter, or " issue"/" issues"), itself optionally followed by
# " was|were discovered|found". The whole group may match the empty string.
WEAKNESS_AFTER_STOP_WORDS = (
    r"("
    r" ((V|v)ulnerabilit(y|ies)|issue[s]?)"
    r"( (was|were) (discovered|found)|)"
    r"|)"
)

# Prepositions placed in front of the component text: " in " (optionally
# followed by "the " or "its "), or " at " / " starting at ".
COMPONENT_BEFORE_STOP_WORDS = r" (in (the |its |)|(starting |)at )"
# Lazily captures at least one character into the named group `component`.
COMPONENT_PATTERN = r"(?P<component>.+?)"
# Single capturing alternation over the component-kind nouns.
COMPONENT_AFTER_STOP_WORDS = "(" + "|".join(
    ("file", "component", "feature", "function", "method")
) + ")"

# Date bound such as "before 2020-01-31" or "through 2019-12-01".
# The (before|through) group is capturing, so it occupies a group number in
# every larger pattern that embeds this one.
DATE_VERSION_PATTERN = r"(before|through) \d{4}-\d{2}-\d{2}"
# Dotted version such as "1.2.3" or "2.x": any number of "v." / "v" / "R"
# prefixes, at least two dot-separated parts, and any number of "-<digit>" or
# "a" suffixes, delimited by word boundaries.
VERSION_PATTERN = r"\b(?:v\.|v|R)*(?:\d+(?:\.(?:\d+|x))+)(?:\-\d|a)*\b"
# 5 to 40 lowercase hexadecimal characters delimited by word boundaries.
COMMIT_VERSION_PATTERN = r"\b[0-9a-f]{5,40}\b"

# " in ", " of " or " on ", optionally followed by "some ", "the " or "a ".
# Contributes two capturing groups (preposition, article).
CONFIGURATION_BEFORE_STOP_WORDS = r" (in|of|on) (some |the |a |)"
# Named group `configuration`: two or more name characters, lazily extended up
# to the first version / commit hash / date bound, plus an optional
# " and before|prior|earlier|below" or " all versions" tail.
CONFIGURATION_PATTERN = "".join((
    r"(?P<configuration>[a-zA-Z0-9-\/\(\)]{2,}.+?(?:",
    "|".join((VERSION_PATTERN, COMMIT_VERSION_PATTERN, DATE_VERSION_PATTERN)),
    r")(?: and (?:before|prior|earlier|below)| all versions|))",
))

# Verb phrase placed in front of the attacker text: an optional modal
# ("could ", "which ", "may ", "might "), then "allow" or "enabl"/"enable"
# with an optional "s"/"ed"/"ing" ending, then an optional "a "/"an " article.
ATTACKER_BEFORE_STOP_WORDS = (
    r" (could |which |may |might |)"
    r"(allow|enabl[e]?)(s|ed|ing|)"
    r" (a |an |)"
)
# Named group `attacker`: lazily matched text ending in "attacker", "user" or
# "server", with an optional plural "s".
ATTACKER_PATTERN = r"(?P<attacker>.*?(attacker|user|server)[s]?)"

# Phrases placed in front of the impact text:
#   * " to <verb> " for a fixed list of verbs (or "remotely " /
#     "potentially " / "potentially enable "), plus an optional "a " article;
#   * " resulting in ", " leading to " or " causing a ".
# Every alternative ends with its separating space so the text captured after
# it never starts with a stray blank ("resulting in" used to lack that space,
# unlike its siblings "leading to " and "causing a ").
IMPACT_BEFORE_STOP_WORDS = (
    r' (to (cause |lead |execute |run |crash |overwrite |corrupt |take |perform |obtain |achieve |remotely '
    r'|potentially (enable |))(a |)'
    r'|resulting in |leading to |causing a )'
)
# Lazily captures at least one character into the named group `impact`.
IMPACT_PATTERN = r'(?P<impact>.+?)'

# " via " or " by ", optionally followed by "an ", "a " or "vectors ".
VECTOR_BEFORE_STOP_WORDS = (
    r" (via|by)"
    r" (an |a |vectors |)"
)
# Lazily captures at least one character into the named group `vector`.
VECTOR_PATTERN = r"(?P<vector>.+?)"

# Conjunctions placed in front of the root-cause text: " because " (optionally
# followed by "of ", "a " or "the "), " when " or " due to ".
# Each alternative carries its own trailing space. Previously the space sat
# after the whole group, so the empty article alternative required two
# consecutive blanks and a plain "because <clause>" never matched.
ROOT_CAUSE_BEFORE_STOP_WORD = r' (because (of |a |the |)|when |due to )'
# Lazily captures at least one character into the named group `root_cause`.
ROOT_CAUSE_PATTERN = r'(?P<root_cause>.+?)'


# Identifier-like token: a letter or underscore followed by one or more
# letters, digits, "_", ":", "(" or ")" (so at least two characters overall).
FUNCTION_NAME_PATTERN = r"[a-zA-Z_][a-zA-Z0-9_:\(\)]+"
# A dot followed by one to four alphanumeric characters.
EXTENSION_PATTERN = r'\.[a-zA-Z0-9]{1,4}'
# One path segment: letters, digits, "_" or "-".
DIR_FILE_NAME_PATTERN = r'[a-zA-Z0-9_-]+'
# A bare file name: one segment plus an extension.
FILE_PATTERN = "".join((DIR_FILE_NAME_PATTERN, EXTENSION_PATTERN))
# A path with at least one "/" separator: segment, one or more "/segment"
# repetitions, then an extension.
RELATIVE_PATH_PATTERN = "{0}(?:/(?:{0}))+{1}".format(DIR_FILE_NAME_PATTERN, EXTENSION_PATTERN)


# Maps a regular expression to its replacement template (templates use \N
# group back-references). Entry order is kept exactly as before.
# NOTE(review): presumably each pair is applied in turn with re.sub, in which
# case the more specific rules must stay ahead of the generic ones - confirm
# against the caller.
REPLACEMENT_PHRASES = {
    # --- Trailing "via ..." clauses, anchored on the sentence-final period ---
    r'via the (' + FUNCTION_NAME_PATTERN + r') function in the component (.+?)\.$':
        r'in the \1 function at \2 component.',
    # NOTE(review): the matched noun (\2, "function"/"component") is dropped
    # here, while the next rule keeps it - confirm this is intended.
    r'via the (' + FUNCTION_NAME_PATTERN + r') (function|component) (at|in) (.+?)\.$':
        r'in the \1 at \4.',
    r'via the (function|component) (' + FUNCTION_NAME_PATTERN + r') (at|in) (.+?)\.$':
        r'in the \2 \1 at \4.',
    r'via (' + FUNCTION_NAME_PATTERN + r') class in (.+?)\.$':
        r'in the \1 at \2.',
    # The final period is escaped in the next two rules. An unescaped "."
    # matches any character, so text without a closing period lost the last
    # character of the function name ("via the function foobar" became
    # "in the fooba function.").
    r'via the (' + FUNCTION_NAME_PATTERN + r') function\.$':
        r'in the \1 function.',
    r'via the function (' + FUNCTION_NAME_PATTERN + r')\.$':
        r'in the \1 function.',
    r'via the component (.+?)\.$':
        r'in the \1.',

    # --- Function / location reordering ---
    r'in function (' + FUNCTION_NAME_PATTERN + r') in (.+?) in':
        r'in \1 function at \2 in',
    # Groups: 1 = location, 2 = function name, 3 = `configuration`
    # (DATE_VERSION_PATTERN's capturing group follows as 4 and is unused).
    r'at (.+?) in (' + FUNCTION_NAME_PATTERN + r') in ' + CONFIGURATION_PATTERN:
        r'in \2 at \1 in \3',

    # --- Wording normalisation ---
    r'out of bounds': r'out-of-bounds',
    # The article must be a whole word ("a " / "an "). The old pattern
    # 'leads to a' also hit the first letter of the next word, turning
    # "leads to arbitrary" into "leading torbitrary" and "leads to an" into
    # "leading ton".
    r'leads to an? ': r'leading to ',
    r'^There is a': r'A',
    # Removes the comma between a number and a following lowercase word.
    r'(\d+), ([a-z]+)': r'\1 \2',

    # --- Sentences that open with the configuration ---
    r'^In ' + CONFIGURATION_PATTERN + r' there is a':
        r'\1 has a',
    # Groups: 1 = `configuration`, 2 = DATE_VERSION_PATTERN's (before|through),
    # 3 = the phrase in front of "exists in" - hence \3 in the template.
    r'^(?:In |)' + CONFIGURATION_PATTERN + r' ((?:a |).+?)(?: vulnerability|) exists in':
        r'\1 has \3 in',

    # --- File mentions: insert the word "file" after the file name ---
    # CONFIGURATION_BEFORE_STOP_WORDS adds two groups (preposition, article);
    # the templates keep only the preposition, so a matched article is dropped.
    r'^(' + FUNCTION_NAME_PATTERN + r') (in) (' + RELATIVE_PATH_PATTERN + r')' + CONFIGURATION_BEFORE_STOP_WORDS:
        r'\1 \2 \3 file \4 ',
    r'(' + FUNCTION_NAME_PATTERN + r') function in (' + FILE_PATTERN + r')' + CONFIGURATION_BEFORE_STOP_WORDS:
        r'\1 function at \2 file \3 ',
    r'^(' + FUNCTION_NAME_PATTERN + r') (in) (' + FILE_PATTERN + r')' + CONFIGURATION_BEFORE_STOP_WORDS:
        r'\1 \2 \3 file \4 ',
    r'^(' + RELATIVE_PATH_PATTERN + r')' + CONFIGURATION_BEFORE_STOP_WORDS:
        r'\1 file \2 ',
    r'^(' + FILE_PATTERN + r')' + CONFIGURATION_BEFORE_STOP_WORDS:
        r'\1 file \2 ',
}

# Regular expressions for text fragments classed as noise.
# NOTE(review): presumably each match is stripped from the description before
# further parsing - confirm against the caller.
NOISE_PATTERNS = [
    # Whitespace followed by a parenthesised aside.
    r"\s+\(.*?\)",
    # Whitespace followed by a bracketed CWE id with 1-4 digits, e.g. " [CWE-79]".
    r"\s+\[CWE-\d{1,4}\]",
    # Cross-references to other CVE identifiers.
    r" a different vulnerability than CVE-\d{4}-\d{4,7}",
    r" a related issue to CVE-\d{4}-\d{4,7}",
    r" and CVE-\d{4}-\d{4,7}",
    # " fixed in <version>".
    "".join((r" fixed in ", VERSION_PATTERN)),
    # The literal phrase "can be abused " including its trailing space.
    r"can be abused ",
]
