# -*- coding: utf-8 -*-
#!/usr/bin/env python3
"""Convert CppCheck XML to Code Quality JSON

CppCheck is a useful tool to lint C/C++ code, checking for errors and code smells.
Developer tools, such as GitLab, can display useful insights about code quality,
when given a JSON report file defined by Code Climate.

This tool converts the XML report generated by CppCheck into a JSON file, as
defined by Code Climate.

Example:
    cppcheck --xml --enable=warning,style,performance ./src 2> cppcheck.xml
    python3 -m cppcheck_codequality -f cppcheck.xml -o cppcheck.json

References:
  - https://codeclimate.com
  - http://cppcheck.sourceforge.net
  - https://docs.gitlab.com/ee/user/project/merge_requests/code_quality.html#implementing-a-custom-tool

SPDX-License-Identifier: MIT
"""

import hashlib
import json
import logging
import os
import typing
from copy import deepcopy

# third-party
import xmltodict

__version__ = "1.3.0"

log = logging.getLogger(__name__)

# Template for a single Code Climate "issue" record. The "--...-REQUIREMENT--"
# strings are placeholders that get overwritten for every converted issue, as
# do the empty path and the -1 line/column values.
# Source: https://github.com/codeclimate/platform/blob/master/spec/analyzers/SPEC.md#data-types
CODE_QUAL_ELEMENT = {
    "type": "issue",
    "severity": "--GITLAB-REQUIREMENT--",
    "check_name": "--CODE-CLIMATE-REQUIREMENT--",
    "description": "--CODE-CLIMATE-REQUIREMENT--",
    "categories": "--CODE-CLIMATE-REQUIREMENT--",
    "fingerprint": "--GITLAB-REQUIREMENT--",
    "location": {
        "path": "",
        "positions": {
            "begin": {"line": -1, "column": -1},
        },
    },
}


def _get_codeclimate_category(cppcheck_severity: str) -> str:
    """Translate a CppCheck severity string into a Code Climate category.

    CppCheck severities handled here:
        error, warning, style, performance, portability, information
    Code Climate categories available:
        Bug Risk, Clarity, Compatibility, Complexity, Duplication,
        Performance, Security, Style

    Args:
        cppcheck_severity (str):
          Severity exactly as it appears in the CppCheck XML report.

    Returns:
        str: The Code Climate category mapped to that severity.

    Raises:
        KeyError: If the severity is not one of the six listed above.
    """
    bug_risk = "Bug Risk"
    style = "Style"
    category_for = dict(
        error=bug_risk,
        warning=bug_risk,
        style=style,
        performance="Performance",
        portability="Compatibility",
        information=style,
    )
    return category_for[cppcheck_severity]


def _get_codeclimate_severity(cppcheck_severity: str) -> str:
    """Translate a CppCheck severity string into a Code Climate severity.

    Code Climate severities available: info, minor, major, critical, blocker

    Args:
        cppcheck_severity (str):
          Severity exactly as it appears in the CppCheck XML report.

    Returns:
        str: The Code Climate severity mapped to that CppCheck severity.

    Raises:
        KeyError: If the severity is not one of error, warning, style,
          performance, portability or information.
    """
    minor = "minor"
    severity_for = dict(
        error="critical",
        warning="major",
        style=minor,
        performance=minor,
        portability=minor,
        information="info",
    )
    return severity_for[cppcheck_severity]


def convert_file(
    fname_in: str, fname_out: str, base_dirs: typing.Optional[list] = None
) -> int:
    """Read a CppCheck XML report and write it out as "Code Quality" JSON.

    Args:
        fname_in (str):
          Path of the CppCheck XML report to read. Like 'cppcheck.xml'.
        fname_out (str):
          Path of the code quality JSON file to write. Like 'cppcheck.json'.
        base_dirs (list):
          Optional directories that are passed through to the converter, which
          uses them to locate source files reported with relative paths.

    Returns:
        int: -1 if the input file does not exist; otherwise the number of
          CppCheck issues that were converted.
    """
    xml_path = os.path.abspath(fname_in)

    if not os.path.isfile(xml_path):
        log.error(
            "Input (CppCheck XML) file does not exist or cannot be opened -- '%s'",
            xml_path,
        )
        return -1

    log.debug("Reading input file: %s", xml_path)
    # Undecodable bytes are escaped rather than aborting the conversion.
    with open(
        xml_path, mode="rt", encoding="utf-8", errors="backslashreplace"
    ) as xml_file:
        xml_text = xml_file.read()
    json_text, issue_count = _convert(xml_text, base_dirs=base_dirs)

    log.debug("Writing output file: %s", fname_out)
    with open(fname_out, "w", encoding="utf-8") as json_file:
        json_file.write(json_text)

    return issue_count


def _get_line_from_file(
    filename: str, line_number: int, base_dirs: typing.Optional[typing.List[str]]
) -> str:
    """Fetch one line of a source file as a string.

    linecache.getline() was found to raise UnicodeDecodeError when the source
    file contains non-UTF-8 characters, so the file is read here directly with
    undecodable bytes escaped instead.

    Side note: CppCheck v2.0+ seems to report a 'syntaxError' for "unhandled
    characters", which makes such source files easier to spot.

    Args:
        filename (str):
          File to read from. A relative name is first looked up under each
          entry of `base_dirs`; the first existing match is used.
        line_number (int):
          1-based number of the line to extract. Zero or a negative value
          short-circuits to a "<the whole file>" placeholder.
        base_dirs (list of str, optional):
          Directories to search for relative file names. Ignored unless it is
          a list.

    Returns:
        str: The requested line (including its line ending), the whole-file
          placeholder, or an explanatory message if the file is too short.

    Raises:
        FileNotFoundError: If the resolved file does not exist.
    """
    if line_number <= 0:
        return str(filename) + "<the whole file>"

    if not os.path.isabs(filename) and isinstance(base_dirs, list):
        candidates = (os.path.join(base, filename) for base in base_dirs)
        # Keep the name unchanged when no base directory contains the file.
        filename = next(filter(os.path.isfile, candidates), filename)

    filename = os.path.abspath(filename)
    if not os.path.isfile(filename):
        message = "Source code file does not exist or cannot be opened. Missing a base directory?\n--> '{}'"
        raise FileNotFoundError(message.format(filename))

    lines_seen = 0
    with open(
        filename, mode="rt", encoding="utf-8", errors="backslashreplace"
    ) as source:
        for lines_seen, text in enumerate(source, start=1):
            if lines_seen == line_number:
                return text

    # Falling out of the loop means the file ended before the requested line.
    log.warning(
        "Only %d lines in file. Can't read line %d from '%s'",
        lines_seen,
        line_number,
        filename,
    )
    return "Can't read line {} from a {} line file".format(line_number, lines_seen)


def _parse_cppcheck_version(version_str: str) -> typing.Tuple[int, ...]:
    """Turn a dotted version string into a tuple of ints for numeric comparison.

    Only the leading digits of each dot-separated component are used, so
    "2.10" gives (2, 10) and "2.13 dev" gives (2, 13). Parsing stops at the
    first component that does not start with a digit; a string with no
    leading number gives an empty tuple.
    """
    numbers = []
    for chunk in str(version_str).strip().split("."):
        digits = ""
        for char in chunk:
            if char not in "0123456789":
                break
            digits += char
        if not digits:
            break
        numbers.append(int(digits))
    return tuple(numbers)


def _read_cppcheck_location(location: dict) -> typing.Tuple[str, int, int]:
    """Extract (path, line, column) from one parsed CppCheck <location> element.

    The column defaults to 0 when the element has no "@column" attribute.
    """
    path = location["@file"]
    line = int(location["@line"])
    column = int(location["@column"]) if "@column" in location else 0
    return (path, line, column)


def _make_codeclimate_location(path: str, line: int, column: int) -> dict:
    """Build a Code Climate location dict from a private copy of the template."""
    # Deep copy, so the nested dicts of CODE_QUAL_ELEMENT are never modified.
    location = deepcopy(CODE_QUAL_ELEMENT["location"])
    location["path"] = path
    location["positions"]["begin"]["line"] = line
    location["positions"]["begin"]["column"] = column
    return location


def _convert(
    xml_input, base_dirs: typing.Optional[typing.List[str]] = None
) -> typing.Tuple[str, int]:
    """Convert CppCheck XML to Code Climate JSON

    Note:
        There isn't a great 1:1 conversion from CppCheck's "severity" level, to
        the Code Climate's "categories." Each severity is mapped to exactly one
        category and one Code Climate severity.

        In the future, maybe this conversion can be made using CppCheck's "id"
        or check name.

    Args:
        xml_input:
          Contents of the CppCheck XML report, in a form xmltodict.parse()
          accepts.
        base_dirs (list of str, optional):
          Directories used to locate source files that the report references
          with relative paths. The source line is needed for the fingerprint.

    Returns:
        Tuple, where the first element, a string, is the JSON conversion result
        and the second element, an int, is the number of issues converted.

    Raises:
        FileNotFoundError: If a source file referenced by an issue cannot be
          found (raised while reading the line used for the fingerprint).
        KeyError: If the report lacks an expected element or attribute, or
          uses a severity the mapping helpers do not know.
    """

    dict_in = xmltodict.parse(xml_input=xml_input)

    if not dict_in:
        log.info("Empty file imported. Skipping...")
        return ("[ ]", 0)

    results = dict_in["results"]

    # Compare numerically; comparing the raw strings would be lexical.
    cppcheck_ver_str = results["cppcheck"]["@version"]
    if _parse_cppcheck_version(cppcheck_ver_str) < (1, 82):
        log.warning("\nWARNING: This was tested against a newer version of CppCheck")

    dict_out = []

    # Ensure this XML report has errors to convert. A missing <errors> element
    # is treated the same way as an empty one.
    errors = results.get("errors")
    if not isinstance(errors, dict):
        log.warning("No <errors> in XML file. Nothing to do.")
        return (json.dumps(dict_out), 0)

    # A lone <error> is parsed as a dict rather than a one-item list.
    error_list = errors["error"]
    if not isinstance(error_list, list):
        error_list = [error_list]

    for error in error_list:

        log.debug("Processing -- %s", str(error))

        # Some information messages are not related to the code.
        # Let's let the user know, then skip.
        if "location" not in error:
            log.info("No file location. Skipping the below issue:\n  %s", error["@msg"])
            continue

        # Work on a private copy so the module-level template stays pristine.
        tmp_dict = deepcopy(CODE_QUAL_ELEMENT)
        rule = error["@id"]
        tmp_dict["check_name"] = "cppcheck[" + rule.strip() + "]"
        if rule == "ConfigurationNotChecked":
            log.warning(
                "A 'ConfigurationNotChecked' message was found.\n  %s", error["@msg"]
            )

        tmp_dict["categories"] = _get_codeclimate_category(error["@severity"]).split(
            "\n"
        )
        tmp_dict["severity"] = _get_codeclimate_severity(error["@severity"])
        tmp_dict["description"] = error["@msg"]

        # Normalise to a list: the first location is the primary one, any
        # further ones are reported as "other_locations".
        locations = error["location"]
        if not isinstance(locations, list):
            locations = [locations]
        primary, others = locations[0], locations[1:]

        if "@file0" in primary:
            tmp_dict["description"] = "Also see source file: {}\n\n{}".format(
                primary["@file0"], tmp_dict["description"]
            )

        path, line, column = _read_cppcheck_location(primary)

        if others:
            tmp_dict["other_locations"] = [
                _make_codeclimate_location(*_read_cppcheck_location(other))
                for other in others
            ]

        tmp_dict["location"] = _make_codeclimate_location(path, line, column)

        tmp_dict["content"] = {"body": ""}

        if "@cwe" in error:
            cwe_id = error["@cwe"]
            tmp_dict["description"] = "{} (CWE-{})".format(
                tmp_dict["description"], cwe_id
            )

        # GitLab requires the fingerprint field. Code Climate describes this as
        # being used to uniquely identify the issue, so users could "exclude it
        # from future analysis."
        #
        # The components of the fingerprint aren't well defined, but Code Climate
        # has some examples here:
        # https://github.com/codeclimate/codeclimate-duplication/blob/1c118a13b28752e82683b40d610e5b1ee8c41471/lib/cc/engine/analyzers/violation.rb#L83
        # https://github.com/codeclimate/codeclimate-phpmd/blob/7d0aa6c652a2cbab23108552d3623e69f2a30282/tests/FingerprintTest.php

        codeline = _get_line_from_file(
            filename=path, line_number=line, base_dirs=base_dirs
        ).strip()

        # MD5 only serves as a stable identifier here, not as a security
        # measure; changing the algorithm would change every fingerprint.
        fingerprint_str = "cppcheck-" + rule + "-" + path + "-" + codeline
        log.debug("Fingerprint string: '%s'", fingerprint_str)
        tmp_dict["fingerprint"] = hashlib.md5(
            fingerprint_str.encode("utf-8")
        ).hexdigest()

        # Append this record (already an independent copy of the template)
        dict_out.append(tmp_dict)

    if not dict_out:
        log.warning("Result is empty")
    return (json.dumps(dict_out), len(dict_out))


if __name__ == "__main__":
    # This file was executed directly: only point the user at the supported
    # entry point; nothing is converted.
    import warnings

    _hint = "use 'python3 -m cppcheck_codequality', not 'python3 -m cppcheck_codequality.__init__'"
    warnings.warn(_hint, DeprecationWarning)
