#!/usr/bin/env python3

import sys, getopt, os
import simplejson as json
#sys.path.append ('/Users/darnaud/dev/geo/opentrep/_skbuild/macosx-10.15-x86_64-3.8/cmake-install/lib')
#sys.path.append ('/Users/darnaud/dev/geo/opentrep/_skbuild/macosx-10.15-x86_64-3.8/cmake-install/lib/python3.8/site-packages/pyopentrep')
import Travel_pb2

# Usage
def usage (script_name = 'pyopentrep'):
    """Print the command-line help of the script on the standard output.

    Args:
        script_name: name displayed in the "Usage:" line.
    """
    # Note: in Python 3, a bare `print` is a no-op expression; `print()` is
    # required to actually emit the blank separator lines.
    print()
    print ("That script searches for, against a Xapian-indexed database of")
    print ("travel-related terms, place key words. It allows for some")
    print ("mis-spelling. For instance: 'sna francisco rio de janero los angeles reykyavki'")
    print ("will retrieve the details of San Francisco, CA,")
    print ("Rio de Janeiro, Brasil, Los Angeles, CA and Reykjavik, Iceland.")
    print()
    print ("The output is a mere string giving the retrieved place codes")
    print ("associated to their corresponding matching weights. It can:")
    print (" - be a single line with only the place codes and matching weights;")
    print (" - give the full details, as returned by the C++ library;")
    print (" - return a JSON-formatted string with all the details")
    print()
    print ("Usage: %s [options] \"search string\"" % script_name)
    print()
    print ("The default output is short.")
    print()
    print ("Options:")
    print ("  -h, --help         : outputs this help and exits")
    print ("  -i, --index        : performs the indexation with Xapian")
    print ("  -g, --generate=    : randomly draws the documents from Xapian")
    print ("  -d, --xapiandb=    : file-path to the Xapian index/database")
    print ("  -t, --sqldbtype=   : type of the SQL DB (noDB, sqlite, mysql)")
    print ("  -s, --sqldbconx=   : specifies the connection string for the SQL DB")
    # The long option actually parsed by handle_opt() is --deploymentnb
    print ("  -m, --deploymentnb=: deployment number/version")
    print ("  -f, --format=      : format of the output: Short (S, default),")
    print ("                       Full (F), raw JSON (J), ")
    print ("                       Interpretation from JSON (I) or from Protobuf (P)")
    print()

# Conversion of a string into a boolean
def str2bool (v):
    """Tell whether the string spells, case-insensitively, a true value.

    The recognised spellings are "yes", "true", "t" and "1"; any other
    string yields False.
    """
    lowered = v.lower()
    return any (lowered == spelling
                for spelling in ("yes", "true", "t", "1"))

# Handle command-line options
def handle_opt():
    """Parse the command line (sys.argv) and return the run-time settings.

    Returns:
        The tuple (command, outputFormat, xapianDBPath, sqlDBType,
        sqlDBConnStr, deploymentNumber, searchString, nbOfDraws), where
        command is one of 'search' (default), 'index' or 'generate'.

    The process exits with status 2 when an option is invalid, and with
    status 0 after the help has been displayed (-h/--help).
    """
    try:
        # "generate=" needs the trailing '=' so that the long option takes
        # the number of draws, exactly like its short counterpart (-g).
        opts, args = getopt.getopt (sys.argv[1:], "hif:d:t:s:g:m:",
                        ["help", "index", "format=", "xapiandb=", "sqldbtype=",
                         "sqldbconx=", "generate=", "deploymentnb="])
    except getopt.GetoptError as err:
        # Print help information and exit. It will print something like
        # "option -a not recognized"
        print (str(err))
        usage()
        sys.exit(2)

    def toInteger (option, value):
        # Convert an option argument into an integer; a malformed value is
        # reported the same way as any other command-line error.
        try:
            return int (value)
        except ValueError:
            print ("option {} requires an integer argument, not '{}'"\
                   .format (option, value))
            usage()
            sys.exit(2)

    # Command, either 'search', 'index' or 'generate'
    command = 'search'

    # Format of the output
    outputFormat = 'S'

    # Number of draws
    nbOfDraws = 1

    # Default search string, overridden by the positional arguments (if any)
    searchString = 'sna francisco rio de janero los angeles reykyavki'
    if len (args) >= 1:
        searchString = ' '.join (args)

    # Options
    xapianDBPath = "/tmp/opentrep/xapian_traveldb"
    sqlDBType = "nodb"
    sqlDBConnStr = "/tmp/opentrep/sqlite_travel.db"
    deploymentNumber = 0
    for o, a in opts:
        if o in ("-h", "--help"):
            usage (sys.argv[0])
            sys.exit()
        elif o in ("-i", "--index"):
            command = "index"
        elif o in ("-f", "--format"):
            outputFormat = a
        elif o in ("-d", "--xapiandb"):
            xapianDBPath = a
        elif o in ("-t", "--sqldbtype"):
            sqlDBType = a
        elif o in ("-s", "--sqldbconx"):
            sqlDBConnStr = a
        elif o in ("-g", "--generate"):
            command = "generate"
            nbOfDraws = toInteger (o, a)
        elif o in ("-m", "--deploymentnb"):
            deploymentNumber = toInteger (o, a)
        else:
            # Raised explicitly, so that the check survives `python -O`
            raise AssertionError ("Unhandled option")
    return (command, outputFormat, xapianDBPath, sqlDBType, sqlDBConnStr,
            deploymentNumber, searchString, nbOfDraws)

# Parser helpers
def getMain (locations):
    """Return the (at most) three leading items of the given sequence.

    Applied to a match string such as 'nce/100', it yields 'nce'.
    """
    leading_part = slice (None, 3)
    return locations[leading_part]

##
# Compact result parser. The result string contains the main matches,
# separated by commas (','), along with their associated weights, given
# as percentage numbers. For every main match:
#  - Colons (':') separate potential extra matches (i.e., matches with the same
#    matching percentage).
#  - Dashes ('-') separate potential alternate matches (i.e., matches with lower
#    matching percentages).
#
# Samples of result string to be parsed:
#  - pyopentrep -f S nice sna francisco vancouver niznayou
#    - 'nce/100,sfo/100-emb/98-jcc/97,yvr/100-cxh/83-xea/83-ydt/83;niznayou'
#  - pyopentrep -f S fr
#    - 'aur:avf:bae:bou:chr:cmf:cqf:csf:cvf:dij/100'
def compactResultParser (resultString):
    """Extract the main place codes from a compact (short) result string.

    The part before the first semi-colon (';') lists the matches, separated
    by commas (','); the part after it holds the unrecognised keywords.
    For every comma-separated match, only the first three characters (the
    code of the main match) are kept, upper-cased. The extra (':') and
    alternate ('-') matches are not reported.

    Args:
        resultString: compact result string, e.g.
            'nce/100,sfo/100-emb/98-jcc/97;niznayou'.

    Returns:
        The space-separated upper-cased codes, followed by the unrecognised
        keywords when there are some (e.g. 'NCE SFO niznayou'). An empty
        string is returned when there is no match at all (including when
        resultString is empty or None).
    """
    if not resultString:
        return ''

    # 1. First, strip out the unrecognised keywords
    str_matches, _, unrecognized = resultString.partition (';')

    # Without any match, nothing is reported
    if not str_matches:
        return ''

    # 2. Then, keep the code of the main match of every location. That is
    #    computed once: the result depends neither on the alternate nor on
    #    the extra matches.
    codes = [match[:3].upper() for match in str_matches.split (',')]
    form_value = [' '.join (codes)]
    if unrecognized:
        form_value.append (unrecognized)

    return ' '.join (form_value)

##
# JSON interpreter. The JSON structure contains a list with the main matches,
# along with their associated fields (weights, coordinates, etc).
# For every main match:
#  - There is a potential list of extra matches (i.e., matches with the same
#    matching percentage).
#  - There is a potential list of alternate matches (i.e., matches with lower
#    matching percentages).
#
# Samples of result string to be parsed:
#  - pyopentrep -fJ "nice sna francisco"
#    - {'locations':[
#         {'names':[
#            {'name': 'Nice Airport'}, {'name': 'Nice Côte d'Azur International Airport'}],
#          'cities': { 'city_details': { 'iata_code': 'NCE' } },
#         {'names':[
#            {'name': 'San Francisco Apt'}, {'name': 'San Francisco Intl. Airport'}],
#          'cities': { 'city_details': { 'iata_code': 'SFO' } },
#      ]}
#
def interpretFromJSON (jsonFormattedResult):
    """Summarise, as a single string, the locations of a JSON result.

    Args:
        jsonFormattedResult: JSON document holding a 'locations' list.

    Returns:
        The concatenation, for every location, of
        '<iata_code>-<icao_code>-<geonames_id> (<page_rank>%) / '
        '<city iata_code>: <lat> <lon>; '. All those fields are expected
        to be strings, as they are concatenated as is.
    """
    parsedStruct = json.loads (jsonFormattedResult)

    # Collect all the pieces first, and assemble them in one go at the end
    pieces = []
    for location in parsedStruct['locations']:
        pieces.extend ([
            location['iata_code'], '-',
            location['icao_code'], '-',
            location['geonames_id'], ' ',
            '(', location['page_rank'], '%) / ',
            location['cities']['city_details']['iata_code'], ": ",
            location['lat'], ' ',
            location['lon'], '; ',
        ])

    return ''.join (pieces)

##
# Protobuf interpreter. The Protobuf structure contains a list with the
# main matches, along with their associated fields (weights, coordinates, etc).
def interpretFromProtobuf (protobufFormattedResult):
    """Summarise a Protobuf-serialised query answer.

    Args:
        protobufFormattedResult: serialised Travel_pb2.QueryAnswer message.

    Returns:
        The tuple (unmatchedKeywordString, interpretedString): the
        concatenation of the un-matched keywords, and the concatenation,
        for every recognised place, of '<travel code>-<ICAO code>-'
        '<Geonames ID> (<page rank>%) / <city code>: <latitude> '
        '<longitude>; '.

    A decoding error is only reported on the standard output; the
    interpretation then goes on with whatever the message contains.
    """
    from google.protobuf.message import DecodeError

    # Protobuf
    queryAnswer = Travel_pb2.QueryAnswer()
    try:
        queryAnswer.ParseFromString (protobufFormattedResult)
    except DecodeError:
        print ('Error in decoding. Will continue though')
        print ('DEBUG - {}'.format (str(queryAnswer)))

    # List of recognised places
    placeSummaries = []
    for place in queryAnswer.place_list.place:
        # The last served city is retained; a default (empty) city is used
        # when the place has none
        servedCity = Travel_pb2.City()
        for svd_city in place.city_list.city:
            servedCity = svd_city

        coordinates = place.coord
        placeSummaries.append (''.join ([
            place.tvl_code.code, '-',
            place.icao_code.code, '-',
            str(place.geonames_id.id), ' ',
            '(', str(place.page_rank.rank), '%) / ',
            str(servedCity.code.code), ": ",
            str(coordinates.latitude), ' ',
            str(coordinates.longitude), '; ',
        ]))
    interpretedString = ''.join (placeSummaries)

    # List of un-matched keywords
    # NOTE(review): the keywords are concatenated without any separator;
    # confirm whether a space between them is intended.
    unmatchedKeywordString = ''.join (queryAnswer.unmatched_keyword_list.word)

    #
    return unmatchedKeywordString, interpretedString


##
# File-path details
#
def getPaths (openTrepLibrary):
    """Retrieve and display the file-paths used by the OpenTrep library.

    The library returns a semi-colon-separated string, made of the
    file-path of the OPTD-maintained POR file, optionally followed by the
    file-path of the Xapian database/index and then by the file-path of
    the SQLite database.

    Returns:
        The tuple (optd_por_filepath, actual_xapian_db_path,
        sqlitedb_filepath); a missing file-path is reported as None.
    """
    # Calls the underlying OpenTrep library service
    pathFields = openTrepLibrary.getPaths().split (';')
    hasXapianPath = len (pathFields) >= 2
    hasSQLitePath = len (pathFields) >= 3

    optd_por_filepath = pathFields[0]
    actual_xapian_db_path = pathFields[1] if hasXapianPath else None
    sqlitedb_filepath = pathFields[2] if hasSQLitePath else None

    # Report the results
    print ("OPTD-maintained list of POR (points of reference): '{}'".\
           format (optd_por_filepath))
    if hasXapianPath:
        print ("Xapian-based travel database/index: '{}'".\
               format (actual_xapian_db_path))
    if hasSQLitePath:
        print ("SQLite database: '{}'".format (sqlitedb_filepath))

    return (optd_por_filepath, actual_xapian_db_path, sqlitedb_filepath)

##
# Indexation
#
def index (openTrepLibrary, xapianDBPath):
    """Run the indexation through the OpenTrep library and report on it.

    Args:
        openTrepLibrary: object exposing the index() service, the result
            of which is concatenated as is into the report (hence expected
            to be a string).
        xapianDBPath: not used by this function.
    """
    # DEBUG
    banner = (
        "Perform the indexation of the (Xapian-based) travel database.",
        "That operation may take several minutes on some slow machines.",
        "It takes less than 20 seconds on fast ones...",
    )
    for bannerLine in banner:
        print (bannerLine)

    # Calls the underlying OpenTrep library service
    nbOfIndexedPOR = openTrepLibrary.index()

    # Report the results
    report = "Done. Indexed " + nbOfIndexedPOR + " POR (points of reference)"
    print (report)


##
# Generation (random draw) of locations
#
def generate (openTrepLibrary, nbOfDraws, outputFormat):
    """Draw locations through the OpenTrep library and display them.

    Args:
        openTrepLibrary: object exposing the generate() and generateToPB()
            services.
        nbOfDraws: number of locations to draw; nothing is drawn when it
            is not strictly positive.
        outputFormat: 'S' (compact), 'F' (full), 'J' (raw JSON),
            'I' (interpretation from JSON) or 'P' (interpretation from
            Protobuf). Nothing is displayed for any other value.
    """
    # A non-positive number of draws is reported, and nothing else is done
    if nbOfDraws <= 0:
        print ('Can not generate ' + str(nbOfDraws) + ' locations')
        return

    # DEBUG
    print ('Number of locations to generate: ' + str(nbOfDraws))

    # The 'I' (Interpretation from JSON) output format is not known by the
    # OpenTrep library: the raw JSON format is requested instead, and the
    # JSON string is then interpreted by the interpretFromJSON() function.
    libraryFormat = 'J' if outputFormat == 'I' else outputFormat

    # The Protobuf format goes through a dedicated service (see below)
    rawResult = None
    if libraryFormat != 'P':
        rawResult = openTrepLibrary.generate (libraryFormat, nbOfDraws)

    if outputFormat == 'S':
        # The compact result string has to be parsed
        report = compactResultParser (rawResult)
        title = 'Compact format => recognised place (city/airport) codes:'

    elif outputFormat == 'F':
        # The full details are just dumped as is
        report = rawResult
        title = 'Raw result from the OpenTrep library:'

    elif outputFormat == 'J':
        # The raw JSON needs no handling
        report = rawResult
        title = 'Raw (JSON) result from the OpenTrep library:'

    elif outputFormat == 'I':
        # Example of how to extract information from the JSON structure
        report = interpretFromJSON (rawResult)
        title = 'JSON format => recognised place (city/airport) codes:'

    elif outputFormat == 'P':
        # Example of how to extract information from the Protobuf structure
        rawResult = openTrepLibrary.generateToPB (nbOfDraws)
        _, report = interpretFromProtobuf (rawResult)
        title = 'Protobuf format => recognised place (city/airport) codes:'

    else:
        # Any other output format: nothing to display
        return

    print (title)
    print (report)
    print ('------------------')


##
# Search
#
def search (openTrepLibrary, searchString, outputFormat):
    """Search for places through the OpenTrep library and display them.

    Args:
        openTrepLibrary: object exposing the search() and searchToPB()
            services.
        searchString: keywords to search for. When empty, the user is
            prompted for some; when still empty, a sample string is used.
        outputFormat: 'S' (compact), 'F' (full), 'J' (raw JSON),
            'I' (interpretation from JSON) or 'P' (interpretation from
            Protobuf). Nothing is displayed for any other value.
    """
    # If no search string was supplied as arguments of the command-line,
    # ask the user for some
    if searchString == '':
        try:
            # input() is the Python 3 counterpart of raw_input()
            searchString = input ("Enter a search string, e.g., 'rio de janero sna francisco'")
        except EOFError:
            # No input is available (e.g., closed standard input):
            # fall back on the default search string below
            searchString = ''
    if searchString == '':
        searchString = 'rio de janero sna francisco'

    # DEBUG
    print ("searchString: " + searchString)

    ##
    # Call the OpenTrep C++ library.
    #
    # The 'I' (Interpretation from JSON) output format is just an example
    # of how to use the output generated by the OpenTrep library. Hence,
    # that latter does not support that "output format". So, the raw JSON
    # format is required, and the JSON string will then be parsed and
    # interpreted by the interpretFromJSON() method, just to show how it
    # works
    opentrepOutputFormat = outputFormat
    if opentrepOutputFormat == 'I':
        opentrepOutputFormat = 'J'

    # The Protobuf format goes through a dedicated service (see below)
    result = None
    if opentrepOutputFormat != 'P':
        result = openTrepLibrary.search (opentrepOutputFormat, searchString)

    # When the compact format is selected, the result string has to be
    # parsed accordingly.
    if outputFormat == 'S':
        parsedStruct = compactResultParser (result)
        print ('Compact format => recognised place (city/airport) codes:')
        print (parsedStruct)
        print ('------------------')

    # When the full details have been requested, the result string is
    # potentially big and complex, and is not aimed to be
    # parsed. So, the result string is just displayed/dumped as is.
    elif outputFormat == 'F':
        print ('Raw result from the OpenTrep library:')
        print (result)
        print ('------------------')

    # When the raw JSON format has been requested, no handling is necessary.
    elif outputFormat == 'J':
        print ('Raw (JSON) result from the OpenTrep library:')
        print (result)
        print ('------------------')

    # The interpreted JSON format is an example of how to extract relevant
    # information from the corresponding Python structure. That code can be
    # copied/pasted by clients to the OpenTREP library.
    elif outputFormat == 'I':
        interpretedString = interpretFromJSON (result)
        print ('JSON format => recognised place (city/airport) codes:')
        print (interpretedString)
        print ('------------------')

    # The interpreted Protobuf format is an example of how to extract relevant
    # information from the corresponding Python structure. That code can be
    # copied/pasted by clients to the OpenTREP library.
    elif outputFormat == 'P':
        result = openTrepLibrary.searchToPB (searchString)
        unmatchedKeywords, interpretedString = interpretFromProtobuf (result)
        print ('Protobuf format => recognised place (city/airport) codes:')
        print (interpretedString)
        print ('Unmatched keywords:')
        print (unmatchedKeywords)
        print ('------------------')


############################
# Main
############################
if __name__ == '__main__':
    # Entry point: parse the command line, initialize the OpenTrep library,
    # run the requested command (index, generate or search) and release
    # the library.
    command, outputFormat, xapianDBPath, sqlDBType, sqlDBConnStr, \
        deploymentNumber, searchString, nbOfDraws = handle_opt()

    # Initialize the OpenTrep C++ library
    import pyopentrep
    openTrepLibrary = pyopentrep.OpenTrepSearcher()
    initOK = openTrepLibrary.init (xapianDBPath, sqlDBType, sqlDBConnStr,
                                   deploymentNumber, 'pyopentrep.log')
    if not initOK:
        errorMsg = 'Error: The OpenTrepSearcher cannot be initialized'
        raise Exception (errorMsg)

    # From now on, the library is released whatever happens, including
    # when the script exits early (sys.exit) or when an exception is raised
    try:
        # Print out the file-path details
        (optd_por_filepath, actual_xapian_db_path, sqlitedb_filepath) = \
            getPaths (openTrepLibrary)

        #
        if command == 'index':
            index (openTrepLibrary, xapianDBPath)

        elif command == 'generate':
            generate (openTrepLibrary, nbOfDraws, outputFormat)

        else:
            # Check whether the directory of the Xapian database/index
            # exists. The path is None when the library did not report it,
            # and os.path.isdir() does not accept None.
            doesXapianDirExist = (actual_xapian_db_path is not None
                                  and os.path.isdir (actual_xapian_db_path))
            if not doesXapianDirExist:
                print ('Error:\tThe directory of the Xapian database/index ({}) ' \
                       'does not exist or is not accessible.'\
                       .format (actual_xapian_db_path))
                print ('\tEither the indexing process has not run yet, or it has ' \
                       'run with a different deployment number ({} being the ' \
                       'current one).'.format (deploymentNumber))
                print ('\tThe indexer can be re-run by adding the -i option ' \
                       'to this script.')
                sys.exit (2)

            #
            search (openTrepLibrary, searchString, outputFormat)

    finally:
        # Free the OpenTREP library resource
        openTrepLibrary.finalize()

