#!/usr/bin/env python

import sys
import getopt

from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from esgcet.publish import parseDatasetVersionId
from esgcet.config import loadConfig, initLogging
from esgcet.model import Dataset, DatasetVersion
from esgcet.query import printResult
from esgcet.messaging import warning

# Command-line help text. main() prints this string whenever it shows usage
# information (help request, option-parsing error, or no dataset id given).
usage = """Usage:
    esglist_files [options] dataset_id [dataset_id ...]

    List the files in a dataset.

Arguments:
    dataset_id: Dataset identifier, or '-' to read dataset ids from standard input.
                Use the syntax foo.bar#n to list version n of dataset foo.bar.

Options:

    --echo-sql:
        Echo SQL commands.

    --full:
        Print a full listing.

    -h, --help:
        Print a help message.

    -i init_dir:
        Directory containing all initialization files.
        Recommended: one initialization file for the default sections (esg.ini) and one per project, must match the name format esg.<project>.ini
        If not specified, the default installed init files are read.

Example:

    List file paths and sizes for model ukmo_hadgem1, experiment 1pctto2x:

        % esglist_datasets --model ukmo_hadgem1 --experiment 1pctto2x --no-header --select name ipcc4 | esglist_files --full -

"""

def main(argv):
    """List the files belonging to one or more dataset versions.

    Arguments:
        argv: Command-line arguments without the program name. Recognized
            options are ``--echo-sql``, ``--full``, ``-h``/``--help`` and
            ``-i init_dir``; the remaining arguments are dataset ids, or a
            single leading ``-`` to read the ids from standard input
            (one per line, blank lines ignored).

    Behavior:
        Without ``--full`` the location of every file is printed as soon as
        its dataset version has been looked up. With ``--full`` the rows of
        all datasets are collected, sorted, and printed through printResult
        with the columns path, version, size, publication_time,
        modification_time, tracking_id, checksum and checksum_type.
        Datasets or versions that cannot be found are reported through
        warning() and skipped.

    Exit status:
        Calls sys.exit(0) after printing help, and sys.exit(2) on an invalid
        option or when no dataset id is given.
    """

    # 'help' must be listed here, otherwise the documented --help option is
    # rejected by getopt as an unknown option.
    try:
        args, lastargs = getopt.getopt(argv, "hi:", ['echo-sql', 'full', 'help'])
    except getopt.error as err:
        print(err)
        print(usage)
        sys.exit(2)

    # Handle the flags before validating positional arguments so that
    # "-h" on its own prints the help text instead of an error.
    echoSql = False
    fullList = False
    init_dir = '/esg/config/esgcet/'
    for flag, arg in args:
        if flag == '--echo-sql':
            echoSql = True
        elif flag == '--full':
            fullList = True
        elif flag in ('-h', '--help'):
            print(usage)
            sys.exit(0)
        elif flag == '-i':
            init_dir = arg

    if not lastargs:
        print("No dataset specified.")
        print(usage)
        sys.exit(2)

    if lastargs[0] == '-':
        # Skip blank lines so that e.g. a trailing newline in the piped
        # input is not looked up as an empty dataset id.
        stripped = [line.strip() for line in sys.stdin]
        datasetIds = [name for name in stripped if name]
    else:
        datasetIds = lastargs

    # Load the configuration and set up a database connection
    config = loadConfig(init_dir)
    engine = create_engine(config.getdburl('extract'), echo=echoSql, pool_recycle=3600)
    initLogging('DEFAULT', override_sa=engine)
    Session = sessionmaker(bind=engine, autoflush=True, autocommit=False)
    session = Session()

    # Release the session even if a lookup or print fails part-way through.
    try:
        # Lookup the dataset versions
        filetuples = []
        for datasetVersionId in datasetIds:
            datasetId, version = parseDatasetVersionId(datasetVersionId)

            # First find the dataset
            dset = session.query(Dataset).filter_by(name=datasetId).first()
            if dset is None:
                warning("Dataset not found: %s" % datasetVersionId)
                continue
            if version == -1:
                # -1 is what parseDatasetVersionId yields here when the
                # version is to be taken from the dataset itself
                # (presumably "no #n suffix given" -- confirm in esgcet).
                version = dset.getVersion()

            # Then find the requested version of that dataset
            query = session.query(Dataset, DatasetVersion)
            query = query.filter(Dataset.id == DatasetVersion.dataset_id)
            query = query.filter(Dataset.name == datasetId)
            query = query.filter(DatasetVersion.version == version)
            rowTuple = query.first()
            if rowTuple is None:
                warning("Dataset %s, version %d not found" % (datasetId, version))
                continue
            dsetVersionObj = rowTuple[1]
            if dsetVersionObj is None:
                warning("Dataset not found: %s" % datasetVersionId)
                continue

            if fullList:
                # Collected across all datasets; sorted and printed once below.
                filetuples.extend([
                    (fobj.getLocation(),
                     str(fobj.getVersion()),
                     str(fobj.getSize()),
                     str(fobj.getPublicationTime()),
                     fobj.getModificationFtime(),
                     fobj.getTrackingID(),
                     fobj.getChecksum(),
                     fobj.getChecksumType())
                    for fobj in dsetVersionObj.files])
            else:
                for fobj in dsetVersionObj.files:
                    print(fobj.getLocation())

        if fullList:
            filetuples.sort()
            printResult(['path', 'version', 'size', 'publication_time', 'modification_time', 'tracking_id', 'checksum', 'checksum_type'], filetuples)
    finally:
        session.close()

if __name__ == '__main__':
    # Script entry point: pass main() everything after the program name.
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
    
