#!/usr/bin/env python

# This work was created by participants in the DataONE project, and is
# jointly copyrighted by participating institutions in DataONE. For
# more information on DataONE, see our web site at http://dataone.org.
#
#   Copyright 2009-2016 DataONE
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Script to do a simple text dump of an ORE document."""

import argparse
import codecs
import logging
import sys

import d1_pyore
import requests


def downloadDocument(url):
  """Return the text of an ORE document from stdin, a web URL or a local file.

  Args:
    url: "-" to read the document from standard input, a string starting with
      "http://" or "https://" to fetch it with an HTTP GET, or otherwise a
      path to a local file, which is decoded as UTF-8.

  Returns:
    The document contents as text.

  Raises:
    requests.exceptions.HTTPError: the server answered with a 4xx/5xx status.
    IOError / OSError: the local file could not be opened or read.
  """
  if url == "-":
    return sys.stdin.read()
  if url.startswith(("https://", "http://")):
    response = requests.get(url)
    # Fail loudly on an error status instead of handing the body of an error
    # page to the ORE parser as if it were the requested document.
    response.raise_for_status()
    return response.text
  with codecs.open(url, encoding="utf-8") as f:
    return f.read()


def packageToObject(pkg):
  """Collect the contents of a resource map into a plain dictionary.

  Args:
    pkg: Object providing getResourceMapPid(), getSubjectObjectsByPredicate(),
      getAggregatedScienceMetadataPids() and getAggregatedScienceDataPids().
      main() passes a d1_pyore.ResourceMap.

  Returns:
    A dict with these keys (each "entry" below is one element returned by
    pkg.getSubjectObjectsByPredicate(); presumably a (subject, object) pair --
    confirm against d1_pyore):
      'pid': the value of pkg.getResourceMapPid().
      'id': entry[0] of the first DCTERMS.identifier entry whose entry[1]
        equals 'pid', or '' when there is no such entry.
      'aggregations': maps entry[1] of every ORE.isAggregatedBy entry to the
        list of entry[0] values that share it.
      'objects': maps entry[0] of every DCTERMS.identifier entry to
        {'pid': entry[1]}.
      'metadata': the value of pkg.getAggregatedScienceMetadataPids().
      'data': the value of pkg.getAggregatedScienceDataPids().
      'isDocumentedBy': maps entry[0] of every CITO.isDocumentedBy entry to
        the list of entry[1] values.
      'documents': maps entry[0] of every CITO.documents entry to the list of
        entry[1] values.
  """

  def getIdforPid(ids, pid):
    # Reverse lookup in the identifier entries: first entry[0] whose entry[1]
    # is pid, '' when nothing matches.
    for entry in ids:
      if entry[1] == pid:
        return entry[0]
    return ''

  obj = {'pid': pkg.getResourceMapPid(),
         'aggregations': {},
         'objects': {},
         'metadata': [],
         'data': [],
         'documents': {},
         'isDocumentedBy': {},
         }

  # Load the aggregations and the identifiers from the package.
  ags = pkg.getSubjectObjectsByPredicate(d1_pyore.ORE.isAggregatedBy)
  ids = pkg.getSubjectObjectsByPredicate(d1_pyore.DCTERMS.identifier)

  obj['id'] = getIdforPid(ids, obj['pid'])

  # setdefault() replaces dict.has_key(), which no longer exists in Python 3.
  for ag in ags:
    obj['aggregations'].setdefault(ag[1], []).append(ag[0])
  for oid in ids:
    obj['objects'][oid[0]] = {'pid': oid[1]}

  obj['metadata'] = pkg.getAggregatedScienceMetadataPids()
  obj['data'] = pkg.getAggregatedScienceDataPids()

  for oid in pkg.getSubjectObjectsByPredicate(d1_pyore.CITO.isDocumentedBy):
    obj['isDocumentedBy'].setdefault(oid[0], []).append(oid[1])

  for oid in pkg.getSubjectObjectsByPredicate(d1_pyore.CITO.documents):
    obj['documents'].setdefault(oid[0], []).append(oid[1])

  return obj


def packageToText(pkg, fto=sys.stdout):
  """Write a human-readable text summary of a resource map.

  The summary has a header with the resource map PID and id, followed by
  three sections: the aggregations with their contents, the CITO:documents
  relations and the CITO:isDocumentedBy relations. Every listed object is
  shown with both its id and its pid.

  Args:
    pkg: Resource map object accepted by packageToObject().
    fto: Object with a write() method that receives the text. The default is
      the sys.stdout that was current when this function was defined.

  Raises:
    KeyError: an id referenced by an aggregation or a CITO relation has no
      entry in the 'objects' mapping built by packageToObject().
  """
  obj = packageToObject(pkg)
  objects = obj['objects']

  def writeMembers(oids):
    # Numbered id/pid listing; the format is the same in all three sections.
    for j, oid in enumerate(oids, 1):
      pid = objects[oid]['pid']
      fto.write("    {0}:  id: {1}\n".format(j, oid))
      fto.write("       pid: {0}\n".format(pid))

  fto.write("OAI-ORE Description\n\n")
  fto.write("Resource Map Document PID: {0}\n".format(obj['pid']))
  fto.write("                       ID: {0}\n\n".format(obj['id']))

  fto.write("Aggregations\n\n")
  for i, ag in enumerate(obj['aggregations'], 1):
    fto.write("{0}: {1}\n".format(i, ag))
    fto.write("   Contents:\n")
    writeMembers(obj['aggregations'][ag])

  fto.write("\nCITO:documents\n")
  for i, doc in enumerate(obj['documents'], 1):
    fto.write("\nThe document:\n")
    # End the id line with a newline so the pid starts on its own line, as it
    # does in the isDocumentedBy section below.
    fto.write("{0}:  id: {1}\n".format(i, doc))
    fto.write("   pid: {0}\n\n".format(objects[doc]['pid']))
    fto.write("   describes:\n")
    writeMembers(obj['documents'][doc])

  fto.write("\nCITO:isDocumentedBy\n")
  for i, doc in enumerate(obj['isDocumentedBy'], 1):
    fto.write("\nThe data:\n")
    fto.write("{0}:  id: {1}\n".format(i, doc))
    fto.write("   pid: {0}\n\n".format(objects[doc]['pid']))
    fto.write("   is described by:\n")
    writeMembers(obj['isDocumentedBy'][doc])


def main(url, format):
  """Load an ORE document and print it as text or in a serialization format.

  Args:
    url: Location of the document, as accepted by downloadDocument().
    format: "text" for the summary written by packageToText(); any other
      value is passed to pkg.serialize() as doc_format. The comparison is
      exact, so callers normalize the case (the script entry point lowercases
      the command-line value).
  """
  ore_doc = downloadDocument(url)
  pkg = d1_pyore.ResourceMap()
  pkg.parse(data=ore_doc)
  # Test the parameter, not the module-level argparse namespace: that global
  # only exists when the file runs as a script, and it holds the raw value
  # rather than the lowercased one.
  if format == "text":
    packageToText(pkg)
    return
  # Single-argument call form prints the same on Python 2 and Python 3.
  print(pkg.serialize(doc_format=format))


if __name__ == "__main__":
  # Command-line entry point: parse the arguments, configure logging, then
  # dump the requested ORE document.
  parser = argparse.ArgumentParser(description='Show OAI-ORE object as text or serialized as json-ld, n3, xml')

  parser.add_argument('-l', '--log',
    default=logging.WARN,
    type=int,
    help="Set the logging level (debug=10, error=40)")

  parser.add_argument('url',
    help="Full URL to ORE object, path to ORE file, or '-' to read from stdin")

  parser.add_argument('-f', '--format',
                      default="text",
                      help="Format for output. Can be text or any of the standard serialization formats.")

  args = parser.parse_args()

  # Only the five standard numeric levels are accepted; anything else falls
  # back to INFO and is reported.
  if args.log not in (10,20,30,40,50):
    logging.basicConfig(level=logging.INFO)
    # logging.warning() replaces the deprecated logging.warn() alias, which
    # newer Python releases no longer provide.
    logging.warning("Invalid value %s for log level. Using 20 (INFO).", args.log)
  else:
    logging.basicConfig( level=args.log )

  # The format name is lowercased here because main() compares it exactly.
  main(args.url, args.format.lower())

