  #!/bin/bash

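  # This script validates dftracer traces by checking that each trace file
  # parses as JSON. It has the following signature.
  #
  # usage: dftracer_validate [-hv] [-d input_directory]
  #   -v                      enable verbose mode
  #   -h                      display help
  #   -d input_directory      specify the input directory, which should contain .pfw or .pfw.gz files
  #                           (defaults to the current directory; a single trace file is also accepted).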

  # Log file that the *_echo helpers append to. The path is relative, so it is
  # resolved against whatever the working directory is at the time of writing.
  LOG_FILE='dftracer_validate.log'

  #######################################
  # Print a timestamped message to stdout and append the same line to the log.
  # Globals:   LOG_FILE (read) - file the line is appended to
  # Arguments: the message words; they are joined with single spaces
  # Outputs:   "<dd/mm/YYYY HH:MM:SS>  <message>" followed by a newline
  #######################################
  date_echo() {
    # Keep the timestamp local so callers' variables named dt are not clobbered.
    local dt
    dt=$(date '+%d/%m/%Y %H:%M:%S')
    printf '%s  %s\n' "$dt" "$*" | tee -a "$LOG_FILE"
  }

  #######################################
  # Print a timestamped progress message that the next output overwrites.
  # Globals:   LOG_FILE (read) - file the message is appended to
  # Arguments: the message words; they are joined with single spaces
  # Outputs:   stdout gets "<timestamp>  <message>" ending in a carriage
  #            return (no newline); the log gets the same text ending in a
  #            newline so that entries do not run together in the file.
  #######################################
  progress_date_echo() {
    # Keep the timestamp local so callers' variables named dt are not clobbered.
    local dt
    dt=$(date '+%d/%m/%Y %H:%M:%S')
    # printf '%s' emits the message verbatim; backslashes in it are not
    # interpreted as escape sequences.
    printf '%s  %s\r' "$dt" "$*"
    printf '%s  %s\n' "$dt" "$*" >> "$LOG_FILE"
  }

  # Input location; starts as the current working directory and is replaced by
  # the argument of -d when that option is given.
  LOG_DIR="${PWD}"

  #######################################
  # Print the command-line synopsis and terminate the script.
  # Arguments: none
  # Outputs:   the help text on stdout
  # Exits:     always, with status 1
  #######################################
  usage() {
    # The synopsis lists only the flags the option parser acts on (-h, -v, -d).
    printf 'usage: %s [-hv] [-d input_directory]\n' "$(basename -- "$0")"
    printf '%s\n' \
      "  -v                      enable verbose mode" \
      "  -h                      display help" \
      "  -d input_directory      specify input directories. should contain .pfw or .pfw.gz files."
    exit 1
  }

  #######################################
  # Parse the command-line flags.
  # Globals:   LOG_DIR  (written) - set to the argument of -d
  #            LOG_FILE (read)    - option errors are appended to it
  #            OPTIND   (written) - left pointing past the parsed options so
  #                                 the caller can shift them away
  # Arguments: the script's positional parameters
  # Exits:     through usage for -h, a missing option argument, or an
  #            unknown option
  #######################################
  parse_options() {
    local opt
    # The leading ':' selects getopts' silent mode: a missing argument is
    # reported as ':' and an unknown flag as '?', without built-in messages.
    while getopts ':vd:h' opt; do
      case "$opt" in
        d)
          LOG_DIR="${OPTARG}"
          ;;
        v)
          # Verbose mode traces every command for the rest of the run.
          set -x
          ;;
        h)
          usage
          ;;
        :)
          printf 'option requires an argument.\n\n' | tee -a "$LOG_FILE"
          usage
          ;;
        \?)
          # Escaped so that it matches a literal '?' instead of acting as a
          # single-character wildcard.
          printf 'Invalid command option.\n\n' | tee -a "$LOG_FILE"
          usage
          ;;
      esac
    done
  }

  parse_options "$@"

  # Resolve the input location to an absolute path so that the file names
  # collected below stay valid after the directory change.
  requested_path=$LOG_DIR
  if ! LOG_DIR=$(realpath "$requested_path"); then
    date_echo "Unable to resolve input path: $requested_path"
    exit 1
  fi

  # Drop the options that getopts consumed.
  shift "$((OPTIND - 1))"

  # Upper bound on concurrently running validation jobs: one per processor,
  # falling back to getconf and finally to 1 when nproc is unavailable.
  JOBS_LIMIT=$(nproc --all 2> /dev/null \
    || getconf _NPROCESSORS_ONLN 2> /dev/null \
    || echo 1)

  files=()
  if [[ -d "$LOG_DIR" ]]; then
    for candidate in "$LOG_DIR"/*.pfw*; do
      # An unmatched glob is left as the literal pattern, which is not a
      # regular file; names ending in .zindex are skipped by the processing
      # loop, so neither is counted as a trace.
      [[ -f "$candidate" && "$candidate" != *.zindex ]] || continue
      files+=("$candidate")
    done
    if ((${#files[@]} == 0)); then
      date_echo "The folder does not contain any pfw or pfw.gz files."
      exit 1
    fi
    # From here on the relative LOG_FILE path resolves inside the input
    # directory. This is top-level code, so failure must exit, not return.
    pushd "$LOG_DIR" > /dev/null || exit 1
  elif [[ -f "$LOG_DIR" ]]; then
    # A single trace file was given instead of a directory.
    files=("$LOG_DIR")
  else
    date_echo "Input path does not exist: $LOG_DIR"
    exit 1
  fi
  total=${#files[@]}
  date_echo "Found $total files to process."

  #######################################
  # Block until fewer than the allowed number of background jobs are running.
  # Globals:   JOBS_LIMIT (read) - maximum concurrent jobs; 1 if unset or empty
  # Outputs:   timestamped notices when it has to wait and when it resumes
  #######################################
  wait_for_job_slot() {
    local limit=${JOBS_LIMIT:-1}
    local running
    running=$(jobs -rp | wc -l)
    if ((running >= limit)); then
      date_echo "Waiting for running jobs to be less than $limit"
      while ((running >= limit)); do
        sleep 1
        running=$(jobs -rp | wc -l)
      done
      date_echo "Running jobs are now less than $limit"
    fi
  }

  #######################################
  # Check that one trace file parses as JSON and report the outcome.
  # Arguments: $1 - path of the .pfw or .pfw.gz file
  #            $2 - 1-based position of the file in the work list
  #            $3 - total number of files in the work list
  # Outputs:   a success/failure line and a progress line via the *_echo helpers
  #######################################
  validate_trace() {
    local trace_path=$1 position=$2 count=$3
    local trace_file=${trace_path##*/}
    local name
    local -a reader
    if [[ "$trace_file" == *.gz ]]; then
      # Compressed trace: strip the double extension, read through gunzip.
      name=${trace_file%.pfw.gz}
      reader=(gunzip -c)
    else
      name=${trace_file%.pfw}
      reader=(cat)
    fi
    # Lines containing '[' or ']' are dropped before the rest is handed to jq.
    # The data is streamed instead of being held in a shell string, and
    # pipefail (confined to this subshell) makes a failing reader count as a
    # validation failure.
    if (
      set -o pipefail
      "${reader[@]}" < "$trace_path" | grep -v "\[" | grep -v "\]" \
        | jq -e -c . > /dev/null
    ); then
      date_echo "Successfully parsed JSON for $name"
    else
      date_echo "Failed to parse JSON for $name"
    fi
    progress_date_echo "Completed $position of $count"
  }

  # loop over logs
  for file_index in "${!files[@]}"; do
    file=${files[$file_index]}
    # Files ending in .zindex are not validated.
    [[ "${file##*/}" == *.zindex ]] && continue
    # only look at files
    [[ -f "$file" ]] || continue
    wait_for_job_slot
    # Array indices start at 0; report a 1-based position so the last file
    # reads "N of N".
    validate_trace "$file" "$((file_index + 1))" "$total" &
  done
  # Let every background validation job finish before leaving, so that a
  # failure below cannot orphan jobs that are still running.
  wait
  # Undo the directory change made earlier when the input path is a directory.
  # This is top-level script code, so a failure must use exit; return is only
  # valid inside a function or a sourced file.
  if [ -d "$LOG_DIR" ]; then
    popd > /dev/null || exit 1
  fi
  # Finish the line left open by the carriage-return progress output.
  echo ""
  date_echo "Validating traces finished"