#!/usr/bin/env bash

## Licensed to the Apache Software Foundation (ASF) under one
## or more contributor license agreements.  See the NOTICE file
## distributed with this work for additional information
## regarding copyright ownership.  The ASF licenses this file
## to you under the Apache License, Version 2.0 (the
## "License"); you may not use this file except in compliance
## with the License.  You may obtain a copy of the License at
##
##     http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.

# Prints the command line help summary for tdbloader2 to stdout.
#
# Takes no arguments.  The here-document delimiter is quoted so the help
# text is emitted literally, with no shell expansion applied to it.
printUsage() {
  cat <<'EOF'
tdbloader2 - TDB Bulk Loader

Usage: tdbloader2 --loc <Directory> [Options] <Data> ...

Bulk loader for TDB which manipulates the data files directly and so
can only be used to create new databases.  This command relies on
POSIX utilities so will only work on POSIX operating systems.

If you wish to bulk load to an existing database please use tdbloader
instead.

Required options are as follows:

  -l <DatabaseDirectory>
  --loc <DatabaseDirectory>
    Sets the location in which the database should be created.

    This location must be a directory and must be empty, if a
    non-existent path is specified it will be created as a new
    directory.

  <Data>
    Specifies the path to one/more data files to load

Common additional options are as follows:

  -h
  --help
    Prints this help summary and exits

Advanced additional options are as follows:

  -d
  --debug
    Enable debug mode, adds extra debug output

  -j <JvmArgs>
  --jvm-args <JvmArgs>
    Sets the arguments that should be passed to the JVM for the
    JVM based portions of the build.

    Generally it is best to not change these unless you have been
    specifically advised to.  The scripts will use appropriate
    defaults if this is not specified.

    In particular be careful increasing the heap size since many
    parts of TDB actually use memory mapped files that live
    outside the heap so if the heap is too large the heap may
    conflict with the memory mapped files for memory space.

  -k
  --keep-work
    Keeps the temporary work files around after they are no longer
    needed.  May be useful for debugging.

  -p <Phase>
  --phase <Phase>
    Sets the phase of the build to run, supported values are:

      all      Full bulk load
      data     Data phase only
      index    Index phase only, requires the data phase to
               previously have been run

    When no phase is specified it defaults to all

  -s <SortArgs>
  --sort-args <SortArgs>
    Sets the arguments that should be passed to sort for the sort
    based portions of the build.

    Generally it is best not to change these as the scripts will
    use appropriate defaults for your system.

  -t
  --trace
    Enable trace mode, essentially sets -x within the scripts

EOF
}

# Resolves a symbolic link to the path it points at.
#
# Arguments: $1 - path that may be a symbolic link
# Outputs:   writes to stdout either the path unchanged (when it is not a
#            symbolic link) or the result of resolving it with readlink.
#            On darwin/bsd the result may be a relative path, exactly as
#            stored in the link; callers are expected to handle that.
resolveLink() {
  local target=$1

  # Nothing to resolve unless the path itself is a link
  if [[ ! -L "$target" ]]; then
    echo "$target"
    return
  fi

  if [[ "$OSTYPE" == darwin* || "$OSTYPE" == bsd* ]]; then
    # BSD style readlink behaves differently to GNU readlink, so -f is not
    # used here; follow the chain one hop at a time until the result is no
    # longer a link
    until [[ ! -L "$target" ]]; do
      target=$(readlink -- "$target")
    done
  else
    # Assume GNU readlink, where -f canonicalizes the whole path in one call
    target=$(readlink -f -- "$target")
  fi

  echo "$target"
}

# Derive JENA_HOME from this script's own location when the caller has not
# already provided one
if [[ -z "$JENA_HOME" ]]; then
  SCRIPT="$0"

  # Catch common issue: the script has been invoked through a symlink
  if [[ -L "$SCRIPT" ]]; then
    SCRIPT=$(resolveLink "$0")
    # A relative link target is relative to the directory holding the link,
    # so prefix that directory; absolute targets are used as they are
    if [[ "$SCRIPT" != /* ]]; then
      SCRIPT="$(dirname "$0")/$SCRIPT"
    fi
  fi

  # The root is the parent of the directory that contains the script
  JENA_HOME=$(cd "$(dirname "$SCRIPT")/.." && pwd)
  export JENA_HOME
fi

# If JENA_HOME is a symbolic link, replace it with the link's target so that
# paths built from it later refer to the real directory
if [ -L "${JENA_HOME}" ]; then
  # Remember where the link itself lives: a relative link target is relative
  # to this directory, not to the current working directory
  JENA_HOME_LINK_DIR=$(dirname "$JENA_HOME")
  JENA_HOME=$(resolveLink "$JENA_HOME")
  # If link is relative
  case "$JENA_HOME" in
    /*)
      # Already absolute
      ;;
    *)
      # Relative, anchor it at the directory containing the link.
      # (Previously this took dirname of the target itself, which discarded
      # the target's final component and typically produced ".")
      JENA_HOME="$JENA_HOME_LINK_DIR/$JENA_HOME"
      ;;
  esac
  unset JENA_HOME_LINK_DIR
  export JENA_HOME
  echo "Resolved symbolic links for JENA_HOME to $JENA_HOME"
fi

# The shared helper functions live in a separate script under JENA_HOME/bin;
# without it this script cannot continue
if [[ ! -e "${JENA_HOME}/bin/tdbloader2common" ]]; then
  echo "Unable to locate common functions script tdbloader2common"
  exit 1
fi

# Can source common functions
source "${JENA_HOME}/bin/tdbloader2common"

# ---- Setup
# Use a 1024M max heap unless JVM_ARGS is already set to something non-empty.
# NOTE(review): the argument processing section of this script resets
# JVM_ARGS afterwards, so this default does not appear to reach the child
# scripts - confirm whether that is intended.
: "${JVM_ARGS:=-Xmx1024M}"

# Classpath is every entry under lib; JENA_HOME is expanded but the * is kept
# literal (no globbing happens inside double quotes)
JENA_CP="${JENA_HOME}/lib/*"
SOCKS=""
LOGGING="-Dlog4j.configuration=file:${JENA_HOME}/jena-log4j.properties"

# Platform specific fixup
# On CYGWIN convert the classpath to Windows form and end it with a ';'
if [[ "$(uname)" == CYGWIN* ]]; then
  JENA_CP="$(cygpath -wp "$JENA_CP");"
fi

export JENA_CP


# Process arguments
#
# Options are consumed from the front of the positional parameters.  Parsing
# stops at "--" or at the first argument that does not start with "-"; what
# is left in "$@" afterwards is the list of data files.
LOC=
PHASE=
KEEP_WORK=0
DEBUG=0
TRACE=0
# Any JVM_ARGS value set before this point is discarded here; from now on it
# is only populated by -j/--jvm-args
JVM_ARGS=
# NOTE: SORT_ARGS is not reset here, so it keeps whatever value it already
# has unless -s/--sort-args is given

while [ $# -gt 0 ]
do
  ARG=$1
  case "$ARG" in
    -d|--debug)
      # Debug Mode
      shift
      DEBUG=1
      ;;
    -h|--help)
      # Help
      printUsage
      exit 0
      ;;
    -j|--jvm-args)
      # JVM Arguments, the value is the next argument
      shift
      JVM_ARGS="$1"
      shift
      ;;
    -k|--keep-work)
      # Keep work files
      shift
      KEEP_WORK=1
      ;;
    -l|--loc|-loc)
      # Location space separated
      shift
      LOC="$1"
      shift
      ;;
    -*loc=*)
      # Location = separated
      # Strip everything up to and including the FIRST '=' so that a
      # location which itself contains "loc=" (e.g. --loc=/data/my-loc=1)
      # is kept intact; a greedy pattern substitution would eat into the path
      LOC=${ARG#*=}
      shift
      ;;
    -p|--phase)
      # Phase space separated
      shift
      PHASE="$1"
      shift
      ;;
    -s|--sort-args)
      # Sort arguments, the value is the next argument
      shift
      SORT_ARGS=$1
      shift
      ;;
    -t|--trace)
      # Trace mode, tracing of this script starts immediately
      shift
      TRACE=1
      set -x
      ;;
    --)
      # Arguments separator
      # All further arguments are treated as data files
      shift
      break
      ;;
    -*)
      # Looks like an option but not known
      # (abort comes from the sourced common functions script)
      abort 1 "Unrecognized option $ARG, if this was meant to be a data file separate options from data files with --"
      ;;
    *)
      # Once we see an unrecognized argument that doesn't look like an option treat as start of files to process
      break
      ;;
  esac
done

# Default to running every phase when none was requested
if [ -z "$PHASE" ]; then
  PHASE="all"
fi

# Prepare arguments to pass to children
#
# These are arrays rather than space-joined strings so that a multi-word
# --jvm-args or --sort-args value (e.g. "-Xmx2G -Xms1G") reaches the child
# script as ONE argument following its option, instead of being word-split
# into stray extra options.
COMMON_ARGS=()
DATA_ARGS=()
INDEX_ARGS=()
if [ "$KEEP_WORK" = 1 ]; then
  COMMON_ARGS+=(--keep-work)
fi
if [ "$DEBUG" = 1 ]; then
  COMMON_ARGS+=(--debug)
fi
if [ "$TRACE" = 1 ]; then
  COMMON_ARGS+=(--trace)
fi
if [ -n "$JVM_ARGS" ]; then
  COMMON_ARGS+=(--jvm-args "$JVM_ARGS")
fi
if [ -n "$SORT_ARGS" ]; then
  INDEX_ARGS+=(--sort-args "$SORT_ARGS")
fi

# ---- Start
# (info and abort come from the sourced common functions script)
info "-- TDB Bulk Loader Start"
TIME1="$(date +%s)"

TOOL_DIR="$JENA_HOME/bin"

# Runs the data phase child script over the data files given as arguments,
# calling abort with the child's exit status if it fails.
runDataPhase() {
  "${TOOL_DIR}/tdbloader2data" "${COMMON_ARGS[@]}" "${DATA_ARGS[@]}" --loc "$LOC" -- "$@"
  RET=$?
  if [ $RET -ne 0 ]; then
    abort $RET "Failed during data phase"
  fi
}

# Runs the index phase child script, calling abort with the child's exit
# status if it fails.
runIndexPhase() {
  "${TOOL_DIR}/tdbloader2index" "${COMMON_ARGS[@]}" "${INDEX_ARGS[@]}" --loc "$LOC"
  RET=$?
  if [ $RET -ne 0 ]; then
    abort $RET "Failed during index phase"
  fi
}

case "$PHASE" in
  all)
    # All Phases, data first and then index
    runDataPhase "$@"
    runIndexPhase
    ;;

  data)
    # Data Phase
    runDataPhase "$@"
    ;;

  index)
    # Index Phase
    runIndexPhase
    ;;
  *)
    abort 1 "Unrecognized phase $PHASE"
    ;;
esac

# ---- End
TIME2="$(date +%s)"
info "-- TDB Bulk Loader Finish"
ELAPSED=$((TIME2 - TIME1))
info "-- $ELAPSED seconds"
