#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Stop at the first failing command and trace each command as it executes.
set -o errexit
set -o xtrace

# The action is only useful once the 'spark.started' state has been set;
# otherwise report the failure to Juju and stop.
charms.reactive is_state 'spark.started' || {
    action-set outcome="failure"
    action-fail 'Spark not yet ready'
    exit 1
}

# Do not call this script directly. Call it via one of the symlinks. The
# symlink name determines the benchmark to run.

#######################################
# Translate a Juju action name into the matching SparkBench names.
# Juju action names are restricted to lowercase alphanumerics, so every
# supported action name is mapped to its mixed-case SparkBench name.
# Globals:
#   BENCHMARK  (written) SparkBench benchmark name; a name that is not in
#              the table is stored unchanged.
#   RESULT_KEY (written) key identifying this benchmark's result lines;
#              left untouched when the name is not in the table.
# Arguments:
#   $1 - action name (the name this script was invoked as)
#######################################
resolve_sparkbench_names() {
  local action_name=$1

  case "${action_name}" in
    connectedcomponent)
      BENCHMARK="ConnectedComponent"
      RESULT_KEY="ConnectedComponent"
      ;;
    decisiontree)
      BENCHMARK="DecisionTree"
      RESULT_KEY="DecisionTree"
      ;;
    kmeans)
      BENCHMARK="KMeans"
      RESULT_KEY="KMeans"
      ;;
    linearregression)
      BENCHMARK="LinearRegression"
      RESULT_KEY="LinearRegression"
      ;;
    logisticregression)
      BENCHMARK="LogisticRegression"
      RESULT_KEY="LogisticRegression"
      ;;
    matrixfactorization)
      # The result key is abbreviated for this benchmark.
      BENCHMARK="MatrixFactorization"
      RESULT_KEY="MF"
      ;;
    pagerank)
      BENCHMARK="PageRank"
      RESULT_KEY="PageRank"
      ;;
    pca)
      BENCHMARK="PCA"
      RESULT_KEY="PCA"
      ;;
    pregeloperation)
      BENCHMARK="PregelOperation"
      RESULT_KEY="PregelOperation"
      ;;
    shortestpaths)
      BENCHMARK="ShortestPaths"
      RESULT_KEY="ShortestPaths"
      ;;
    sql)
      # The result key is lowercase for this benchmark.
      BENCHMARK="SQL"
      RESULT_KEY="sql"
      ;;
    stronglyconnectedcomponent)
      BENCHMARK="StronglyConnectedComponent"
      RESULT_KEY="StronglyConnectedComponent"
      ;;
    svdplusplus)
      BENCHMARK="SVDPlusPlus"
      RESULT_KEY="SVDPlusPlus"
      ;;
    svm)
      BENCHMARK="SVM"
      RESULT_KEY="SVM"
      ;;
    *)
      # Not a known action name: keep it as-is so that later validation
      # can reject it by name.
      BENCHMARK="${action_name}"
      ;;
  esac
}

# Quote $0 so an invocation path containing whitespace or glob characters
# is handed to basename as a single argument.
resolve_sparkbench_names "$(basename -- "$0")"

# Validate the requested benchmark, run it, and publish its results.
# Reads BENCHMARK and RESULT_KEY, which are set earlier in this script.
SB_HOME="/home/ubuntu/SparkBench"
SB_APPS="${SB_HOME}/bin/applications.lst"

# Without the application list the requested benchmark cannot be validated.
if [ ! -f "${SB_APPS}" ]; then
  action-set outcome="failure"
  action-fail "ERROR: Could not find SparkBench application list"
  exit 1
fi

# The benchmark is valid when some line of the list starts with its name.
# grep is tested directly in the 'if': it exits non-zero when nothing
# matches, and capturing its output in an assignment would make 'set -e'
# abort the script before the failure below could be reported.
if ! grep -q -e "^${BENCHMARK}" -- "${SB_APPS}"; then
  action-set outcome="failure"
  action-fail "ERROR: Invalid benchmark (${BENCHMARK})"
  exit 1
fi

# create dir to store results (one log per run, named by epoch seconds)
RUN=$(date +%s)
RESULT_DIR="/opt/sparkbench-results/${BENCHMARK}"
RESULT_LOG="${RESULT_DIR}/${RUN}.log"
mkdir -p "${RESULT_DIR}"
chown -R ubuntu:ubuntu "${RESULT_DIR}"

# user running the benchmark (spark for local modes; ubuntu for yarn-*)
SB_USER="spark"

# make sure our report file is writable by user + group members
SB_REPORT="${SB_HOME}/num/bench-report.dat"
if [ -f "${SB_REPORT}" ]; then
  chmod 664 "${SB_REPORT}"
fi

# Benchmark input data is packed into our sparkbench.tgz, which makes
# it available on all spark units. In yarn mode, however, the nodemanagers
# act as the spark workers and will not have access to this local data.
# In yarn mode, generate our own input data (stored in hdfs) so
# nodemanagers can access it.
MODE=$(config-get spark_execution_mode)
if [[ "${MODE}" == "yarn"* ]]; then
  SB_USER="ubuntu"
  echo 'generating data'
  sudo -u "${SB_USER}" "${SB_HOME}/${BENCHMARK}/bin/gen_data.sh"
fi

# run the benchmark
echo 'running benchmark'
benchmark-start
sudo -u "${SB_USER}" "${SB_HOME}/${BENCHMARK}/bin/run.sh"
benchmark-finish

# An empty key would make grep match every line of the report.
# NOTE(review): falling back to the benchmark name assumes the report is
# keyed by that name when no explicit key was mapped -- confirm.
RESULT_KEY="${RESULT_KEY:-${BENCHMARK}}"

# collect our data (the last matching line in our bench-report.dat file)
DATA=$(grep -e "${RESULT_KEY}" -- "${SB_REPORT}" | tail -n 1)

# Do not publish empty data points as a successful run.
if [ -z "${DATA}" ]; then
  action-set outcome="failure"
  action-fail "ERROR: No ${RESULT_KEY} results found in ${SB_REPORT}"
  exit 1
fi

# The report line is comma separated: field 3 is published as the duration
# and field 5 as the throughput. DATA is quoted so the shell neither
# glob-expands nor re-splits the line.
DURATION=$(printf '%s\n' "${DATA}" | awk -F, '{print $3}')
THROUGHPUT=$(printf '%s\n' "${DATA}" | awk -F, '{print $5}')

# send data points and composite score
benchmark-data 'duration' "${DURATION}" 'secs' 'asc'
benchmark-data 'throughput' "${THROUGHPUT}" 'MB/sec' 'desc'
benchmark-composite "${DURATION}" 'secs' 'asc'

# send raw data (benchmark-raw takes a file)
printf '%s\n' "${DATA}" > "${RESULT_LOG}"
benchmark-raw "${RESULT_LOG}"
action-set outcome="success"