#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Juju action: run one SparkBench benchmark and publish its duration and
# throughput through the benchmark-* helpers.
#
# Do not call this script directly. Call it via one of the symlinks. The
# symlink name determines the benchmark to run.

set -ex

# Mark the action as failed with the given message and stop the script.
# Arguments: $1 - failure message handed to action-fail
fail_action() {
  action-set outcome="failure"
  action-fail "$1"
  exit 1
}

if ! charms.reactive is_state 'spark.started'; then
  fail_action 'Spark not yet ready'
fi

BENCHMARK=$(basename "$0")

# Juju actions have an annoying lowercase alphanum restriction, so translate
# that into the sparkbench name. RESULT_KEY is the string used to find this
# benchmark's line in the report file; it differs from the benchmark name
# for some entries (e.g. MatrixFactorization -> MF).
case "${BENCHMARK}" in
  connectedcomponent)
    BENCHMARK="ConnectedComponent"
    RESULT_KEY="ConnectedComponent"
    ;;
  decisiontree)
    BENCHMARK="DecisionTree"
    RESULT_KEY="DecisionTree"
    ;;
  kmeans)
    BENCHMARK="KMeans"
    RESULT_KEY="KMeans"
    ;;
  linearregression)
    BENCHMARK="LinearRegression"
    RESULT_KEY="LinearRegression"
    ;;
  logisticregression)
    BENCHMARK="LogisticRegression"
    RESULT_KEY="LogisticRegression"
    ;;
  matrixfactorization)
    BENCHMARK="MatrixFactorization"
    RESULT_KEY="MF"
    ;;
  pagerank)
    BENCHMARK="PageRank"
    RESULT_KEY="PageRank"
    ;;
  pca)
    BENCHMARK="PCA"
    RESULT_KEY="PCA"
    ;;
  pregeloperation)
    BENCHMARK="PregelOperation"
    RESULT_KEY="PregelOperation"
    ;;
  shortestpaths)
    BENCHMARK="ShortestPaths"
    RESULT_KEY="ShortestPaths"
    ;;
  sql)
    BENCHMARK="SQL"
    RESULT_KEY="sql"
    ;;
  stronglyconnectedcomponent)
    BENCHMARK="StronglyConnectedComponent"
    RESULT_KEY="StronglyConnectedComponent"
    ;;
  svdplusplus)
    BENCHMARK="SVDPlusPlus"
    RESULT_KEY="SVDPlusPlus"
    ;;
  svm)
    BENCHMARK="SVM"
    RESULT_KEY="SVM"
    ;;
  *)
    # Unmapped names are left as-is and validated against the application
    # list below; give RESULT_KEY a value so later greps never see an
    # empty pattern.
    RESULT_KEY="${BENCHMARK}"
    ;;
esac

SB_HOME="/home/ubuntu/SparkBench"
SB_APPS="${SB_HOME}/bin/applications.lst"

if [[ ! -f "${SB_APPS}" ]]; then
  fail_action "ERROR: Could not find SparkBench application list"
fi

# Test grep's status inside the condition: `grep -c` exits non-zero when the
# count is 0, which under `set -e` would abort the script before the failure
# could be reported through action-fail.
if ! grep -q -- "^${BENCHMARK}" "${SB_APPS}"; then
  fail_action "ERROR: Invalid benchmark (${BENCHMARK})"
fi

# create dir to store results
RUN=$(date +%s)
RESULT_DIR="/opt/sparkbench-results/${BENCHMARK}"
RESULT_LOG="${RESULT_DIR}/${RUN}.log"
mkdir -p "${RESULT_DIR}"
chown -R ubuntu:ubuntu "${RESULT_DIR}"

# user running the benchmark (spark for local modes; ubuntu for yarn-*)
SB_USER="spark"

# make sure our report file is writable by user + group members
SB_REPORT="${SB_HOME}/num/bench-report.dat"
if [[ -f "${SB_REPORT}" ]]; then
  chmod 664 "${SB_REPORT}"
fi

# Benchmark input data is packed into our sparkbench.tgz, which makes
# it available on all spark units. In yarn mode, however, the nodemanagers
# act as the spark workers and will not have access to this local data.
# In yarn mode, generate our own input data (stored in hdfs) so
# nodemanagers can access it.
MODE=$(config-get spark_execution_mode)
if [[ "${MODE}" == "yarn"* ]]; then
  SB_USER="ubuntu"
  echo 'generating data'
  sudo -u "${SB_USER}" "${SB_HOME}/${BENCHMARK}/bin/gen_data.sh"
fi

# run the benchmark
echo 'running benchmark'
benchmark-start
sudo -u "${SB_USER}" "${SB_HOME}/${BENCHMARK}/bin/run.sh"
benchmark-finish

# collect our data (the last matching line in our bench-report.dat file)
DATA=$(grep -- "${RESULT_KEY}" "${SB_REPORT}" | tail -1)
if [[ -z "${DATA}" ]]; then
  # Without a result line there is nothing meaningful to publish; report a
  # failure instead of sending empty data points with a "success" outcome.
  fail_action "ERROR: No results found for ${RESULT_KEY} in ${SB_REPORT}"
fi

# Fields 3 and 5 of the comma-separated line are reported as duration and
# throughput; surrounding blanks are stripped so only the value is sent.
DURATION=$(awk -F, '{gsub(/^[ \t]+|[ \t]+$/, "", $3); print $3}' <<<"${DATA}")
THROUGHPUT=$(awk -F, '{gsub(/^[ \t]+|[ \t]+$/, "", $5); print $5}' <<<"${DATA}")

# send data points and composite score
benchmark-data 'duration' "${DURATION}" 'secs' 'asc'
benchmark-data 'throughput' "${THROUGHPUT}" 'MB/sec' 'desc'
benchmark-composite "${DURATION}" 'secs' 'asc'

# send raw data (benchmark-raw takes a file)
printf '%s\n' "${DATA}" > "${RESULT_LOG}"
benchmark-raw "${RESULT_LOG}"

action-set outcome="success"