diff options
author | Paul Sokolovsky <paul.sokolovsky@linaro.org> | 2016-01-15 14:04:57 +0200 |
---|---|---|
committer | Paul Sokolovsky <paul.sokolovsky@linaro.org> | 2016-01-15 14:04:57 +0200 |
commit | 67eba98d03d6341b3961ee64005eaf0343cee99e (patch) | |
tree | abdfbe69a6e79fc6fd7427c78dd7e7a0f625b031 | |
parent | 4bcaad6be4c8fd4789c1b7a3a179892c1e473ba6 (diff) |
analyse-logs-old.sh: Copy of analyse-logs.sh, before applying more changes.
analyse-logs.sh is going to be switched to use incrementally resolved logs
as produced by resolve-logs-incremental.sh script.
Change-Id: I18d80f6994b74f13763f1a1722e3febd0235369c
-rwxr-xr-x | analyse-logs-old.sh | 632 |
1 files changed, 632 insertions, 0 deletions
#!/bin/bash
#########################################################################
#
# This script processes linaro web logs, producing different reports.
# Reports that can be run:
#  1) Awffull (webalizer fork)
#  2) Webalizer
#  3) Webdruid (webalizer fork)
#  4) Visitors
#
# You can run those tools on the full log or a processed log.
# The full log shows path traveled and other info, the processed log
# really only shows what files were downloaded; for releases and
# snapshots.linaro.org this is important information.
#
#  1) full log: analysis of an unaltered web log
#  2) filtered: analysis of the web log with only the .gz and .bz2 files
#     and everything else stripped out
#
# To speed things up we use dnshistory to do reverse DNS.
# As a time cheat we only run the lookup on the www.linaro.org log, so
# you should always process that log first so your DNS data is fresh.
#
# Usage: not meant to be run directly.  The caller must provide WEB_NAME
# and TRUE (FALSE is read as well); $1 optionally names the config file
# to source (default: "config").
#
#########################################################################

# first save where we are
STARTING_LOCATION=$(pwd)

# house keeping
if [ -z "$WEB_NAME" ] || [ -z "$TRUE" ] ; then
    echo "WEB SITE NAME or other variables NOT SET"
    echo "This script is not designed to be called directly"
    exit 1
fi

#TRUE=1
#FALSE=0

#WEB_NAME="snapshots.linaro.org"
#WEB_NAME="releases.linaro.org"
#WEB_NAME="www.linaro.org"

# Which tools are we running, if not set at all set to false
: "${AWFFULL:=$FALSE}"
: "${WEBALIZER:=$FALSE}"
: "${WEBDRUID:=$FALSE}"
: "${VISITORS:=$FALSE}"

# this allows an external script to set DEBUG, or if it's not set,
# then set it to false here so the script is run quietly
: "${DEBUG:=$FALSE}"

# do we want to extract file info and run the log analyzers on only that data
# quite handy for snapshots and releases .linaro.org so we get a better
# picture of what is downloaded.
# 1 = true, 0 = false
case "$WEB_NAME" in
    snapshots.linaro.org|releases.linaro.org|builds.96boards.org)
        EXTRACT_GZ_BZ2_FILES=$TRUE
        EXTRACT_TOOLCHAIN_LOG=$TRUE
        ;;
    *)
        EXTRACT_GZ_BZ2_FILES=$FALSE
        EXTRACT_TOOLCHAIN_LOG=$FALSE
        ;;
esac

# this allows an external script to set DO_GEOIP_LOOKUP or DO_REV_DNS_LOOKUP
# as desired but by default they are set to true so the log files have
# as much data in them as possible.
: "${DO_GEOIP_LOOKUP:=$TRUE}"
: "${DO_REV_DNS_LOOKUP:=$TRUE}"

# Load config.  It is sourced after the defaults above, so it may override
# any of them.  The variables used below that are not assigned in this
# script (INPUT_PATH, WORK_PATH, OUTPUT_PATH, the *_LOG_NAME names,
# DNSHISTORY_*) must come from the config or the caller.
CONFIG=${1:-config}
source "$CONFIG"

#if [ $WEB_NAME = "snapshots.linaro.org" ] || [ $WEB_NAME = "releases.linaro.org" ] ; then
#    RAW_LOG_NAME="$WEB_NAME-$PROCESSED_LOG_NAME"
#elif [ $WEB_NAME = "www.linaro.org" ] ; then
#    RAW_LOG_NAME="$PROCESSED_LOG_NAME"
#fi

#########################################################################
#
# only have old logs on www.linaro.org as it's hard to get stuff done
# on that machine
#
# NOTE(review): OLD_LOG_PATH / OLD_LOG_NAME are not referenced anywhere
# else in this script; kept in case something outside of it reads them.
#
#########################################################################
if [ "$WEB_NAME" = "www.linaro.org" ] ; then
    OLD_LOG_PATH="$INPUT_PATH/2013"
    OLD_LOG_NAME="$PROCESSED_LOG_NAME"
else
    OLD_LOG_PATH="$INPUT_PATH"
    OLD_LOG_NAME=""
fi

# Per-tool names.  The helpers below look these up by name, so the
# patterns <TOOL>_<VARIANT>_PATH_NAME (report directory under OUTPUT_PATH)
# and <TOOL>_CONF_<VARIANT>_NAME (config file under WORK_PATH) must be
# kept for every TOOL in AWFFULL/WEBALIZER/WEBDRUID and every VARIANT in
# FULL/FILTERED/TOOLCHAIN.
AWFFULL_FULL_PATH_NAME="awffull.full"
AWFFULL_CONF_FULL_NAME="awffull.conf.full"

AWFFULL_FILTERED_PATH_NAME="awffull.filtered"
AWFFULL_CONF_FILTERED_NAME="awffull.conf.filtered"

AWFFULL_TOOLCHAIN_PATH_NAME="awffull.toolchain"
AWFFULL_CONF_TOOLCHAIN_NAME="awffull.conf.toolchain"

AWFFULL_EXTRA_CMD_LINE_OPTIONS=""

WEBALIZER_FULL_PATH_NAME="webalizer.full"
WEBALIZER_CONF_FULL_NAME="webalizer.conf.full"

WEBALIZER_FILTERED_PATH_NAME="webalizer.filtered"
WEBALIZER_CONF_FILTERED_NAME="webalizer.conf.filtered"

WEBALIZER_TOOLCHAIN_PATH_NAME="webalizer.toolchain"
WEBALIZER_CONF_TOOLCHAIN_NAME="webalizer.conf.toolchain"

WEBALIZER_EXTRA_CMD_LINE_OPTIONS=""

WEBDRUID_FULL_PATH_NAME="webdruid.full"
WEBDRUID_CONF_FULL_NAME="webdruid.conf.full"

WEBDRUID_FILTERED_PATH_NAME="webdruid.filtered"
WEBDRUID_CONF_FILTERED_NAME="webdruid.conf.filtered"

WEBDRUID_TOOLCHAIN_PATH_NAME="webdruid.toolchain"
WEBDRUID_CONF_TOOLCHAIN_NAME="webdruid.conf.toolchain"

WEBDRUID_EXTRA_CMD_LINE_OPTIONS=""

#########################################################################
# Small helpers shared by the functions below
#########################################################################

# Succeeds when $1 numerically equals $TRUE.
is_true ()
{
    [ "$1" -eq "$TRUE" ]
}

# Succeeds when DEBUG is switched on.
debug_enabled ()
{
    is_true "$DEBUG"
}

# Print $1 on stdout, but only when DEBUG is switched on.
debug_msg ()
{
    if debug_enabled ; then
        echo "$1"
    fi
}

# Succeeds when the report variant $1 has to be produced:
#   FULL      - always
#   FILTERED  - when EXTRACT_GZ_BZ2_FILES is true
#   TOOLCHAIN - when both EXTRACT_GZ_BZ2_FILES and EXTRACT_TOOLCHAIN_LOG
#               are true
variant_enabled ()
{
    case "$1" in
        FULL)
            return 0
            ;;
        FILTERED)
            is_true "$EXTRACT_GZ_BZ2_FILES"
            ;;
        TOOLCHAIN)
            is_true "$EXTRACT_GZ_BZ2_FILES" && is_true "$EXTRACT_TOOLCHAIN_LOG"
            ;;
        *)
            return 1
            ;;
    esac
}

# Print the name of the log file (inside WORK_PATH) that feeds variant $1.
variant_log_name ()
{
    case "$1" in
        FULL)      echo "$PROCESSED_LOG_NAME" ;;
        FILTERED)  echo "$FILTERED_LOG_NAME" ;;
        TOOLCHAIN) echo "$TOOLCHAIN_LOG_NAME" ;;
    esac
}

# Create directory $1 (with parents) unless it already exists.
ensure_directory ()
{
    if [ ! -d "$1" ] ; then
        mkdir -p "$1"
        debug_msg "mkdir -p $1"
    fi
}

# Remove file $1; a missing file is not an error.
remove_file ()
{
    rm -f -- "$1"
    debug_msg "rm -f $1"
}

#########################################################################
# Config files
#########################################################################

write.conf.file ()
{
# $1 = File to write the conf into
# $2 = Output directory for the analysis to be written into
# $3 = Info to be written into the web analysis to id what you are looking at

    {
        echo "# DO NOT EDIT THIS FILE, modify analyse-snapshot.sh as it rewrites this file everytime it's run"
        echo "OutputDir $2"
        echo "HTMLPost $3"
        echo "ReportTitle \"Usage Statistics for the $3 of \""
        echo "HostName $WEB_NAME"
        echo "ReallyQuiet yes"
        echo "TopSites 100"
        echo "AllSites yes"
        echo "TopURLs 100"
        echo "AllURLs yes"
        echo "GeoIP no"
#        echo "GeoIPDatabase $GEO_IP_DB"
        echo "IgnoreURL /get-remote-static"
        echo "IgnoreURL /linaro-openid/login"
        echo "IgnoreURL /get-textile-files"
        echo "IgnoreURL /css/*"
        echo "IgnoreURL /static/*"
        echo "IgnoreURL /js/*"
        echo "IgnoreURL /license"
    } > "$1"

    if [ "$WEB_NAME" = "cards.linaro.org" ] && [ "$1" = "$WORK_PATH/$WEBDRUID_CONF_FULL_NAME" ] ; then
        # Graphs take too long to generate, being killed if tried
        {
            echo "PathGraph no"
            echo "UsersFlow no"
        } >> "$1"
    fi
}

# Write one config file per enabled tool (awffull, webalizer, webdruid)
# and per enabled variant into WORK_PATH.
write_config_files ()
{
    local tool variant conf_ref dir_ref

    for tool in AWFFULL WEBALIZER WEBDRUID ; do
        # ${!tool} is the on/off flag of the tool, e.g. $AWFFULL
        is_true "${!tool}" || continue
        for variant in FULL FILTERED TOOLCHAIN ; do
            variant_enabled "$variant" || continue
            conf_ref="${tool}_CONF_${variant}_NAME"
            dir_ref="${tool}_${variant}_PATH_NAME"
            write.conf.file "$WORK_PATH/${!conf_ref}" "$OUTPUT_PATH/${!dir_ref}" "$(variant_log_name "$variant")"
            debug_msg "write.${!conf_ref}"
        done
    done
}

#########################################################################
# Directories and temporary files
#########################################################################

# Make sure WORK_PATH and the report directory of every enabled
# tool/variant exist, then change into WORK_PATH.
prep_directories ()
{
    local tool variant dir_ref

    ensure_directory "$WORK_PATH"

    for tool in AWFFULL WEBALIZER WEBDRUID ; do
        is_true "${!tool}" || continue
        for variant in FULL FILTERED TOOLCHAIN ; do
            variant_enabled "$variant" || continue
            dir_ref="${tool}_${variant}_PATH_NAME"
            ensure_directory "$OUTPUT_PATH/${!dir_ref}"
        done
    done

    # Everything below assumes we are inside WORK_PATH; give up if we
    # cannot get there instead of littering the caller's directory.
    cd "$WORK_PATH" || { echo "cannot cd to $WORK_PATH" >&2 ; exit 1 ; }
}

# Delete the temporary logs and the generated config files from WORK_PATH.
cleanup ()
{
    local tool variant conf_ref

    # now delete the temp log file.
    remove_file "$WORK_PATH/$TMP_LOG_NAME"

    # processed log first, then the filtered / toolchain logs if in use
    for variant in FULL FILTERED TOOLCHAIN ; do
        variant_enabled "$variant" || continue
        remove_file "$WORK_PATH/$(variant_log_name "$variant")"
    done

    # Config files of every enabled tool.  All three tools are treated
    # alike: the webalizer toolchain conf used to be removed only when the
    # filtered conf still existed, which could leave it behind.
    for tool in AWFFULL WEBALIZER WEBDRUID ; do
        is_true "${!tool}" || continue
        for variant in FULL FILTERED TOOLCHAIN ; do
            variant_enabled "$variant" || continue
            conf_ref="${tool}_CONF_${variant}_NAME"
            remove_file "$WORK_PATH/${!conf_ref}"
        done
    done
}

#########################################################################
# Log extraction
#########################################################################

# Build the processed log (and, when enabled, the filtered and toolchain
# logs) in WORK_PATH from the compressed logs found in INPUT_PATH.
extract_logs ()
{
    local -a raw_logs

    # Build a single log file that is not gzipped.

    # Now in 2014 we can just preprocess all 2012 and 2013 files and save processing time for all 3 web servers
    # then just grab all of the 2014 files to process
    # *access.log-2014*
    # preprocessed-*-2013-access.log.gz

    # RAW_LOG_NAME is presumably a glob pattern (see the example above), so
    # the expansion is left unquoted on purpose.  If nothing matches, the
    # pattern stays literal and the -e test below fails.
    # (The old test "[ x > 0 ]" was always true and created a file named "0".)
    raw_logs=( $INPUT_PATH/$RAW_LOG_NAME )
    if [ -e "${raw_logs[0]}" ] ; then
        debug_msg "$WEB_NAME making access.log by zcat $INPUT_PATH/$RAW_LOG_NAME"
        zcat "${raw_logs[@]}" | grep -v "::1" > "$WORK_PATH/$TMP_LOG_NAME"
    else
        debug_msg "$WEB_NAME no raw logs match $INPUT_PATH/$RAW_LOG_NAME"
        # later steps read the temp log unconditionally, so keep it present
        : > "$WORK_PATH/$TMP_LOG_NAME"
    fi

    # Previous years logs preprocessed into a single compressed file to save processing time.
    # Missing preprocessed logs are not an error (hence "|| true"); the
    # complaint from zcat is only shown when debugging.
    if debug_enabled ; then
        zcat "$INPUT_PATH"/preprocessed*access.log.gz > "$WORK_PATH/$PROCESSED_LOG_NAME" || true
    else
        zcat "$INPUT_PATH"/preprocessed*access.log.gz > "$WORK_PATH/$PROCESSED_LOG_NAME" 2>/dev/null || true
    fi

    if is_true "$DO_REV_DNS_LOOKUP" || is_true "$DO_GEOIP_LOOKUP" ; then
        # If it's www.linaro.org build the DNS database
        # This is a tad risky as we could have different folks coming directly
        # into releases or snapshots than the main site, that said the risk is
        # low and the speedup huge so it's worth it.
        if [ "$WEB_NAME" = "www.linaro.org" ] ; then
            debug_msg "About to do dnshistory lookup"
            # DNSHISTORY_OPTS is left unquoted so it can carry several options
            if debug_enabled ; then
                /usr/bin/dnshistory -L $DNSHISTORY_OPTS -d "$DNSHISTORY_DB" -f "$WORK_PATH/$TMP_LOG_NAME"
            else
                /usr/bin/dnshistory -L $DNSHISTORY_OPTS -d "$DNSHISTORY_DB" -f "$WORK_PATH/$TMP_LOG_NAME" > /dev/null
            fi
        fi

        # Now translate ip addresses to DNS names for all log files
        if is_true "$DO_GEOIP_LOOKUP" ; then
            # if GEOIP LOOKUP is desired do both GEOIP and reverse DNS lookup at the same time
            # the iploc.py program was modified to read both databases and do both in one pass.
            debug_msg "About to do GEOIP LOOKUP and dnshistory replace"
            python "$STARTING_LOCATION/iploc.py" --config="$STARTING_LOCATION/$CONFIG" \
                "$WORK_PATH/$TMP_LOG_NAME" >> "$WORK_PATH/$PROCESSED_LOG_NAME"
        else
            # GEOIP info not requested so do the reverse DNS only
            debug_msg "About to do dnshistory replace only"
            /usr/bin/dnshistory -T --logtype=www -d "$DNSHISTORY_DB" -f "$WORK_PATH/$TMP_LOG_NAME" >> "$WORK_PATH/$PROCESSED_LOG_NAME"
        fi
    else
        debug_msg "No GEOIP LOOKUP or Reverse DNS"
        cat "$WORK_PATH/$TMP_LOG_NAME" >> "$WORK_PATH/$PROCESSED_LOG_NAME"
    fi

    # now make a new file with only .gz, bz2, xz, exe, and zip files downloaded
    # this grep can take some time to run, it's using a regular expression to extract compressed files
    if is_true "$EXTRACT_GZ_BZ2_FILES" ; then
        # 'gcc-linaro ' used to be spelled "gcc-linaro\ "; the stray backslash
        # matched the same text but newer GNU grep warns about it.
        grep -E '\<*\.(bz2|gz|xz|exe|zip)\>' "$WORK_PATH/$PROCESSED_LOG_NAME" \
            | grep -v 'gcc-linaro ' \
            > "$WORK_PATH/$TMP_LOG_NAME"
        debug_msg "creating filtered log"

        # strip out our known IP's and some standard extra junk we don't need or care about
        # -F: these are literal strings.  As regular expressions the leading
        # dot matched any character, so e.g. ".ico" also threw away every
        # line containing "silicon".
        grep -v -F \
            -e '.asc' \
            -e 'HEAD' \
            -e 'OPTIONS' \
            -e '.png' \
            -e '.ico' \
            -e '.css' \
            -e '.js' \
            -e 'validation.linaro.org' \
            "$WORK_PATH/$TMP_LOG_NAME" \
            > "$WORK_PATH/$FILTERED_LOG_NAME"

        if is_true "$EXTRACT_TOOLCHAIN_LOG" ; then
            debug_msg "creating toochain log"
            grep -E '\<*gcc-linaro' "$WORK_PATH/$FILTERED_LOG_NAME" > "$WORK_PATH/$TOOLCHAIN_LOG_NAME"
        fi
    fi
}

#########################################################################
# Log analysis
#########################################################################

# Run one webalizer-style analyser over every enabled variant.
# $1 = tool prefix (AWFFULL, WEBALIZER or WEBDRUID); does nothing when
#      the flag variable of that name is not true
# $2 = command to execute (awffull, webalizer or webdruid)
run_stats_tool ()
{
    local tool=$1 cmd=$2
    local variant conf_ref opts_ref log_file

    is_true "${!tool}" || return 0

    opts_ref="${tool}_EXTRA_CMD_LINE_OPTIONS"
    for variant in FULL FILTERED TOOLCHAIN ; do
        variant_enabled "$variant" || continue
        conf_ref="${tool}_CONF_${variant}_NAME"
        log_file="$WORK_PATH/$(variant_log_name "$variant")"
        # the extra options stay unquoted so they split into separate words
        "$cmd" ${!opts_ref} -c "$WORK_PATH/${!conf_ref}" "$log_file" > /dev/null
        debug_msg "$cmd processing $log_file"
    done
}

# Run visitors on log $2 (a name inside WORK_PATH), writing the HTML
# report to $OUTPUT_PATH/$1.  All output of visitors is discarded.
run_visitors ()
{
    visitors --ignore-404 -A --prefix "$WEB_NAME" -o html --trails \
        --output-file "$OUTPUT_PATH/$1" "$WORK_PATH/$2" &> /dev/null
}

# Copy log $1 from WORK_PATH into OUTPUT_PATH and gzip the copy.
archive_log ()
{
    cp "$WORK_PATH/$1" "$OUTPUT_PATH/."
    gzip -f -9 "$OUTPUT_PATH/$1"
}

# Run every enabled analyser; with DEBUG on, a timestamp is printed
# before each tool's section.
process_logs ()
{
    ## Awffull
    # use all the data in the file, then the filtered / toolchain logs
    if debug_enabled ; then
        date
    fi
    run_stats_tool AWFFULL awffull

    ## Webalizer
    if debug_enabled ; then
        date
    fi
    run_stats_tool WEBALIZER webalizer

    ## Visitors
    if debug_enabled ; then
        date
    fi
    if is_true "$VISITORS" ; then
        debug_msg "visitors processing $WORK_PATH/$PROCESSED_LOG_NAME"
        debug_msg "visitors --ignore-404 -A --prefix $WEB_NAME -o html --trails --output-file $OUTPUT_PATH/visitors-all.html $WORK_PATH/$PROCESSED_LOG_NAME"
        run_visitors "visitors-all.html" "$PROCESSED_LOG_NAME"

        if variant_enabled FILTERED ; then
            # Now use the filtered log that has only the .bz2 or .gz files and look at those patterns
            run_visitors "visitors-filtered.html" "$FILTERED_LOG_NAME"
            archive_log "$FILTERED_LOG_NAME"
            debug_msg "visitors processing $WORK_PATH/$FILTERED_LOG_NAME and then creating $OUTPUT_PATH/$FILTERED_LOG_NAME.gz"
        fi

        if variant_enabled TOOLCHAIN ; then
            run_visitors "visitors-toolchain.html" "$TOOLCHAIN_LOG_NAME"
            archive_log "$TOOLCHAIN_LOG_NAME"
            debug_msg "visitors processing $WORK_PATH/$TOOLCHAIN_LOG_NAME and then creating $OUTPUT_PATH/$TOOLCHAIN_LOG_NAME.gz"
        fi
    fi

    ## Webdruid
    if debug_enabled ; then
        date
    fi
    run_stats_tool WEBDRUID webdruid
}

# OK, the actual section to do work, prep, write out config files as needed,
# extract log files, process the log files, cleanup and exit.

# verify and make if necessary all directories needed by this script,
# then get rid of anything a previous run left behind
prep_directories
cleanup

# write out the config files, remember they are changed here in this file
write_config_files

#/usr/bin/touch ~/bin/starttime
# combine all logs into a single file, then filter the log into a second file
extract_logs

# analyse the logs...
process_logs

# cleanup the extra files and stuff
if [ "$DEBUG" -ne "$TRUE" ] ; then
    cleanup
else
    echo "WARNING: Not cleaning up temporary files, beware of running out of disk space."
fi

# change back to where we were called from
cd "$STARTING_LOCATION"

# done, out of here