diff options
author | Paul Sokolovsky <paul.sokolovsky@linaro.org> | 2014-02-17 18:58:39 +0000 |
---|---|---|
committer | Paul Sokolovsky <paul.sokolovsky@linaro.org> | 2014-02-17 18:58:39 +0000 |
commit | 82d30707ae69285981970af87193ea6cdb954ba1 (patch) | |
tree | 8f0307b646e969457ef9867e35cb6a49115a1f3d /analyse-logs.sh | |
parent | 017dc19af71052394989085e11273be3bd8608e2 (diff) |
Capture latest changes as of 2014-02-17.
Diffstat (limited to 'analyse-logs.sh')
-rwxr-xr-x | analyse-logs.sh | 112 |
1 files changed, 73 insertions, 39 deletions
diff --git a/analyse-logs.sh b/analyse-logs.sh
index 6b98489..338f78c 100755
--- a/analyse-logs.sh
+++ b/analyse-logs.sh
@@ -40,16 +40,24 @@ fi
 #WEB_NAME="releases.linaro.org"
 #WEB_NAME="www.linaro.org"
-# Which tools are we running
-#AWFFULL=$TRUE
-#WEBALIZER=$TRUE
-#WEBDRUID=$TRUE
-#VISITORS=$TRUE
+# Which tools are we running, if not set at all set to false
+if [ -z "$AWFFULL" ] ; then
+ AWFFULL=$FALSE
+fi
+if [ -z "$WEBALIZER" ] ; then
+ WEBALIZER=$FALSE
+fi
+if [ -z "$WEBDRUID" ] ; then
+ WEBDRUID=$FALSE
+fi
+if [ -z "$VISITORS" ] ; then
+ VISITORS=$FALSE
+fi
 # this allows an external script to set DEBUG, or if it's not set,
 # then set it to false here so the script is run quietly
 if [ -z "$DEBUG" ] ; then
- DEBUG=$TRUE
+ DEBUG=$FALSE
 fi
 # do we want to extract file info and run the log analyzers on only that data
@@ -88,11 +96,12 @@ TMP_LOG_NAME="tmp.$PROCESSED_LOG_NAME"
 FILTERED_LOG_NAME="filtered.$PROCESSED_LOG_NAME"
 TOOLCHAIN_LOG_NAME="toolchain.$PROCESSED_LOG_NAME"
-if [ $WEB_NAME = "snapshots.linaro.org" ] || [ $WEB_NAME = "releases.linaro.org" ] ; then
- RAW_LOG_NAME="$WEB_NAME-$PROCESSED_LOG_NAME"
-elif [ $WEB_NAME = "www.linaro.org" ] ; then
- RAW_LOG_NAME="$PROCESSED_LOG_NAME"
-fi
+RAW_LOG_NAME="*access.log-2014*"
+#if [ $WEB_NAME = "snapshots.linaro.org" ] || [ $WEB_NAME = "releases.linaro.org" ] ; then
+# RAW_LOG_NAME="$WEB_NAME-$PROCESSED_LOG_NAME"
+#elif [ $WEB_NAME = "www.linaro.org" ] ; then
+# RAW_LOG_NAME="$PROCESSED_LOG_NAME"
+#fi
 #########################################################################
 # #
@@ -420,34 +429,48 @@ cleanup ()
 extract_logs ()
 {
+ # Now in 2014 we can just preprocess all 2012 and 2013 files and save procssing time for all 3 web servers
+ # then just grab all of the 2014 files to process
+ # *access.log-2014*
+ # preprocessed-*-2013-access.log.gz
+ x=`ls $CLOG/$RAW_LOG_NAME | wc -l`
+ if [ x > 0 ] ; then
+ zcat $CLOG/$RAW_LOG_NAME > $PPATH/$TMP_LOG_NAME
+ if [ $DEBUG -eq $TRUE ] ; then
+ echo "$WEB_NAME making access.log by zcat $CLOG/$RAW_LOG_NAME"
+ fi
+ fi
+ # 2013 logs preprocessed into a single compressed file to save processing time.
+ zcat $CLOG/preprocessed*access.log.gz > $PPATH/$PROCESSED_LOG_NAME
+
 # make sure there is not an existing access.log file
 # Build a single log file that is not gzipped.
- cat /dev/null > $PPATH/$TMP_LOG_NAME
- if [ $WEB_NAME = "www.linaro.org" ] ; then
+# cat /dev/null > $PPATH/$TMP_LOG_NAME
+# if [ $WEB_NAME = "www.linaro.org" ] ; then
 # This is becasue the logs were stored differently for part of the year
 # on www.linaro.org
 # zcat the daily files that are stored by date access.log-yyyymmdd on to
 # the tail of the new access.log file.
- x=`ls $CLOG/$PROCESSED_LOG_NAME-*.gz | wc -l`
- if [ x > 0 ] ; then
- zcat $CLOG/$PROCESSED_LOG_NAME-*.gz >> $PPATH/$TMP_LOG_NAME
- if [ $DEBUG -eq $TRUE ] ; then
- echo "$WEB_NAME making access.log by zcat $CLOG/$PROCESSED_LOG_NAME-*.gz"
- fi
- fi
+# x=`ls $CLOG/$PROCESSED_LOG_NAME-*.gz | wc -l`
+# if [ x > 0 ] ; then
+# zcat $CLOG/$PROCESSED_LOG_NAME-*.gz >> $PPATH/$TMP_LOG_NAME
+# if [ $DEBUG -eq $TRUE ] ; then
+# echo "$WEB_NAME making access.log by zcat $CLOG/$PROCESSED_LOG_NAME-*.gz"
+# fi
+# fi
 # zcat the preprocessed log into the file log, 9 Gig's worth
- zcat $CLOG/preprocessed-$PROCESSED_LOG_NAME-*.gz > $PPATH/$PROCESSED_LOG_NAME
+# zcat $CLOG/preprocessed-$PROCESSED_LOG_NAME-*.gz > $PPATH/$PROCESSED_LOG_NAME
 # releases and snapshots are stored with a "slightly" different naming convention
 # releases.linaro.org-access.log-yyyymmdd.gz & snapshots.linaro.org-access.log-yyyymmdd.gz
- elif [ $WEB_NAME = "releases.linaro.org" ] || [ $WEB_NAME = "snapshots.linaro.org" ] ; then
- x=`ls $CLOG/$RAW_LOG_NAME-*.gz | wc -l`
- if [ x > 0 ] ; then
- zcat $CLOG/$RAW_LOG_NAME-*.gz > $PPATH/$TMP_LOG_NAME
- if [ $DEBUG -eq $TRUE ] ; then
- echo "$WEB_NAME making access.log by zcat $CLOG/$RAW_LOG_NAME-*.gz"
- fi
- fi
- fi
+# elif [ $WEB_NAME = "releases.linaro.org" ] || [ $WEB_NAME = "snapshots.linaro.org" ] ; then
+# x=`ls $CLOG/$RAW_LOG_NAME-*.gz | wc -l`
+# if [ x > 0 ] ; then
+# zcat $CLOG/$RAW_LOG_NAME-*.gz > $PPATH/$TMP_LOG_NAME
+# if [ $DEBUG -eq $TRUE ] ; then
+# echo "$WEB_NAME making access.log by zcat $CLOG/$RAW_LOG_NAME-*.gz"
+# fi
+# fi
+# fi
 if [ $DO_REV_DNS_LOOKUP -eq $TRUE ] || [ $DO_GEOIP_LOOKUP -eq $TRUE ] ; then
 # If it's www.linaro.org build the DNS database
@@ -463,7 +486,8 @@ extract_logs ()
 # Now translate ip addresses to DNS names for all log files
 if [ $DO_GEOIP_LOOKUP -eq $TRUE ] ; then
- # if GEOIP LOOKUP requested do that first and then do reverse DNS
+ # if GEOIP LOOKUP is desired do both GEOIP and reverse DNS lookup at the sametime
+ # the iploc.py program was modified to read both databases and do both in one pass.
 if [ $DEBUG -eq $TRUE ] ; then
 echo "About to do GEOIP LOOKUP and dnshistory replace"
 fi
@@ -482,15 +506,24 @@ extract_logs ()
 cat $PPATH/$TMP_LOG_NAME >> $PPATH/$PROCESSED_LOG_NAME
 fi
- # now make a new file with only .gz and bz2 files downloaded
+ # now make a new file with only .gz, bz2, xz,exe, and zip files downloaded
 # this grep can take some time to run, it's using a regular expression to extract compressed files
 if [ $EXTRACT_GZ_BZ2_FILES -eq $TRUE ] ; then
- cat $PPATH/$PROCESSED_LOG_NAME | grep -E '\<*\.(bz2|gz|xz|exe|zip)\>' | grep -v -E '\<*\.asc\>' | grep -v HEAD | grep -v "gcc-linaro\ " > $PPATH/$TMP_LOG_NAME
+ cat $PPATH/$PROCESSED_LOG_NAME | grep -E '\<*\.(bz2|gz|xz|exe|zip)\>' | grep -v "gcc-linaro\ " > $PPATH/$TMP_LOG_NAME
 if [ $DEBUG -eq $TRUE ] ; then
 echo "creating filtered log"
 fi
- # strip out our known IP's
- cat $PPATH/$TMP_LOG_NAME | grep -v validation.linaro.org > $PPATH/$FILTERED_LOG_NAME
+ # strip out our known IP's and some standard extra junk we don't need or care about
+ cat $PPATH/$TMP_LOG_NAME \
+ | grep -v .asc \
+ | grep -v HEAD \
+ | grep -v OPTIONS \
+ | grep -v .png \
+ | grep -v .ico \
+ | grep -v .css \
+ | grep -v .js \
+ | grep -v validation.linaro.org \
+ > $PPATH/$FILTERED_LOG_NAME
 if [ $EXTRACT_TOOLCHAIN_LOG -eq $TRUE ] ; then
 if [ $DEBUG -eq $TRUE ] ; then
 echo "creating toochain log"
@@ -626,9 +659,10 @@ extract_logs
 process_logs
 # cleanup the extra files and stuff
-cleanup
-
-# change back to where we were called from
-cd $STARTING_LOCATION
+if [ $DEBUG -eq $FALSE ] ; then
+ cleanup
+ # change back to where we were called from
+ cd $STARTING_LOCATION
+fi
 # done, out of here |