Spelling

author: Mike Holmes <mike.holmes@linaro.org> 2017-06-28 12:31:57 -0400
committer: Mike Holmes <mike.holmes@linaro.org> 2017-06-30 15:00:11 -0400
commit: 82b2c5c818fbe44845087d5c0cf58bd0c0a1fecc (patch)
tree: cdc25417ac207c5b7fd8cf379e42ae389ab18d17
parent: 5034c81f6a45075087e44f1be542f5be5415fee0 (diff)
2 files changed, 103 insertions, 61 deletions
diff --git a/Signed_off_heatmaps.R b/Signed_off_heatmaps.R
index 523ae7f..b549cce 100644
--- a/Signed_off_heatmaps.R
+++ b/Signed_off_heatmaps.R
@@ -230,7 +230,7 @@ op <- par(mar=c(15,4,4,2))
 inverse.percentage_patches <- t(percentage_patches)
 percentage_patches
 barplot(inverse.percentage_patches, las=2,  
-        main="Team conribution as % of Linaro", 
+        main="Team contribution as % of Linaro", 
         ylab = "%", 
         col =colours, 
         legend.text=TRUE, 
diff --git a/member_heatmaps.R b/member_heatmaps.R
index e7fea24..2599f05 100644
--- a/member_heatmaps.R
+++ b/member_heatmaps.R
@@ -13,11 +13,16 @@ companies <- function (names)
   companys = list()
   for (name in names) {
     for (n in name) {
-      companys[[length(companys) + 1]] <-
-        strapplyc(n, "@(.*)", simplify = TRUE)
+      company <- as.character(strapplyc(n, "@(.*)", simplify = TRUE))
+      if (company != "character(0)") {
+        companys[[length(companys) + 1]] <- company
+      } else {
+        print(paste("Source data problem ", n, " is not a valid email address/n"))
+      }
+      
     }
   }
-  company <- unique(companys)
+  company <- as.character(unique(companys))
   
   return (company)
 }
@@ -62,81 +67,115 @@ display_heatmap <- function(report, m, suffix) {
 watcher_heatmap <- function(report) {
   baseurl <- "https://projects.linaro.org/rest/api/2/"
   fields <- "&fields=watches,key,project"
-  
   type <- "search?"
-  jql <- paste ("jql=project+in+(",report["team"],")+AND+issuetype+in+(Initiative,Epic)&maxResults=1000", sep = "")
   
-  url <- paste(baseurl, type, jql, fields, sep = "")
-  cat (url,"\n")
-  webpage <- curl (url, handle = h)
+  page <- 0
+  #empty container for all data returned
+  jira_raw = data.frame()
   
-  #get data it into R
-  jira_raw <- fromJSON(webpage, simplifyDataFrame = TRUE)
-  # extract the data node
-  keys <- jira_raw$issues$key
-
-  #x axis the number of projects
-  projects <-  unique(jira_raw$issues$fields$project$key)
-  num_projects <- length(projects)
+  repeat {
+    jql <-
+      paste (
+        "jql=project+in+(",
+        report["team"],
+        ")+AND+issuetype+in+(Initiative,Epic)&maxResults=1000",
+        sep = ""
+      )
+    url <- paste(baseurl, type, jql, fields, sep = "")
+    webpage <- curl (url, handle = h)
+    
+    #get the data into R
+    new_data <- fromJSON(webpage, simplifyDataFrame = TRUE)
+    new_data_issues <- data.frame(
+      key = new_data$issues$key,
+      project_key = new_data$issues$fields$project$key,
+      watches = new_data$issues$fields$watches$self
+    )
+    
+    jira_raw <- rbind(jira_raw, new_data_issues)
+    
+    #get following pages if there are any
+    page <- page + 1
+    
+    if (new_data$total < (new_data$maxResults * new_data$maxResults))
+      break
+  }
   
   #projects
   #from the website now get watchers names with another query for all keys found previously
+  #This will only get up to 1000 watchers per issue - we normally see at most 5
+  #no watchers returns NULL
   fields <- "/watchers"
   type <- "issue/"
- 
-  
-  #TODO, get all the data
-  
   
   names = list()
-  for (key in keys) {
-    url <- paste(baseurl, type, key, fields, sep = "")
+  for (url in jira_raw$watches) {
     webpage <- curl (url, handle = h)
-    jira_raw_names <- fromJSON(webpage, simplifyDataFrame = TRUE)
-
-    names[[length(names) + 1]] <-
-      as.list(jira_raw_names$watchers$name)
+    jira_raw_names <- fromJSON(webpage, simplifyDataFrame = TRUE, simplifyVector = TRUE)
+    if ( jira_raw_names$watchCount > 0) {
+      new_name <- unlist(jira_raw_names$watchers$name)
+      names[[length(names) + 1]] <- new_name
+    } else
+    {
+      names[[length(names) + 1]] <- ""
+    }
   }
+  #attach the names to the data frame
+  jira_raw$names <- cbind(jira_raw$names,names)
   
-  #create empty matrix
-  comp <- companies(names)
-  num_comp <- length(comp)
-  m <- matrix(NA, nrow = num_comp, ncol = num_projects)
-  colnames(m) <- projects
-  m[] <- 0L
+  #create a data frame (seeded with one NA row) to hold the results summary
+  m <- data.frame(Person = NA)
   
-  #from the website now get watchers names with another query for all keys found previously
-  fields <- "/watchers"
-  type <- "issue/"
-
-  for (key in keys) {
-    #y
-    mike <- match (key, keys)
-    team <- jira_raw$issues$fields$project$key[mike]
-    y <- match (team, projects)
-    
-    #x
-    names = list()
-    url <- paste(baseurl, type, key, fields, sep = "")
-    webpage <- curl (url, handle = h)
-    jira_raw_names <- fromJSON(webpage, simplifyDataFrame = TRUE)
+  # count watchers per project by tabulating the names on every issue and update the summary
+  for (project in unique(jira_raw$project_key)) {
+    team <- jira_raw[jira_raw$project_key == project, ]
     
-    names[[length(names) + 1]] <- as.list(jira_raw_names$watchers$name)
-    interested = companies(names)
-    for (c in interested)
-    {
-      x <- match (c, comp)
-      m[x, y] <- m[x, y] + 1
-    }
+    team <- unlist(team['names'])
+    freq <- data.frame(table(unlist(team)))
+    freq <- rename (freq, c("Freq" = project, "Var1" = "Person"))
+    m <- merge(m, freq, all = TRUE)
   }
   
-  #remove companies
-  rows.to.delete <- c('linaro.org', 'broadcom.com', 'character')
+  #delete the NA row - should try to figure out which bad data causes this
+  m <- m[rowSums(is.na(m)) != 7,]
+  # add a name for the unwatched issues
+  m$Person[1] <-"unwatched"
+  #fix the NA's in the data they really mean 0
+  m[is.na(m)] <- 0
+  #add row names and remove the column that was the row names
+  rownames(m) <- m$Person
+  m$Person <- NULL
+  
+  print(m)
+  #remove companies that make no sense
+  rows.to.delete <- c('linaro.org')
   for (d in rows.to.delete) {
-    m <- as.matrix(m[!grepl(d, rownames(m)),])
+    m <- m[!grepl(d, rownames(m)), ]
   }
-
-  display_heatmap(report, m,"_wt") 
+  
+  #generate a list by company to complement this list by person
+  Company <- companies(rownames(m))
+  
+  #create a data frame of companies to hold the results summary by company
+  nd <- as.data.frame(Company)
+  print(nd)
+  
+  for (c in Company) {
+    matches <- grepl(c, rownames(m))
+    comp <- m[matches, ]
+    comp_col_sum <- as.data.frame(colSums(comp))
+    comp_col_sum <- rename(comp_col_sum, c('colSums(comp)' = c))
+    comp_col_sum <- data.frame(t(comp_col_sum))
+    comp_col_sum <- cbind(Company = rownames(comp_col_sum), comp_col_sum)
+    nd <- merge(nd, comp_col_sum , all = TRUE)
+    print(nd)
+  }
+  nd <- nd[complete.cases(nd),]
+  #add row names and remove the column that was the row names
+  rownames(nd) <- nd$Company
+  nd$Company <- NULL
+  print(nd)
+  display_heatmap(report, nd, "_wt")
 }
 
 
@@ -211,7 +250,9 @@ sponsor_heatmap <- function (report) {
 
   #remove companies we don't like ;) - Linaro adds no value to this analysis
   rows.to.delete <- c('Linaro')
-  m <- m[!rownames(m) %in% rows.to.delete,]
+  for (d in rows.to.delete) {
+    m <- as.matrix(m[!grepl(d, rownames(m)),])
+  }
   
   #fix the NA's they really mean 0
   m[is.na(m)] <- 0
@@ -327,6 +368,7 @@ team_list <-
     "LHG,LEG,LNG,LITE,LMG",
     "CTT,BB,LAVA,LAB,QA,SYS,TCWG"
   )
+
 reports <-
   data.frame(name = report_name ,
              team = team_list,
author	Mike Holmes <mike.holmes@linaro.org>	2017-06-28 12:31:57 -0400
committer	Mike Holmes <mike.holmes@linaro.org>	2017-06-30 15:00:11 -0400
commit	82b2c5c818fbe44845087d5c0cf58bd0c0a1fecc (patch)
tree	cdc25417ac207c5b7fd8cf379e42ae389ab18d17
parent	5034c81f6a45075087e44f1be542f5be5415fee0 (diff)