DCMSstats · DCMSstats · Mar 7, 2017 · Mar 14, 2017 · Mar 24, 2017 · Apr 27, 2017
diff --git a/NAMESPACE b/NAMESPACE
@@ -14,6 +14,7 @@ export(figure3.1)
 export(figure3.2)
 export(figure3.3)
 export(integrity_check)
+export(jobs_timeseries)
 export(na_cols)
 export(raise_issue)
 export(save_rds)

diff --git a/R/APS_ct.R b/R/APS_ct.R
@@ -0,0 +1,53 @@
+#' @title APS Crosstabber
+#'
+#' @description Crosstabulates two variables from an APS dataframe
+#'
+#'   IT IS HIGHLY ADVISEABLE TO ENSURE THAT THE DATA WHICH ARE CREATED BY THIS
+#'   FUNCTION ARE NOT STORED IN A FOLDER WHICH IS A GITHUB REPOSITORY TO
+#'   MITIGATE AGAINST ACCIDENTAL COMMITTING OF OFFICIAL DATA TO GITHUB. TOOLS TO
+#'   FURTHER HELP MITIGATE THIS RISK ARE AVAILABLE AT
+#'   https://github.com/ukgovdatascience/dotfiles.
+#'
+#' @details The best way to understand what happens when you run this function
+#'   is to look at the source code, which is available at
+#'   \url{https://github.com/ukgovdatascience/eesectors/blob/master/R/}. The
+#'   code is relatively transparent and well documented. A brief explanation of
+#'   what the function does here:
+#'
+#'   1. The function checks if the inputs are file directories or data frames
+#'
+#'   2. The function finds the year of each inputted dataset
+#'
+#'   3. The function returns a list of datasets named per year (x$y20XX)
+#'
+#' @param ... either the locations fo the APS SPSS files from which you want
+#'   to form the time series, or the data frames you want to form into the time
+#'   series list.
+#'
+#' @return The function a list of APS data frames, with each list element named
+#'   after the year from which the data come
+#'
+#' @examples
+#'
+#' \dontrun{
+#' library(eesectors)
+#' ct<-APS_ct(data, x, y)
+#' }
+#'
+#'
+
+APS_ct <- function(data,x,y,sector="all_dcms"){
+
+  #Run append sectors on the data frame to make sure they're assigned
+  data<-eesectors::appendSectors(data)
+
+  #Make sure we've worked out if each person has a job in a sector
+  data<-eesectors:::sector_jobs(data)
+
+  jobfilter<-data[,sector]>0
+  data<-data[jobfilter,]
+
+  tab<-questionr::wtd.table(x=as.data.frame(data[,y]),y=as.data.frame(data[,x]),weights = as.data.frame(data[,"PWTA14"]))
+
+  return(tab)
+  }
diff --git a/R/APS_tables.R b/R/APS_tables.R
@@ -0,0 +1,72 @@
+#' @title Sector employment time series table
+#'
+#' @description Produces a time series table of employment by DCMS sector
+#'
+#' @details The function \code{eesectors::jobs_timeseries} produces a basic time series
+#'   of data based on jobs per DCMS sector.
+#'
+#' @param ... APS time series `data.frames`s or the file destinations of APS datasets
+#'   (providing APS data sets will cause the function to run slowly as it loads the data)
+#'
+#' @return The function returns the sector employment time series table
+#'
+#' @examples
+#'
+#' \dontrun{
+#' library(eesectors)
+#' employment_timeseries <- jos_timeseries(
+#' aps2011, aps2012, aps2013, aps2014, aps2015
+#' )
+#' }
+#'
+#' \dontrun{
+#' library(eesectors)
+#' employment_timeseries <- jos_timeseries(
+#' "./APS_data_2013.sav","./APS_data_2014.sav","./APS_data_2015.sav",
+#' )
+#' }
+#'
+#' @export
+#'
+
+jobs_timeseries <- function(...,tourism=NULL,sectors = eesectors::DCMS_sectors){
+
+  #Time seriesify the data and do sector analysis
+
+  message("Combining data frames...")
+  APS_ts <- eesectors:::APS_timeseries(...)
+  message("Adding sector membership for first and second jobs...")
+  APS_ts <- lapply(APS_ts,eesectors::appendSectors)
+  message("Counting jobs per sector per person...")
+  APS_ts <- lapply(APS_ts,eesectors:::sector_jobs)
+
+  #Start to build the table
+
+  #Get sector names
+  sector_names <- sapply(unique(sectors$sector),eesectors:::simpleCap)
+
+  #Add column 1, sector names
+  jobs_ts <- data.frame(Sector <- c(sector_names))
+  jobs_ts[,1] <- gsub(jobs_ts[,1],pattern = "All_dcms",replacement = "All DCMS")
+
+  #If there is Tourism data to read, read it in
+  if  (!is.null(tourism)){
+    tdata<-readxl::read_excel(tourism)
+  }
+
+  #Add jobs statistics
+  for (year in APS_ts){
+    jobs_count<-eesectors:::sector_jobs_count(year)
+    jobs_ts[toString(eesectors:::yearfind(year))]<-NA
+
+    if (!is.null(tourism)){
+      jobs_count["tourism",]<-tdata$`Tourism employment`[tdata$Year==eesectors:::yearfind(year)]
+      jobs_count["all_dcms",]<-jobs_count["all_dcms",]+tdata$`Tourism employment`[tdata$Year==eesectors:::yearfind(year)]-tdata$Overlap[tdata$Year==eesectors:::yearfind(year)]
+    }
+
+    jobs_ts[toString(eesectors:::yearfind(year))]<-jobs_count
+  }
+
+  return(jobs_ts)
+}
+
diff --git a/R/APS_timeseries.R b/R/APS_timeseries.R
@@ -0,0 +1,78 @@
+#' @title APS time series maker
+#'
+#' @description Builds a list of APS data frames by year that can be passed to
+#'   other table making functions
+#'
+#'   IT IS HIGHLY ADVISEABLE TO ENSURE THAT THE DATA WHICH ARE CREATED BY THIS
+#'   FUNCTION ARE NOT STORED IN A FOLDER WHICH IS A GITHUB REPOSITORY TO
+#'   MITIGATE AGAINST ACCIDENTAL COMMITTING OF OFFICIAL DATA TO GITHUB. TOOLS TO
+#'   FURTHER HELP MITIGATE THIS RISK ARE AVAILABLE AT
+#'   https://github.com/ukgovdatascience/dotfiles.
+#'
+#' @details The best way to understand what happens when you run this function
+#'   is to look at the source code, which is available at
+#'   \url{https://github.com/ukgovdatascience/eesectors/blob/master/R/}. The
+#'   code is relatively transparent and well documented. A brief explanation of
+#'   what the function does here:
+#'
+#'   1. The function checks if the inputs are file directories or data frames
+#'
+#'   2. The function finds the year of each inputted dataset
+#'
+#'   3. The function returns a list of datasets named per year (x$y20XX)
+#'
+#' @param ... either the locations fo the APS SPSS files from which you want
+#'   to form the time series, or the data frames you want to form into the time
+#'   series list.
+#'
+#' @return The function a list of APS data frames, with each list element named
+#'   after the year from which the data come
+#'
+#' @examples
+#'
+#' \dontrun{
+#' library(eesectors)
+#' extract_APS_data(...)
+#' }
+#'
+#'
+
+APS_timeseries <- function(...){
+
+  # List the input datasets, if reading from file
+  if (typeof(c(...))=="character"){
+
+    # Warn if it doesn't look like an SPSS file (it should fail anyway)
+    for(i in c(...)){
+      if(substr(i, nchar(i)-3, nchar(i)) != ".sav"){
+        warning("At least one file does not appear to be a .sav file. Are you sure this is an SPSS file?")
+        break
+      }
+    }
+
+    # This takes ages if it's importing multiple SPSS datasets
+    # Haven doesn't have column selection functionality yet...
+    message(paste0("Importing ",toString(length(c(...)))," SPSS files. This may take a few minutes..."))
+
+    # Listify the data imported by eesectors::extract_APS_data
+    ts_list <- c(...)
+    ts_list <- lapply(ts_list,eesectors::extract_APS_data)
+  }
+
+  # Or list the data frames if supplied with data frames
+  if (typeof(c(...))=="list"){
+    ts_list <- list(...)
+    message(paste0(toString(length(ts_list))," data frames supplied"))
+  }
+
+  # Find the year of each data set and rename the list element accordingly
+  rlist <- lapply(ts_list,eesectors:::yearfind)
+  names(ts_list) <- paste0("y",rlist)
+
+  message(paste0(toString(length(ts_list))," APS datasets added to time series"))
+  message("Time series consists of the following years, in this order:")
+  for(y in rlist){message(y)}
+
+  return(ts_list)
+}
+
diff --git a/R/APS_utils.R b/R/APS_utils.R
@@ -0,0 +1,185 @@
+#' @title Recode functions
+#'
+#' @description Recodes various APS functions into ones usable
+#' for the analysis
+#'
+#' @details These functions recode APS variables into variales
+#' useful for the statistical release, for example, the regional variables
+#' into NUTS1/GOR regions
+#'
+#' @param x a `data.frame` column as extracted by \code{eesectors::extract_APS_data}
+#'
+#' @return The function returns the APS variable as a dataframe column with recoded
+#' regional variables
+#'
+#' @examples
+#'
+#' \dontrun{
+#' library(eesectors)
+#' APS_data_jobs$var<-regionsRecode(APS_data$var)
+#' )
+#' }
+#'
+
+regionsRecode<-function(x){
+  1 ->x[x==1|x==2]
+  2 ->x[x==3|x==4|x==5]
+  3 ->x[x==6|x==7|x==8]
+  4 ->x[x==9]
+  5 ->x[x==10|x==11]
+  6 ->x[x==12]
+  7 ->x[x==13|x==14|x==15]
+  8 ->x[x==16]
+  9 ->x[x==17]
+  10->x[x==18]
+  11->x[x==19|x==20]
+  12->x[x==21]
+  13->x[x==22|x==23]
+
+  x <- labelled::labelled(x, c("North East"=1, "North West"=2,
+                     "Yorkshire and The Humber"=3,
+                     "East Midlands"=4,"West Midlands"=5,
+                     "East of England"=6,"London"=7,
+                     "South East"=8,"South West"=9,
+                     "Wales"=10,"Scotland"=11,
+                     "Northern Ireland"=12,
+                     "Other"=13))
+
+  return(x)
+}
+
+secondjobRecode<-function(x){
+  1 ->x[x==3]
+  return(x)
+}
+
+ethnicityRecode<-function(x){
+  1 ->x[x==1]
+  2 ->x[x==3]
+  3 ->x[x==4|x==5|x==6|x==7|x==8]
+  4 ->x[x==9]
+  5 ->x[x==10|x==11|x==2]
+
+  x <- labelled::labelled(x, c("White"=1, "Mixed / Multiple ethnic groups"=2,
+                               "Asian / Asian British"=3,
+                               "Black / African / Caribbean / Black British"=4,
+                               "Other"=5, "Missing"=0))
+
+  return(x)
+}
+
+
+
+
+#' @title Sector jobs
+#'
+#' @description Counts the number of jobs a person has in each sector (0, 1 or 2)
+#'
+#' @details This function sums up the jobs per sector found using the appendSectors
+#'   function.
+#'
+#' @param x a `data.frame` as extracted by \code{eesectors::extract_APS_data}
+#'
+#' @return The function returns the APS data as a dataframe with the sector
+#'   job count variables appended
+#'
+#' @examples
+#'
+#' \dontrun{
+#' library(eesectors)
+#' APS_data_jobs=sector_jobs(APS_data)
+#' )
+#' }
+#'
+
+sector_jobs<-function(x){
+
+  # Find out how many sectors there are
+  unique_sectors <- unique(DCMS_sectors$sector)
+
+  for(s in unique_sectors){
+    x[s] <- x[paste0(s,"_main")]+x[paste0(s,"_second")]
+  }
+  return(x)
+}
+
+#' @title Sector jobs count
+#'
+#' @description Counts the number of jobs per sector, after removing those
+#'   without a second or first jobs
+#'
+#' @details Counts the number of jobs per sector, after removing those
+#'   without a second or first jobs using the \code{eesectors:::main_job_mask}
+#'   and \code{eesectors:::second_job_mask} functions.
+#'
+#' @param x an APS `data.frame` as extracted by \code{eesectors::extract_APS_data}
+#'   and with sectors added with \code{eesectors::appendSectors} and
+#'   \code{eesectors:::sector_jobs}
+#'
+#' @return The function returns a `data.frame` contains the jobs per DCMS sector
+#'
+#' @examples
+#'
+#' \dontrun{
+#' library(eesectors)
+#' job_counts_20xx=sector_jobs_count(APS_data_secotr_jobs)
+#' )
+#' }
+#'
+
+sector_jobs_count<-function(x){
+
+  # Find out how many sectors there are
+  unique_sectors <- unique(DCMS_sectors$sector)
+
+  # Append sectors
+  x<-eesectors::appendSectors(x)
+
+  # create output df
+  y <- data.frame(matrix(, nrow=length(unique_sectors), ncol=0))
+  row.names(y) <- unique_sectors
+
+  # create masked data sets of first/second jobs only
+  xm<-eesectors:::main_job_mask(x)
+  xs<-eesectors:::second_job_mask(x)
+
+  # get the jobs per sector and weight by PWTA14
+  for(s in unique_sectors){
+    y[s,"count"] <- sum(xm[paste0(s,"_main")]*xm$PWTA14)+sum(xs[paste0(s,"_second")]*xs$PWTA14)
+  }
+  return(y)
+}
+
+#' @title Economic activity filters
+#'
+#' @description Filters out people based on their main/second job
+#'   economic activity status
+#'
+#' @details Filters out people based on their main/second job
+#'   economic activity status - INECAC05 for first job and SECJMBR for second
+#'   job (1 = full time, 2 = part time).
+#'
+#' @param x a `data.frame` as extracted by \code{eesectors::extract_APS_data}
+#'
+#' @return The function returns the APS `data.frame` with only those
+#'   with a first or second job depending on functions used.
+#'
+#' @examples
+#'
+#' \dontrun{
+#' library(eesectors)
+#' APS_data_jobs=sector_jobs(APS_data)
+#' )
+#' }
+#'
+
+main_job_mask<-function(x){
+  return(x[x$INECAC05==1 | x$INECAC05==2 & !is.na(x$INECAC05),])
+}
+
+second_job_mask<-function(x){
+  return(x[(x$SECJMBR==1 | x$SECJMBR==2) & !is.na(x$SECJMBR),])
+}
+
+
+