Switched over to file.path()

nethi · Feb 17, 2012 · b8e9690 · b8e9690
1 parent 261991d
commit b8e9690
Show file tree

Hide file tree

Showing 3 changed files with 35 additions and 37 deletions.
diff --git a/01-Introduction/ufo_sightings.R b/01-Introduction/ufo_sightings.R
@@ -29,7 +29,7 @@ library('ggplot2')    # We'll use ggplot2 for all of our visualizations
 # We also have to alter two defaults; first, we want the strings to not be converted to
 # factor types; and, this data does not have header labels in the first row, so
 # we want to keep the first row as data.
-ufo <- read.delim("data/ufo/ufo_awesome.tsv",
+ufo <- read.delim(file.path("data", "ufo", "ufo_awesome.tsv"),
                   sep = "\t",
                   stringsAsFactors = FALSE,
                   header = FALSE, 
@@ -125,7 +125,7 @@ quick.hist <- ggplot(ufo.us, aes(x = DateOccurred)) +
   geom_histogram() + 
   scale_x_date(major = "50 years")
 ggsave(plot = quick.hist,
-       filename = 'images/quick_hist.pdf',
+       filename = file.path("images", "quick_hist.pdf"),
        height = 6,
        width = 8)
 
@@ -138,7 +138,7 @@ new.hist <- ggplot(ufo.us, aes(x = DateOccurred)) +
   geom_histogram() +
   scale_x_date(major = "50 years")
 ggsave(plot = quick.hist,
-       filename = "images/new_hist.pdf",
+       filename = file.path("images", "new_hist.pdf"),
        height = 6,
        width = 8)
 
@@ -208,6 +208,6 @@ state.plot <- ggplot(all.sightings, aes(x = YearMonth,y = Sightings)) +
   opts(title = "Number of UFO sightings by Month-Year and U.S. State (1990-2010)")
 # Save the plot as a PDF
 ggsave(plot = state.plot,
-       filename = "images/ufo_sightings.pdf",
+       filename = file.path("images", "ufo_sightings.pdf"),
        width = 14,
        height = 8.5)
diff --git a/03-Classification/email_classify.R b/03-Classification/email_classify.R
@@ -25,12 +25,12 @@ library('tm')
 library('ggplot2')
 
 # Set the global paths
-spam.path <- "data/spam/"
-spam2.path <- "data/spam_2/"
-easyham.path <- "data/easy_ham/"
-easyham2.path <- "data/easy_ham_2/"
-hardham.path <- "data/hard_ham/"
-hardham2.path <- "data/hard_ham_2/"
+spam.path <- file.path("data", "spam")
+spam2.path <- file.path("data", "spam_2")
+easyham.path <- file.path("data", "easy_ham")
+easyham2.path <- file.path("data", "easy_ham_2")
+hardham.path <- file.path("data", "hard_ham")
+hardham2.path <- file.path("data", "hard_ham_2")
 
 # Create motivating plot
 x <- runif(1000, 0, 40)
@@ -50,7 +50,7 @@ ex1 <- ggplot(val, aes(x, V2)) +
   xlab("X") +
   ylab("Y")
 ggsave(plot = ex1,
-       filename = "images/00_Ex1.pdf",
+       filename = file.path("images", "00_Ex1.pdf"),
        height = 10,
        width = 10)
 
@@ -137,7 +137,7 @@ classify.email <- function(path, training.df, prior = 0.5, c = 1e-6)
 spam.docs <- dir(spam.path)
 spam.docs <- spam.docs[which(spam.docs != "cmds")]
 all.spam <- sapply(spam.docs,
-                   function(p) get.msg(paste(spam.path, p, sep = "")))
+                   function(p) get.msg(file.path(spam.path, p)))
 
 # Create a DocumentTermMatrix from that vector
 spam.tdm <- get.tdm(all.spam)
@@ -166,7 +166,7 @@ spam.df <- transform(spam.df,
 easyham.docs <- dir(easyham.path)
 easyham.docs <- easyham.docs[which(easyham.docs != "cmds")]
 all.easyham <- sapply(easyham.docs[1:length(spam.docs)],
-                      function(p) get.msg(paste(easyham.path, p, sep = "")))
+                      function(p) get.msg(file.path(easyham.path, p)))
 
 easyham.tdm <- get.tdm(all.easyham)
 
@@ -193,12 +193,10 @@ hardham.docs <- dir(hardham.path)
 hardham.docs <- hardham.docs[which(hardham.docs != "cmds")]
 
 hardham.spamtest <- sapply(hardham.docs,
-                           function(p) classify.email(paste(hardham.path, p, sep = ""),
-                           training.df = spam.df))
+                           function(p) classify.email(file.path(hardham.path, p), training.df = spam.df))
 
 hardham.hamtest <- sapply(hardham.docs,
-                          function(p) classify.email(paste(hardham.path, p, sep = ""),
-                          training.df = easyham.df))
+                          function(p) classify.email(file.path(hardham.path, p), training.df = easyham.df))
 
 hardham.res <- ifelse(hardham.spamtest > hardham.hamtest,
                       TRUE,
@@ -207,15 +205,15 @@ summary(hardham.res)
 
 # Find counts of just terms 'html' and 'table' in all SPAM and EASYHAM docs, and create figure
 html.spam <- sapply(spam.docs,
-                    function(p) count.word(paste(spam.path, p, sep = ""), "html"))
+                    function(p) count.word(file.path(spam.path, p), "html"))
 table.spam <- sapply(spam.docs,
-                     function(p) count.word(paste(spam.path, p, sep = ""), "table"))
+                     function(p) count.word(file.path(spam.path, p), "table"))
 spam.init <- cbind(html.spam, table.spam, "SPAM")
 
 html.easyham <- sapply(easyham.docs,
-                       function(p) count.word(paste(easyham.path, p, sep = ""), "html"))
+                       function(p) count.word(file.path(easyham.path, p), "html"))
 table.easyham <- sapply(easyham.docs,
-                        function(p) count.word(paste(easyham.path, p, sep = ""), "table"))
+                        function(p) count.word(file.path(easyham.path, p), "table"))
 easyham.init <- cbind(html.easyham, table.easyham, "EASYHAM")
 
 init.df <- data.frame(rbind(spam.init, easyham.init),
@@ -233,7 +231,7 @@ init.plot1 <- ggplot(init.df, aes(x = html, y = table)) +
   stat_abline(yintersept = 0, slope = 1) +
   theme_bw()
 ggsave(plot = init.plot1,
-       filename = "images/01_init_plot1.pdf",
+       filename = file.path("images", "01_init_plot1.pdf"),
        width = 10,
        height = 10)
 
@@ -245,7 +243,7 @@ init.plot2 <- ggplot(init.df, aes(x = html, y = table)) +
   stat_abline(yintersept = 0, slope = 1) +
   theme_bw()
 ggsave(plot = init.plot2,
-       filename = "images/02_init_plot2.pdf",
+       filename = file.path("images", "02_init_plot2.pdf"),
        width = 10,
        height = 10)
 
@@ -272,17 +270,17 @@ spam2.docs <- spam2.docs[which(spam2.docs != "cmds")]
 easyham2.class <- suppressWarnings(lapply(easyham2.docs,
                                    function(p)
                                    {
-                                     spam.classifier(paste(easyham2.path, p, sep = ""))
+                                     spam.classifier(file.path(easyham2.path, p))
                                    }))
 hardham2.class <- suppressWarnings(lapply(hardham2.docs,
                                    function(p)
                                    {
-                                     spam.classifier(paste(hardham2.path, p, sep = ""))
+                                     spam.classifier(file.path(hardham2.path, p))
                                    }))
 spam2.class <- suppressWarnings(lapply(spam2.docs,
                                 function(p)
                                 {
-                                  spam.classifier(paste(spam2.path,p,sep = ""))
+                                  spam.classifier(file.path(spam2.path, p))
                                 }))
 
 # Create a single, final, data frame with all of the classification data in it
@@ -319,7 +317,7 @@ class.plot <- ggplot(class.df, aes(x = Pr.HAM, Pr.SPAM)) +
     theme_bw() +
     opts(axis.text.x = theme_blank(), axis.text.y = theme_blank())
 ggsave(plot = class.plot,
-       filename = "images/03_final_classification.pdf",
+       filename = file.path("images", "03_final_classification.pdf"),
        height = 10,
        width = 10)
 
@@ -340,5 +338,5 @@ colnames(class.res) <- c("NOT SPAM", "SPAM")
 print(class.res)
 
 # Save the training data for use in Chapter 4
-write.csv(spam.df, "data/spam_df.csv", row.names = FALSE)
-write.csv(easyham.df, "data/easyham_df.csv", row.names = FALSE)
+write.csv(spam.df, file.path("data", "spam_df.csv"), row.names = FALSE)
+write.csv(easyham.df, file.path("data", "easyham_df.csv"), row.names = FALSE)
diff --git a/04-Ranking/priority_inbox.R b/04-Ranking/priority_inbox.R
@@ -26,8 +26,8 @@ library('tm')
 library('ggplot2')
 
 # Set the global paths
-data.path <- "../03-Classification/data/"
-easyham.path <- paste(data.path, "easy_ham/", sep = "")
+data.path <- file.path("..", "03-Classification", "data")
+easyham.path <- file.path(data.path, "easy_ham")
 
 # We define a set of functions that will extract the data
 # for the feature set we have defined to rank email
@@ -106,7 +106,7 @@ parse.email <- function(path)
 easyham.docs <- dir(easyham.path)
 easyham.docs <- easyham.docs[which(easyham.docs != "cmds")]
 easyham.parse <- lapply(easyham.docs,
-                        function(p) parse.email(paste(easyham.path, p, sep = "")))
+                        function(p) parse.email(file.path(easyham.path, p)))
 
 # Convert raw data from list to data frame
 ehparse.matrix <- do.call(rbind, easyham.parse)
@@ -168,7 +168,7 @@ from.scales <- ggplot(from.ex) +
   theme_bw() +
   opts(axis.text.y = theme_text(size = 5, hjust = 1))
 ggsave(plot = from.scales,
-       filename = "images/0011_from_scales.pdf",
+       filename = file.path("images", "0011_from_scales.pdf"),
        height = 4.8,
        width = 7)
 
@@ -190,7 +190,7 @@ from.rescaled <- ggplot(from.weight, aes(x = 1:nrow(from.weight))) +
   theme_bw() +
   opts(axis.text.y = theme_blank(), axis.text.x = theme_blank())
 ggsave(plot = from.rescaled,
-       filename = "images/0012_from_rescaled.pdf",
+       filename = file.path("images", "0012_from_rescaled.pdf"),
        height = 4.8,
        width = 7)
 
@@ -426,7 +426,7 @@ threshold.plot <- ggplot(train.ranks.df, aes(x = Rank)) +
   scale_fill_manual(values = c("darkred" = "darkred"), legend = FALSE) +
   theme_bw()
 ggsave(plot = threshold.plot,
-       filename = "images/01_threshold_plot.pdf",
+       filename = file.path("images", "01_threshold_plot.pdf"),
        height = 4.7,
        width = 7)
 
@@ -448,7 +448,7 @@ final.df$Date <- date.converter(final.df$Date, pattern1, pattern2)
 final.df <- final.df[rev(with(final.df, order(Date))), ]
 
 # Save final data set and plot results.
-write.csv(final.df, "data/final_df.csv", row.names = FALSE)
+write.csv(final.df, file.path("data", "final_df.csv"), row.names = FALSE)
 
 testing.plot <- ggplot(subset(final.df, Type == "TRAINING"), aes(x = Rank)) +
   stat_density(aes(fill = Type, alpha = 0.65)) +
@@ -459,6 +459,6 @@ testing.plot <- ggplot(subset(final.df, Type == "TRAINING"), aes(x = Rank)) +
   scale_fill_manual(values = c("TRAINING" = "darkred", "TESTING" = "darkblue")) +
   theme_bw()
 ggsave(plot = testing.plot,
-       filename = "images/02_testing_plot.pdf",
+       filename = file.path("images", "02_testing_plot.pdf"),
        height = 4.7,
        width = 7)