-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathspineScript_KN_v8.R
114 lines (98 loc) · 9.82 KB
/
spineScript_KN_v8.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
library(dplyr) # to install these , enter install.packages("package name") into the console in RStudio. replace package name with each package name in quotes, ie install.packages("dplyr")
library(ggplot2)
library(lattice)
library(latticeExtra)
library(cluster)
library(stats)
library(readr)
fileChosen <- file.choose() # opens file dialog to open
filePath <- dirname(fileChosen) # gets the directory name that that file is in
setwd(filePath) # sets the working directory. important for saving files later
fileList <- list.files(filePath) # a list of files in the directory to go through
fileList <- grep(".csv", fileList, value = TRUE) # finds only lists .csv files in case there are other types
data_all <- data.frame() # initializes a data.frame that will store all of the data for the experiment
for(i in 1:length(fileList)){ ##add back in 'i' when adding for loop back in
fileName <- unlist(strsplit(fileList[i], "[.]"))[1] # removes ".csv" from file name
fileLoc <- file.path(filePath, fileList[i]) # creates a path to each file (differs from FileChosen as the loop runs)
df <- read_csv(fileLoc, col_types = cols()) # reads the csv file, suppresses output information about columns
colnames(df) <- gsub("-", "_", colnames(df)) # R doesn't like dashes in column names, replaces "-" with "_" for all colnames of df
df <- df[complete.cases(df$SOMA_DISTANCE),] # removes cases where there is no soma distance data
df <- df[complete.cases(df$RAYBURST_VOLUME),] # removes cases where there is no data
df <- df[complete.cases(df$MAX_DTS),] # removes cases where there is no data
df$file <- fileName # adds a column with repeated information about which file that spine came from
total_length <- df %>% group_by(SECTION_NUMBER) %>% summarise(section_length=max(SECTION_LENGTH)) %>%ungroup() %>% summarise(total_length=sum(section_length)) %>% as.double()
# ^^this uses the dplyr package to take the data.frame and group it by section. Then it finds the length of each section before adding them all together
total_spines <- nrow(df) # number of rows = number of spines
density_overall <- total_spines/total_length # calculate density of segment
density_mushroom <- sum(as.numeric(df$TYPE=="mushroom"))/total_length # counts the number of "mushroom" rows and divides by length for mushroom density
density_thin <- sum(as.numeric(df$TYPE=="thin"))/total_length # same as above, with "thin"
density_stubby <- sum(as.numeric(df$TYPE=="stubby"))/total_length # same as above, with "stubby"
agn <- agnes(df[c("X", "Y", "Z")], metric = "euclidean", method = "average") # runs the agnes, computes agglomerative hierarchical clustering, "average" = UPGMA
dist <- as.matrix(dist(df[c("X", "Y", "Z")])) # creates a distance matrix of soma distance
df$nn_dist <- apply(dist,2, function(x) sort(x)[2]) # finds nearest neighbor for each spine
df$nn2_dist <- apply(dist,2, function(x) sort(x)[3]) # finds second nearest neighbor
df$nn3_dist <- apply(dist,2, function(x) sort(x)[4]) # finds third
df$exp_nn <- 1/density_overall # calculates the expected nearest neighbor based on overall density
df$nn_clustered <- apply(dist,2, function(x) sum(as.numeric(x<df$exp_nn))) # how many spines are within the expected distance
df$is_it_2clustered <- as.numeric(df$nn_dist<df$exp_nn) # if nearest neighbor is closer than expected neighbor (given the density), 1 = TRUE, 0 = FALSE
df$is_it_3clustered <- as.numeric(df$nn2_dist<df$exp_nn)
df$is_it_4clustered <- as.numeric(df$nn3_dist<df$exp_nn)
dendrite_ac <- agn$ac # defines agglomeration coefficent of the dendrite
ac <- list() #initializes list for ac's from next for loop
for(j in 1:15000){
test_data <- data.frame(sample(df$X), sample(df$Y), sample(df$Z)) # randomize the X's, Y's, and Z's to make a "biologically plausible" dataframe.
test_dist <- as.matrix(dist(test_data)) #creates distance matrix for random sample
assign("test_cluster", agnes(test_data, metric = "euclidean", method = "average")) #runs UPGMA on sample and labels the agn output "test_cluster"
ac <- rbind(ac,test_cluster$ac) # add row to ac using the 'test_cluster' ac
}
# mean density of the simulation
m <- mean(unlist(ac))
# standard deviation of the simulation
s <- sd(ac)
# calculated zscore (assuming simulation data are normally distributed which appears true)
df$zscore <- (dendrite_ac - m) / s
cScore <- sum(as.numeric(ac<dendrite_ac))/15000 # average (divide by 15000 samples) how many times random ac is smaller than dendrite_ac, the smaller the value, the more "clustering"
df$c_score <- cScore #add a row to df with the cScore of the dendrite
df$density_overall <- density_overall # add these data to df
df$density_mushroom <- density_mushroom
df$density_thin <- density_thin
df$density_stubby <- density_stubby
data_all <- rbind(data_all, df) # adds df as next row in the data_all file
} # end of for loop, all files in folder should be added to data_all
data_all$animal_num <- lapply(data_all$file, function(x) unlist(strsplit(x, "-"))[2]) # this pulls out an animal number from the file number
data_all$retro_label <- substring(data_all$animal_num, nchar(data_all$animal_num), nchar(data_all$animal_num)) #pulls off 'L' or 'N' from ID to indicate whether cell was retro-gradely labeled
#data_all$PDB <- substring(data_all$animal_num, nchar(data_all$animal_num), nchar(data_all$animal_num))
data_all$PDB <- lapply(data_all$file, function(x) unlist(strsplit(x, "-"))[3]) # pulls dendrite location out of name and adds column
data_all$PDB <- replace(data_all$PDB, data_all$PDB=="p", "prox")
data_all$PDB <- replace(data_all$PDB, data_all$PDB=="d", "dist")
data_all$PDB <- replace(data_all$PDB, data_all$PDB=="b", "basal")
data_all$retro_label <- replace(data_all$retro_label, data_all$retro_label=="L", "labeled")
data_all$retro_label <- replace(data_all$retro_label, data_all$retro_label=="N", "not labeled")
data_all$stack <- lapply(data_all$file, function(x) unlist(strsplit(x, "-"))[4]) #gives letter ID of different imaging days
data_all$stack <- unlist(data_all$stack)
data_all$animal_num <- lapply(data_all$animal_num, function(x) unlist(strsplit(x, "L"))[1]) #removes letter from behind animal ID name
data_all$animal_num <- lapply(data_all$animal_num, function(x) unlist(strsplit(x, "N"))[1])
data_all$RAYBURST_VOLUME <- as.numeric(data_all$RAYBURST_VOLUME)
data_all$MAX_DTS <- as.numeric(data_all$MAX_DTS)
n <- readline(prompt="Enter group name:") # gives prompt on console to enter animal group
data_all$group <- n # adds the group to the master data file
data_all$animal_num <- unlist(data_all$animal_num) #changes animal number to a vector rather than a list, which is important for executing the following task
data_all$PDB <- unlist(data_all$PDB) # same as above
TYPE_ave <- data_all %>% group_by(group, animal_num, retro_label, TYPE) %>% summarise(Cscore = mean(c_score), zscore = mean(zscore), nn = mean(nn_dist), nn2 = mean(nn2_dist), nn3 = mean(nn3_dist), density_overall = mean(density_overall), density_mushroom = mean(density_mushroom), density_thin = mean(density_thin), density_stubby = mean(density_stubby), spine_vol_overall = mean(RAYBURST_VOLUME, na.rm = TRUE), spine_length_overall = mean(MAX_DTS, na.rm = TRUE))
PDB_TYPE_ave <- data_all %>% group_by(group, animal_num, stack, retro_label, PDB, TYPE) %>% summarise(Cscore = mean(c_score), zscore = mean(zscore), nn = mean(nn_dist), nn2 = mean(nn2_dist), nn3 = mean(nn3_dist), density_overall = mean(density_overall), density_mushroom = mean(density_mushroom), density_thin = mean(density_thin), density_stubby = mean(density_stubby), spine_vol_overall = mean(RAYBURST_VOLUME, na.rm = TRUE), spine_length_overall = mean(MAX_DTS, na.rm = TRUE))
PDB_ave <- data_all %>% group_by(group, animal_num, retro_label, PDB) %>% summarise(Cscore = mean(c_score), zscore = mean(zscore), nn = mean(nn_dist), nn2 = mean(nn2_dist), nn3 = mean(nn3_dist), density_overall = mean(density_overall), density_mushroom = mean(density_mushroom), density_thin = mean(density_thin), density_stubby = mean(density_stubby), spine_vol_overall = mean(RAYBURST_VOLUME, na.rm = TRUE), spine_length_overall = mean(MAX_DTS, na.rm = TRUE))
# ^first groups by treatment group, then by animal, then by day of image (i.e. -F), then by dendrite location
# summarise then finds averages for each group for all various pieces of data
animal_ave <- data_all %>% group_by(group, animal_num) %>% summarise(Cscore = mean(c_score), zscore = mean(zscore), nn = mean(nn_dist), nn2 = mean(nn2_dist), nn3 = mean(nn3_dist), density_overall = mean(density_overall), density_mushroom = mean(density_mushroom), density_thin = mean(density_thin), density_stubby = mean(density_stubby), spine_vol_overall = mean(RAYBURST_VOLUME, na.rm = TRUE), spine_length_overall = mean(MAX_DTS, na.rm = TRUE))
# groups by treatment then animal, then finds averages per animal for various data
labeled_ave <- data_all %>% group_by(group, animal_num, retro_label) %>% summarise(Cscore = mean(c_score), zscore = mean(zscore), nn = mean(nn_dist), nn2 = mean(nn2_dist), nn3 = mean(nn3_dist), density_overall = mean(density_overall), density_mushroom = mean(density_mushroom), density_thin = mean(density_thin), density_stubby = mean(density_stubby), spine_vol_overall = mean(RAYBURST_VOLUME, na.rm = TRUE), spine_length_overall = mean(MAX_DTS, na.rm = TRUE))
# groups by treatment then animal, then finds averages per animal for various data
dir.create("analysis") #creates a directory to create a file on the computer
savePath <- paste(filePath,"/", "analysis", collapse = "/", sep="") # creates the path where files can be saved
setwd(savePath) # sets working directory to the path created above
write.csv(data_all, "data_all.csv")
write.csv(PDB_ave, "PDB averages.csv")
write.csv(animal_ave, "Animal Averages.csv")
write.csv(TYPE_ave, "spine_type_averages.csv")
write.csv(PDB_TYPE_ave, "PDB_spine_type_averages.csv")
write.csv(labeled_ave, "labeled_averages.csv")