Skip to content

Commit d95daf6

Browse files
author
Mike Jiang
committed
unify the argument initial values among tmixFilter, flowClust and .flowClustK. #15
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/flowClust@122084 bc3139a8-67e5-0310-9ffc-ced21a209358
1 parent 5356b3c commit d95daf6

File tree

4 files changed

+62
-33
lines changed

4 files changed

+62
-33
lines changed

R/SetClasses.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ setClass("tmixFilter",
6969
prototype(expName="Flow Experiment", K=numeric(0), B=500, tol=1e-5,
7070
nu=4, lambda=1, nu.est=0, trans=1, min.count=10, max.count=10,
7171
min=NA, max=NA, level=0.9, u.cutoff=NA_real_, z.cutoff=0,
72-
randomStart=10, B.init=500, tol.init=1e-2, seed=1, criterion="BIC",
72+
randomStart=0, B.init=500, tol.init=1e-2, seed=1, criterion="BIC",
7373
control=vector("list",0), usePrior="no", prior=list(NA)),
7474
contains="parameterFilter")
7575

R/flowClust.R

+16-12
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
flowClust<-function(x, expName="Flow Experiment", varNames=NULL, K, B=500, tol=1e-5, nu=4, lambda=1, nu.est=0, trans=1, min.count=10, max.count=10, min=NULL, max=NULL, level=0.9, u.cutoff=NULL, z.cutoff=0, randomStart=0, B.init=B, tol.init=1e-2, seed=1, criterion="BIC", control=NULL, prior=NULL,usePrior="no")
1+
flowClust<-function(x, expName="Flow Experiment", varNames=NULL, K
2+
, nu=4, lambda=1,trans=1, min.count=10, max.count=10, min=NULL, max=NULL
3+
, randomStart=0, prior=NULL,usePrior="no", criterion="BIC", ...)
24
{
35
if (is(x, "flowFrame")) {
46
if (length(varNames)==0) {
@@ -115,24 +117,23 @@ flowClust<-function(x, expName="Flow Experiment", varNames=NULL, K, B=500, tol=1
115117
{
116118
message("Using the serial version of flowClust")
117119
# C version
118-
result<-lapply(as.list(1:length(K)),.flowClustK, y, expName=expName, varNames=varNames, K=K, B=B, tol=tol, nu=nu, lambda=lambda, nu.est=nu.est, trans=trans, min.count=min.count, max.count=max.count, min=min, max=max, level=level, u.cutoff=u.cutoff, z.cutoff=z.cutoff, randomStart=randomStart, B.init=B.init, tol.init=tol.init, seed=seed, criterion=criterion, control=control,include=include, rm.max, rm.min, prior,usePrior)
120+
result<-lapply(as.list(1:length(K)),.flowClustK, y, expName=expName, varNames=varNames, K=K, criterion=criterion
121+
, nu=nu, lambda=lambda, trans=trans, min.count=min.count, max.count=max.count, min=min, max=max
122+
, randomStart=randomStart, include=include, rm.max, rm.min, prior,usePrior
123+
, ...)
119124
}
120125
else if(length(grep("parallel",loadedNamespaces()))==1)
121126
{
122127
require(parallel)
123128
# Split into nClust segReadsList
124129
# We solely rely on getOption("mc.cores",2L) to determine parallel cores.
125130
# and don't want to pass mc.cores explicitly because on windows, mclapply does not take mc.cores>1
126-
result<-mclapply(as.list(1:length(K)),.flowClustK, y, expName=expName, varNames=varNames, K=K, B=B, tol=tol, nu=nu, lambda=lambda, nu.est=nu.est, trans=trans, min.count=min.count, max.count=max.count, min=min, max=max, level=level, u.cutoff=u.cutoff, z.cutoff=z.cutoff, randomStart=randomStart, B.init=B.init, tol.init=tol.init, seed=seed, criterion=criterion, control=control,include=include, rm.max, rm.min, prior,usePrior, mc.preschedule=FALSE)
131+
result<-mclapply(as.list(1:length(K)),.flowClustK, y, expName=expName, varNames=varNames, K=K, criterion=criterion
132+
, nu=nu, lambda=lambda, trans=trans, min.count=min.count, max.count=max.count, min=min, max=max
133+
, randomStart=randomStart, include=include, rm.max, rm.min, prior,usePrior, mc.preschedule=FALSE
134+
, ...)
127135
}
128-
#else if(length(grep("snowfall",loadedNamespaces()))==1 && sfParallel())
129-
#{
130-
# Number of clusters
131-
# nClust<-sfCpus()
132-
# message("Using the parallel (snowfall) version of flowClust with ", nClust, " cpus or cores")
133-
# result<-sfLapply(as.list(1:length(K)),.flowClustK, y, expName=expName, varNames=varNames, K=K, B=B, tol=tol, nu=nu, lambda=lambda, nu.est=nu.est, trans=trans, min.count=min.count, max.count=max.count, min=min, max=max, level=level, u.cutoff=u.cutoff, z.cutoff=z.cutoff, randomStart=randomStart, B.init=B.init, tol.init=tol.init, seed=seed, criterion=criterion, control=control,include=include, rm.max, rm.min, prior,usePrior)
134-
#}
135-
136+
136137
# Simply return a flowClust object
137138
if (length(K)==1)
138139
{
@@ -147,7 +148,10 @@ flowClust<-function(x, expName="Flow Experiment", varNames=NULL, K, B=500, tol=1
147148
}
148149
}
149150

150-
.flowClustK<-function(i, y, expName="Flow Experiment", varNames=NULL, K, B=500, tol=1e-5, nu=4, lambda=1, nu.est=0, trans=1, min.count=10, max.count=10, min=NULL, max=NULL, level=0.9, u.cutoff=NULL, z.cutoff=0, randomStart=10, B.init=B, tol.init=1e-2, seed=1, criterion="BIC", control=NULL, include, rm.max,rm.min, prior,usePrior)
151+
.flowClustK<-function(i, y, expName="Flow Experiment", varNames=NULL, K
152+
, nu, lambda, trans, min.count, max.count, min, max, randomStart, include, rm.max,rm.min, prior,usePrior, criterion # default values set in flowClust API
153+
, nu.est=0, B=500, tol=1e-5, level=0.9, u.cutoff=NULL, z.cutoff=0, B.init=B, tol.init=1e-2, seed=1, control=NULL
154+
)
151155
{
152156
oorder<-1:K[i]
153157
.model<-1; #Tells the C code whether to run ECM with non-conjugate priors, or classic flowClust.'

man/flowClust.Rd

+41-16
Original file line numberDiff line numberDiff line change
@@ -17,40 +17,65 @@ This function performs automated clustering for identifying cell populations in
1717
}
1818

1919
\usage{
20-
flowClust(x, expName="Flow Experiment", varNames=NULL, K, B=500,
21-
tol=1e-5, nu=4, lambda=1, nu.est=0, trans=1,
20+
flowClust(x, expName="Flow Experiment", varNames=NULL, K, nu=4, lambda=1, trans=1,
2221
min.count=10, max.count=10, min=NULL, max=NULL,
23-
level=0.9, u.cutoff=NULL, z.cutoff=0, randomStart=0,
24-
B.init=B, tol.init=1e-2, seed=1, criterion="BIC",
25-
control=NULL,prior=NULL,usePrior="no")
22+
randomStart=0, prior=NULL,usePrior="no", criterion = "BIC", ...)
2623
}
2724

2825
\arguments{
2926
\item{x}{A numeric vector, matrix, data frame of observations, or object of class \code{flowFrame}. Rows correspond to observations and columns correspond to variables.}
3027
\item{expName}{A character string giving the name of the experiment.}
28+
3129
\item{varNames}{A character vector specifying the variables (columns) to be included in clustering. When it is left unspecified, all the variables will be used.}
30+
3231
\item{K}{An integer vector indicating the numbers of clusters.}
33-
\item{B}{The maximum number of EM iterations.}
34-
\item{tol}{The tolerance used to assess the convergence of the EM.}
32+
3533
\item{nu}{The degrees of freedom used for the \eqn{t} distribution. Default is 4. If \code{nu=Inf}, Gaussian distribution will be used.}
34+
3635
\item{lambda}{The initial transformation to be applied to the data.}
37-
\item{nu.est}{A numeric indicating whether \code{nu} is to be estimated or not. May take 0 (no estimation, default), 1 (estimation) or 2 (cluster-specific estimation).}
36+
3837
\item{trans}{A numeric indicating whether the Box-Cox transformation parameter is estimated from the data. May take 0 (no estimation), 1 (estimation, default) or 2 (cluster-specific estimation).}
38+
3939
\item{min.count}{An integer specifying the threshold count for filtering data points from below. The default is 10, meaning that if 10 or more data points are smaller than or equal to \code{min}, they will be excluded from the analysis. If \code{min} is \code{NULL}, then the minimum of data as per each variable will be used. To suppress filtering, set it as -1.}
40+
4041
\item{max.count}{An integer specifying the threshold count for filtering data points from above. Interpretation is similar to that of \code{min.count}.}
42+
4143
\item{min}{The lower boundary set for data filtering. Note that it is a vector of length equal to the number of variables (columns), implying that a different value can be set as per each variable.}
44+
4245
\item{max}{The upper boundary set for data filtering. Interpretation is similar to that of \code{min}.}
43-
\item{level}{A numeric value between 0 and 1 specifying the threshold quantile level used to call a point an outlier. The default is 0.9, meaning that any point outside the 90\% quantile region will be called an outlier.}
44-
\item{u.cutoff}{Another criterion used to identify outliers. If this is \code{NULL}, then \code{level} will be used. Otherwise, this specifies the threshold (e.g., 0.5) for \eqn{u}, a quantity used to measure the degree of \dQuote{outlyingness} based on the Mahalanobis distance. Please refer to Lo et al. (2008) for more details.}
45-
\item{z.cutoff}{A numeric value between 0 and 1 underlying a criterion which may be used together with \code{level}/\code{u.cutoff} to identify outliers. A point with the probability of assignment \eqn{z} (i.e., the posterior probability that a data point belongs to the cluster assigned) smaller than \code{z.cutoff} will be called an outlier. The default is 0, meaning that assignment will be made no matter how small the associated probability is, and outliers will be identified solely based on the rule set by \code{level} or \code{cutoff}.}
46+
4647
\item{randomStart}{A numeric value indicating how many times a random partition of the data is generated for initialization. The default is 0, meaning that a deterministic partition based on kmeans clustering is used. A value of 10 means 10 random partitions of the data will be generated, each of which is followed by a short EM run. The partition leading to the highest likelihood value will be adopted to be the initial partition for the eventual long EM run.} % If \code{randomStart} is 0, this initialization strategy is not applied and hierarchical clustering is used instead.}
47-
\item{B.init}{The maximum number of EM iterations following each random partition in random initialization.}
48-
\item{tol.init}{The tolerance used as the stopping criterion for the short EM runs in random initialization.}
49-
\item{seed}{An integer giving the seed number used when \code{randomStart>0}.}
50-
\item{criterion}{A character string stating the criterion used to choose the best model. May take either \code{"BIC"} or \code{"ICL"}. This argument is only relevant when \code{length(K)>1}.}
51-
\item{control}{An argument reserved for internal use.}
48+
5249
\item{prior}{The specification of the prior. Used if usePrior="yes"}
50+
5351
\item{usePrior}{Argument specifying whether or not the prior will be used. Can be "yes","no","vague". A vague prior will be automatically specified if usePrior="vague"}
52+
53+
\item{criterion}{A character string stating the criterion used to choose the best model. May take either \code{"BIC"} or \code{"ICL"}. This argument is only relevant when \code{length(K)>1}. Default is "BIC".}
54+
55+
\item{...}{other arguments:
56+
B: The maximum number of EM iterations. Default is 500.
57+
58+
tol: The tolerance used to assess the convergence of the EM. Default is 1e-5.
59+
60+
nu.est: A numeric indicating whether \code{nu} is to be estimated or not. May take 0 (no estimation, default), 1 (estimation) or 2 (cluster-specific estimation). Default is 0.
61+
62+
level: A numeric value between 0 and 1 specifying the threshold quantile level used to call a point an outlier. The default is 0.9, meaning that any point outside the 90\% quantile region will be called an outlier.
63+
64+
u.cutoff: Another criterion used to identify outliers. If this is \code{NULL}, which is default, then \code{level} will be used. Otherwise, this specifies the threshold (e.g., 0.5) for \eqn{u}, a quantity used to measure the degree of \dQuote{outlyingness} based on the Mahalanobis distance. Please refer to Lo et al. (2008) for more details.
65+
66+
z.cutoff: A numeric value between 0 and 1 underlying a criterion which may be used together with \code{level}/\code{u.cutoff} to identify outliers. A point with the probability of assignment \eqn{z} (i.e., the posterior probability that a data point belongs to the cluster assigned) smaller than \code{z.cutoff} will be called an outlier. The default is 0, meaning that assignment will be made no matter how small the associated probability is, and outliers will be identified solely based on the rule set by \code{level} or \code{u.cutoff}.
67+
68+
B.init: The maximum number of EM iterations following each random partition in random initialization. Default is the same as B.
69+
70+
tol.init: The tolerance used as the stopping criterion for the short EM runs in random initialization. Default is 1e-2.
71+
72+
seed: An integer giving the seed number used when \code{randomStart>0}. Default is 1.
73+
74+
75+
control: An argument reserved for internal use.
76+
77+
}
78+
5479
}
5580

5681
\details{

tests/testthat/test_1d.R

+4-4
Original file line numberDiff line numberDiff line change
@@ -104,13 +104,13 @@ test_that("flowClust:FL2-H, 3 mode", {
104104
})
105105

106106
test_that("flowClust:FL2-A, 3 mode", {
107-
chnl <- "FL2-A"
107+
# chnl <- "FL2-A"
108108

109-
res <- flowClust(fr, varNames = chnl, tol = 1e-10, K = 1:4, randomStart = 0)
109+
# res <- flowClust(fr, varNames = chnl, tol = 1e-10, K = 1:4, randomStart = 0)
110110

111111
#3 mode is best fit
112-
scores <- sapply(res, slot, "ICL")
113-
scores.diff <- diff(scores)
112+
# scores <- sapply(res, slot, "ICL")
113+
# scores.diff <- diff(scores)
114114
# expect_gt(scores.diff[2], 0) #2nd is pos
115115
# expect_true(all(scores.diff[-2] < 0))# the rest are neg
116116
# par(mfrow=c(1,4))

0 commit comments

Comments
 (0)