From 309ea08e7b5cc58a8a7366d557a49e07d75e8d55 Mon Sep 17 00:00:00 2001 From: "Michael J. Crowther" Date: Wed, 12 Jan 2022 20:51:38 +0100 Subject: [PATCH] v4.0.8 built for ssc --- ssc/version_4_0_8/survsim.ado | 434 ++++++++ ssc/version_4_0_8/survsim.sthlp | 75 ++ ssc/version_4_0_8/survsim_details.txt | 12 + ssc/version_4_0_8/survsim_model.ado | 56 + ssc/version_4_0_8/survsim_model.sthlp | 119 +++ ssc/version_4_0_8/survsim_msm.ado | 958 ++++++++++++++++++ ssc/version_4_0_8/survsim_msm.sthlp | 337 ++++++ ssc/version_4_0_8/survsim_msm_mataf.mata | 7 + .../survsim_msm_mataf_total.mata | 11 + ssc/version_4_0_8/survsim_parametric.sthlp | 179 ++++ ssc/version_4_0_8/survsim_user.ado | 453 +++++++++ ssc/version_4_0_8/survsim_user.sthlp | 202 ++++ ssc/version_4_0_8/survsim_user_core.ado | 133 +++ 13 files changed, 2976 insertions(+) create mode 100644 ssc/version_4_0_8/survsim.ado create mode 100644 ssc/version_4_0_8/survsim.sthlp create mode 100644 ssc/version_4_0_8/survsim_details.txt create mode 100644 ssc/version_4_0_8/survsim_model.ado create mode 100644 ssc/version_4_0_8/survsim_model.sthlp create mode 100644 ssc/version_4_0_8/survsim_msm.ado create mode 100644 ssc/version_4_0_8/survsim_msm.sthlp create mode 100644 ssc/version_4_0_8/survsim_msm_mataf.mata create mode 100644 ssc/version_4_0_8/survsim_msm_mataf_total.mata create mode 100644 ssc/version_4_0_8/survsim_parametric.sthlp create mode 100644 ssc/version_4_0_8/survsim_user.ado create mode 100644 ssc/version_4_0_8/survsim_user.sthlp create mode 100644 ssc/version_4_0_8/survsim_user_core.ado diff --git a/ssc/version_4_0_8/survsim.ado b/ssc/version_4_0_8/survsim.ado new file mode 100644 index 0000000..be75c9b --- /dev/null +++ b/ssc/version_4_0_8/survsim.ado @@ -0,0 +1,434 @@ +*! version 4.0.8 14jan2022 MJC + +/* +History +12jan2022 version 4.0.8 - build error; now fixed +02dec2021 version 4.0.7 - bug fix with msm simulation, would occur if directory of installed files had a space in it + - missing .mata files in pkg file +13oct2021 version 4.0.6 - bug fix with varnames in chazard() or logchazard() not parsed correctly with survsim user; now fixed + - error check for negative hazard() added + - Now tidies up after itself if it errors out +02sep2021 version 4.0.5 - bug fix; mixture + tde() would error out, now fixed +24aug2020 version 4.0.4 - re-syncing with predictms for merlin model simulation +- bug fix in missing value warning text; now fixed +25may2020 version 4.0.3 - efficiency of _msm improved + - bug fix: Stata variables in user-defined hazard functions not parsed correctly; now fixed +30mar2020 version 4.0.2 - bug fix; missing value issue when t = 0 in chq function, now fixed + - bug fix; indexing issue occurred in varied circumstances, now fixed +29mar2020 version 4.0.1 - bug fix; error in distribution(gompertz) when specified in a hazard#() -> now fixed + - help file edits +26mar2020 version 4.0.0 - computation time reduced by about 70% for multi-state model simulation + - reset added to hazard#() for semi-Markov models + - syntax changed for user-defined functions, #t now {t} for more robust parsing + - support for general multiple timescales, including: + - {t0} can be used in user() and tdefunction() for time of entry into initial state for associated transition + - cr subclass removed, now part of general msm syntax + - bug fix; variables used directly in user() with a multi-state model were not passed to Mata -> now fixed + - bug fix; if the same variable was used in multiple hazard#()s, parsing would only pick up the first -> now fixed +24mar2020 version 3.1.0 - general Markov multi-state simulation now added + - transmatrix() option added + - tolerance changed to 0 from 1e-08 in mm_root() +21mar2020 version 3.0.0 - complete re-write of core code + - can now simulate from a fitted merlin survival model + - can now simulate from cause-specific hazards competing risks models, with either parametric or user-defined hazard functions, with a new syntax + - distribution() now required for parameteric models + - ltruncated() added, can be number or varname, synced with all except merlin model simulation + - maxtime() can now be a number or a varname + - help files re-written, hugely improved + - added error report on missing values when general algorithm used + - parsing made more robust +18dec2013 version 2.0.3 - tde() with logcumh() or cumh() caused an error. Now fixed. +10oct2013 version 2.0.2 - bug fix -> exact option added to confirm vars +08jul2013 version 2.0.1 - minor bug fix in mixture models +04jan2013 version 2.0.0 - maxtime() added to specify maximum generated survival time and event indicator + - n() removed so must set obs + - loghazard() and hazard() added for user-defined hazard functions, simulated using quadrature and root finding + - default centol() changed to 1E-08 + - varnames can now appear in loghazard()/hazard() allowing time-dependent covariates + - mixture models now use Brent method -> much more reliable than NR, and allows tdes + - cumhazard() and logcumhazard() now added which just use root finding +15Nov2011 version 1.1.2 - Fixed bug when generating covariate tempvars with competing risks. +20sep2011 version 1.1.1 - Exponential distribution added. +10sep2011 version 1.1.0 - Added Gompertz distribution. Time-dependent effects available for all models except mixture. showerror option added. +09sep2011 version 1.0.1 - Time dependent effects now allowed for standard Weibull. +*/ + +program define survsim + version 14.2 + + CheckObs + + local opts1 /// -fitted merlin model- + MODel(passthru) /// + // + + local opts2 /// -user-defined- + LOGHazard(passthru) /// + Hazard(passthru) /// + LOGCHazard(passthru) /// + CHazard(passthru) /// + NODES(passthru) /// -# nodes- + TDEFUNCtion(passthru) /// -function of time to interact with time-dependent effects- + MIXture /// -two-component mixture- + PMix(passthru) /// -mixture parameter- + CR /// -simulate competing risks- + NCR(string) /// -number of competing risks- + Distribution(passthru) /// + Lambdas(passthru) /// + Gammas(passthru) /// + COVariates(passthru) /// -baseline covariates, e.g, (sex 0.5 race -0.4)- + TDE(passthru) /// -time dependent effects- + MARGinal /// + // + + local commonopts /// + MAXTime(string) /// -right censoring time- + LTruncated(string) /// -left truncation/delayed entry- + // + + //==============================================================================================================// + //parse opts that are common to all three settings + + syntax newvarname(min=1 max=3) , [`commonopts' *] + local newvars `varlist' + local stime : word 1 of `newvars' + local died : word 2 of `newvars' + local hasmaxtime = "`maxtime'"!="" + + if `hasmaxtime' { + + if "`died'"=="" { + di as error "2 new variable names required" + exit 198 + } + + cap confirm number `maxtime' + if _rc { + cap confirm numeric variable `maxtime' + if _rc { + di as error "Invalid maxtime()" + exit 198 + } + } + else { + if `maxtime'<0 { + di as error "maxtime() must be >0" + exit 198 + } + } + + tempvar maxtvar + gen `maxtvar' = `maxtime' + local maxtopt maxtime(`maxtvar') + } + + if "`ltruncated'"!="" { + + cap confirm number `ltruncated' + if _rc { + cap confirm numeric variable `ltruncated' + if _rc { + di as error "Invalid ltruncated()" + exit 198 + } + } + else { + if `ltruncated'<0 { + di as error "ltruncated() must be >0" + exit 198 + } + } + + tempvar ltvar + gen `ltvar' = `ltruncated' + local ltopt ltruncated(`ltvar') + } + + local 0 , `options' + + //==============================================================================================================// + //survsim_model + + syntax , [`opts1' *] + + if "`model'"!="" { + capture survsim_model `newvars', `model' /// + `maxtopt' // + local rc = c(rc) + if `rc' { + cap drop _survsim_rc + cap drop `stime' + cap drop `died' + exit `rc' + } + else { + RCREPORT _survsim_rc + MISSREPORT `stime' + GENEVENT `stime' `died' `maxtime' + exit + } + } + + local 0 , `options' + + //==============================================================================================================// + //survsim_user + + syntax , [`opts2' *] + + local useropt "`loghazard'`hazard'`logchazard'`chazard'`mixture'" + + if "`useropt'"!="" { + capture survsim_user `newvars', `useropt' /// + `nodes' /// + `maxtopt' /// + `ltopt' /// + `covariates' /// + `tde' /// + `tdefunction' /// + `distribution' /// -for mixture- + `lambdas' /// + `gammas' /// + `pmix' /// + `marginal' // + local rc = c(rc) + if `rc' { + cap drop _survsim_rc + cap drop `stime' + cap drop `died' + exit `rc' + } + else { + RCREPORT _survsim_rc + MISSREPORT `stime' + GENEVENT `stime' `died' `maxtime' + exit + } + } + + local 0 , `options' + + //==============================================================================================================// + //survsim_msm + + syntax , [HAZARD1(passthru) HAZARD2(passthru) TRANSMATrix(passthru) STARTSTATE(passthru) *] + + if "`hazard1'"!="" { + + capture program drop survsim_msm // refreshes Mata functions + + survsim_msm `newvars', `transmatrix' /// + `hazard1' /// + `hazard2' /// + `maxtopt' /// + `ltopt' /// + `startstate' /// + `nodes' /// + `options' // + exit + + } + + local 0 , `options' `distribution' `lambdas' `gammas' `covariates' `tde' + + //==============================================================================================================// + //parametric distribution + + syntax , Distribution(string) /// + Lambdas(string) /// + [ /// + Gammas(string) /// + COVariates(string) /// + TDE(string) /// + `opts3' /// + ] // + + local ld = length("`distribution'") + if substr("exponential",1,max(1,`ld'))=="`distribution'" { + local dist "exp" + } + else if substr("gompertz",1,max(3,`ld'))=="`distribution'" { + local dist "gompertz" + } + else if substr("weibull",1,max(1,`ld'))=="`distribution'" { + local dist "weibull" + } + else { + di as error "Unknown distribution" + exit 198 + } + + foreach l of numlist `lambdas' `gammas' { + if `l'<0 { + di as error "lambdas()/gammas() must be > 0" + exit 198 + } + } + + if "`dist'"=="exp" & "`gammas'"!="" { + di as error "gammas cannot be specified with distribution(exponential)" + exit 198 + } + + //==============================================================================================================// + //baseline covariates and time-dependent effects + + if "`covariates'"!="" { + + tokenize `covariates' + local ncovlist : word count `covariates' + local ncovvars = `ncovlist'/2 + cap confirm integer number `ncovvars' + if _rc>0 { + di as error "Variable/number missing in covariates" + exit 198 + } + local ind = 1 + local error = 0 + forvalues i=1/`ncovvars' { + cap confirm var ``ind'', exact + if _rc { + local errortxt "invalid covariates(... ``ind'' ``=`ind'+1'' ...)" + local error = 1 + } + cap confirm num ``=`ind'+1'' + if _rc { + local errortxt "invalid covariates(... ``ind'' ``=`ind'+1'' ...)" + local error = 1 + } + tempvar vareffect`i' + gen double `vareffect`i'' = ``ind''*``=`ind'+1'' + + local ind = `ind' + 2 + } + if `error' { + di as error "`errortxt'" + exit 198 + } + local cov_linpred "`vareffect1'" + if `ncovvars'>1 { + forvalues k=2/`ncovvars' { + local cov_linpred "`cov_linpred' + `vareffect`k''" + } + } + local cov_linpred "* exp(`cov_linpred')" + + } + + if "`tde'"!="" { + + tokenize `tde' + local ntde : word count `tde' + local ntdevars = `ntde'/2 + cap confirm integer number `ntdevars' + if _rc>0 { + di as error "Variable/number missing in tde" + exit 198 + } + + local ind = 1 + local error = 0 + forvalues i=1/`ntdevars' { + cap confirm var ``ind'', exact + if _rc { + local errortxt "invalid tde(... ``ind'' ``=`ind'+1'' ...)" + local error = 1 + } + cap confirm num ``=`ind'+1'' + if _rc { + local errortxt "invalid tde(... ``ind'' ``=`ind'+1'' ...)" + local error = 1 + } + tempvar tdeeffect`i' + gen double `tdeeffect`i'' = ``ind''*``=`ind'+1'' + + local ind = `ind' + 2 + } + if `error' { + di as error "`errortxt'" + exit 198 + } + local tde_linpred "`tdeeffect1'" + if `ntdevars'>1 { + forvalues k=2/`ntdevars' { + local tde_linpred "`tde_linpred' + `tdeeffect`k''" + } + } + local tde_linpred "+ `tde_linpred'" + + } + + //==============================================================================================================// + //stime and died + + tempvar u + qui gen double `u' = runiform() + + if "`dist'"=="exp" { + if "`ltruncated'"!="" { + local ltcontrib "* exp(-`lambdas' `cov_linpred' * `ltvar' ^ (1 `tde_linpred') / (1 `tde_linpred'))" + } + qui gen double `stime' = (-ln(`u' `ltcontrib')*(1 `tde_linpred')/(`lambdas' `cov_linpred'))^(1/(1 `tde_linpred')) + } + else if "`dist'"=="weibull" { + if "`ltruncated'"!="" { + local ltcontrib "* exp(-`lambdas' `cov_linpred' * `gammas' * `ltvar' ^ (`gammas' `tde_linpred') / (`gammas' `tde_linpred'))" + } + qui gen double `stime' = (-ln(`u' `ltcontrib')*(`gammas' `tde_linpred')/(`lambdas'*`gammas' `cov_linpred'))^(1/(`gammas' `tde_linpred')) + } + else{ + if "`ltruncated'"!="" { + local ltcontrib "* exp(- `lambdas' `cov_linpred' / (`gammas' `tde_linpred') * (exp(`ltvar' * (`gammas' `tde_linpred') ) - 1))" + } + qui gen double `stime' = (1/(`gammas' `tde_linpred'))*log(1-(((`gammas' `tde_linpred')*log(`u' `ltcontrib'))/(`lambdas' `cov_linpred'))) + } + + MISSREPORT `stime' + if "`maxtime'"!="" { + GENEVENT `stime' `died' `maxtvar' + qui replace `stime' = `maxtvar' if `died'==0 + } + +end + +program CheckObs + if _N==0 { + di as error "No observations" + exit 198 + } +end + +program GENEVENT + args stime died maxtime + qui gen byte `died' = `stime'<`maxtime' if `stime'!=. + qui replace `stime' = `maxtime' if `stime'>`maxtime' & `stime'!=. +end + +program RCREPORT + args rcvar + qui su `rcvar' if `rcvar'==1, meanonly + if r(N)>0 { + di in yellow "Warning: `r(N)' survival times did not converge" + di in yellow " They have been set to the final iteration value" + di in yellow " You can identify them by _survsim_rc = 1" + } + qui su `rcvar' if `rcvar'==2, meanonly + if r(N)>0 { + di in yellow "Warning: `r(N)' survival times were below the lower limit" + di in yellow " You can identify them by _survsim_rc = 2" + } + qui su `rcvar' if `rcvar'==3, meanonly + if r(N)>0 { + di in yellow "Warning: `r(N)' survival times were above the upper limit of maxtime()" + di in yellow " They have been set to maxtime()" + di in yellow " You can identify them by _survsim_rc = 3" + } +end + +program MISSREPORT + args time + qui count if `time'==. + if `r(N)'>0 { + di in yellow "`r(N)' missing values generated in simulated survival times" + } +end diff --git a/ssc/version_4_0_8/survsim.sthlp b/ssc/version_4_0_8/survsim.sthlp new file mode 100644 index 0000000..353966c --- /dev/null +++ b/ssc/version_4_0_8/survsim.sthlp @@ -0,0 +1,75 @@ +{smcl} +{* *! version 1.0.0}{...} +{vieweralsosee "survsim parametric" "help survsim parametric"}{...} +{vieweralsosee "survsim user" "help survsim user"}{...} +{vieweralsosee "survsim model" "help survsim model"}{...} +{vieweralsosee "survsim msm" "help survsim msm"}{...} +{vieweralsosee "merlin" "help merlin"}{...} +{vieweralsosee "galahad" "help galahad"}{...} +{title:Title} + +{p2colset 5 16 16 2}{...} +{p2col :{cmd:survsim} {hline 2}}Simulate survival data from a parametric distribution, +a user-defined distribution, from a fitted {helpb merlin} model, from a cause-specific +hazards competing risks model, or from a general multi-state model{p_end} +{p2colreset}{...} + + +{title:Description} + +{pstd} +{cmd:survsim} simulates survival data from: +{p_end} + +{phang2} +{helpb survsim parametric:help survsim parametric} - a parametric distribution including the exponential, Gompertz and Weibull, +and 2-component mixtures of them. Baseline covariates can be included, with specified associated log hazard ratios. +Non-proportional hazards can also be included with all models; under an exponential or Weibull model covariates are interacted +with log time, under a Gompertz model covariates are interacted with time. See {helpb survsim##paper1:Crowther and Lambert (2012)} +for more details. +{p_end} + +{phang2} +{helpb survsim user:help survsim user} - a user-defined distribution. Survival times can be simulated from bespoke, +user-defined [log] [cumulative] hazard functions. The function must be specified in Mata code (using colon operators), +with survival times generated using a combination of numerical integration and root finding techniques. Time-dependent +effects can also be specified with a user-defined function of time. See {helpb survsim##paper2:Crowther and Lambert (2013)} +for more details. +{p_end} + +{phang2} +{helpb survsim model:help survsim model} - a fitted {helpb merlin} model. {helpb merlin} fits a broad class of survival models, +including standard parametric models, spline-based survival models, and user-defined survival models. +{p_end} + +{phang2} +{helpb survsim msm:help survsim msm} - a competing risks or general multi-state model. Event times can be simulated from +transition-specific hazards, where each transition hazard function can be a standard parametric distribution, or a +user-defined complex hazard function. Covariates and time-dependent effects can be specified for each transition-specific +hazard independently. +{p_end} + + +{title:Author} + +{pstd}{cmd:Michael J. Crowther}{p_end} +{pstd}Red Door Analytics{p_end} +{pstd}E-mail: {browse "mailto:michael@reddooranalytics.se":michael@reddooranalytics.se}{p_end} + +{phang}Please report any errors you may find.{p_end} + + +{title:References} + +{phang}Bender R, Augustin T and Blettner M. Generating survival times to simulate Cox proportional hazards models. {it:Statistics in Medicine} 2005;24:1713-1723.{p_end} + +{phang}Beyersmann J, Latouche A, Buchholz A and Schumacher M. Simulating competing risks data in survival analysis. {it:Statistics in Medicine} 2009;28:956-971.{p_end} + +{marker paper1}{...} +{phang}Crowther MJ and Lambert PC. {browse "http://www.stata-journal.com/article.html?article=st0275":Simulating complex survival data.}{it: The Stata Journal} 2012;12(4):674-687.{p_end} + +{marker paper2}{...} +{phang}Crowther MJ and Lambert PC. {browse "http://onlinelibrary.wiley.com/doi/10.1002/sim.5823/abstract":Simulating biologically plausible complex survival data.} {it:Statistics in Medicine} 2013;32(23):4118-4134.{p_end} + +{phang}Jann, B. 2005. moremata: Stata module (Mata) to provide various functions. Available from http://ideas.repec.org/c/boc/bocode/s455001.html.{p_end} + diff --git a/ssc/version_4_0_8/survsim_details.txt b/ssc/version_4_0_8/survsim_details.txt new file mode 100644 index 0000000..63512ee --- /dev/null +++ b/ssc/version_4_0_8/survsim_details.txt @@ -0,0 +1,12 @@ +TITLE: `SURVSIM' : module to simulate survival/time-to-event data + +DESCRIPTION/AUTHOR: survsim simulates survival data from a parametric distribution, +a user-defined distribution, a cause-specific hazards competing risks model, a general +multi-state model, or from an estimated -merlin- model. Baseline covariates and +time-dependent effects can be specified when defining a data-generating model. +Delayed entry/left truncation is allowed. + +Author: Michael J. Crowther, Red Door Analytics +Support: email michael@reddooranalytics.se + +Requires: Stata version 14.2 diff --git a/ssc/version_4_0_8/survsim_model.ado b/ssc/version_4_0_8/survsim_model.ado new file mode 100644 index 0000000..081367e --- /dev/null +++ b/ssc/version_4_0_8/survsim_model.ado @@ -0,0 +1,56 @@ + +program define survsim_model + version 14.2 + + syntax newvarname(min=1 max=2), /// + /// + Model(string) /// + MAXTime(string) /// -Maximum simulated time- + // + + local nvars : word count `varlist' + local stime : word 1 of `varlist' + local event : word 2 of `varlist' + + cap which merlin + if _rc { + display in yellow "You need to install the merlin package. This can be installed using," + display in yellow ". {stata ssc install merlin}" + exit 198 + } + cap which predictms + if _rc { + display in yellow "You need to install the multistate package. This can be installed using," + display in yellow ". {stata ssc install multistate}" + exit 198 + } + + //====================================================================================================================// + + quietly { + + qui gen double `stime' = . + cap drop _survsim_rc + qui gen _survsim_rc = 0 + tempvar modtouse + gen byte `modtouse' = 1 + local N = _N + + //make sure right censoring handled + //-> gets replaced if less than row specific maxtime() afterwards + tempvar tvar + su `maxtime' + gen `tvar' = `r(max)' in 1 + + predictms , models(`model') /// + singleevent /// + survsim(`stime') /// + survsimtouse(`modtouse') /// + n(`N') /// + timevar(`tvar') /// + probability /// + simulate // + + } + +end diff --git a/ssc/version_4_0_8/survsim_model.sthlp b/ssc/version_4_0_8/survsim_model.sthlp new file mode 100644 index 0000000..6004f12 --- /dev/null +++ b/ssc/version_4_0_8/survsim_model.sthlp @@ -0,0 +1,119 @@ +{smcl} +{* *! version 1.0.0}{...} +{vieweralsosee "survsim" "help survsim"}{...} +{vieweralsosee "survsim parametric" "help survsim parametric"}{...} +{vieweralsosee "survsim user" "help survsim user"}{...} +{vieweralsosee "survsim msm" "help survsim msm"}{...} +{vieweralsosee "merlin" "help merlin"}{...} +{vieweralsosee "galahad" "help galahad"}{...} +{viewerjumpto "Syntax" "survsim model##syntax"}{...} +{viewerjumpto "Description" "survsim model##description"}{...} +{viewerjumpto "Options" "survsim model##options"}{...} +{viewerjumpto "Examples" "survsim model##examples"}{...} +{title:Title} + +{p2colset 5 16 16 2}{...} +{p2col :{cmd:survsim} {hline 2}}Simulate survival data from a parametric distribution, +a user-defined distribution, from a fitted {helpb merlin} model, from a cause-specific +hazards competing risks model, or from a general multi-state model{p_end} +{p2colreset}{...} + + +{marker syntax}{...} +{title:Syntax} + +{phang} +Syntax for simulating survival times from a fitted {helpb merlin} survival model: + +{phang2} +{cmd: survsim} {it:newvarname1} {it:newvarname2} {cmd:,} {opt mod:el(name)} {opt maxt:ime(#|varname)} + + +{synoptset 36 tabbed}{...} +{synopthdr:Options} +{synoptline} +{synopt:{opt mod:el(name)}}the {helpb estimates store} {it:name} of the fitted {helpb merlin} model; see details{p_end} +{synopt:{opt maxt:ime(#|varname)}}right censoring time(s); either a common number or a {varname}{p_end} +{synoptline} +{p2colreset}{...} + + +{marker description}{...} +{title:Description} + +{pstd} +{helpb survsim} simulates survival data from a parametric distribution, a user-defined distribution, from a fitted +{helpb merlin} model, from a cause-specific hazards competing risks model, or from a Markov multi-state model. +This help file centres on simulating from a fitted {helpb merlin} model. +{p_end} +{pstd} +Survival times can be simulated from the estimated model, using the variables in your current dataset. The variables can simply +be altered, to provide a flexible framework to simulate using the estimated parameter vector. +{p_end} + +{pstd} +{it:newvarname1} specifies the new variable name to contain the generated survival times. {it:newvarname2} specifies the +new variable name to contain the generated event indicator. +{p_end} + + +{marker options}{...} +{title:Options} + +{phang}{opt model(name)} specifies the name of the {helpb estimates store} object containing the estimates of the model +fitted. The survival must be estimated using the {helpb merlin} command. For example,{p_end} + +{phang2}{cmd:. merlin (_t trt , family(weibull, failure(_d)))}{p_end} +{phang2}{cmd:. estimates store m1}{p_end} +{phang2}{cmd:. survsim stime died, model(m1) maxtime(10)}{p_end} + +{phang2}{cmd:survsim} will simulate from the fitted model, using covariate values that are in your current dataset.{p_end} + +{phang}{opt maxtime(#|varname)} specifies the right censoring time(s). Either a common maximum follow-up time {cmd:#} can be +specified for all observations, or observation specific censoring times can be specified by using a {varname}. {p_end} + + +{title:Remarks} + +{pstd}Always {helpb set seed}, to ensure reproducibility.{p_end} + + +{marker examples}{...} +{title:Examples} + +{phang}Simulate from a fitted Weibull survival model:{p_end} +{phang2}{cmd:. webuse brcancer}{p_end} +{phang2}{cmd:. stset rectime, failure(censrec) scale(365)}{p_end} +{phang2}{cmd:. merlin (_t hormon , family(weibull, failure(_d)))}{p_end} +{phang2}{cmd:. estimates store m1}{p_end} +{phang2}{cmd:. survsim stime died, model(m1) maxtime(10)}{p_end} + +{phang}Simulate from a fitted Royston-Parmar spline-based survival model:{p_end} +{phang2}{cmd:. webuse brcancer}{p_end} +{phang2}{cmd:. stset rectime, failure(censrec) scale(365)}{p_end} +{phang2}{cmd:. merlin (_t hormon , family(rp, failure(_d) df(3)))}{p_end} +{phang2}{cmd:. estimates store m2}{p_end} +{phang2}{cmd:. survsim stime died, model(m2) maxtime(10)}{p_end} + + +{title:Author} + +{pstd}{cmd:Michael J. Crowther}{p_end} +{pstd}Red Door Analytics{p_end} +{pstd}E-mail: {browse "mailto:michael@reddooranalytics.se":michael@reddooranalytics.se}{p_end} + +{phang}Please report any errors you may find.{p_end} + + +{title:References} + +{phang}Bender R, Augustin T and Blettner M. Generating survival times to simulate Cox proportional hazards models. {it:Statistics in Medicine} 2005;24:1713-1723.{p_end} + +{marker paper1}{...} +{phang}Crowther MJ and Lambert PC. {browse "http://www.stata-journal.com/article.html?article=st0275":Simulating complex survival data.}{it: The Stata Journal} 2012;12(4):674-687.{p_end} + +{marker paper2}{...} +{phang}Crowther MJ and Lambert PC. {browse "http://onlinelibrary.wiley.com/doi/10.1002/sim.5823/abstract":Simulating biologically plausible complex survival data.} {it:Statistics in Medicine} 2013;32(23):4118-4134.{p_end} + +{phang}Jann, B. 2005. moremata: Stata module (Mata) to provide various functions. Available from http://ideas.repec.org/c/boc/bocode/s455001.html.{p_end} + diff --git a/ssc/version_4_0_8/survsim_msm.ado b/ssc/version_4_0_8/survsim_msm.ado new file mode 100644 index 0000000..a286920 --- /dev/null +++ b/ssc/version_4_0_8/survsim_msm.ado @@ -0,0 +1,958 @@ + +program define survsim_msm + version 14.2 + syntax newvarname(min=3 max=3), /// + HAZARD1(string) /// + MAXTime(string) /// + /// + [ /// + TRANSMATrix(name) /// + LTruncated(string) /// + STARTSTATE(string) /// + NODES(int 30) /// + /// + * /// hazard#() + ] // + + local stime : word 1 of `varlist' + local state : word 2 of `varlist' + local event : word 3 of `varlist' + + cap which lmoremata.mlib + if _rc { + display in yellow "You need to install the moremata package. This can be installed using," + display in yellow ". {stata ssc install moremata}" + exit 198 + } + + //====================================================================================================================// + //parse hazard#()s + + local opts `options' + local Nhazards = 2 + while "`opts'"!="" & `Nhazards'<51 { + local 0 , `opts' + syntax , [HAZARD`Nhazards'(string) * ] + local opts `options' + local Nhazards = `Nhazards' + 1 + } + if `Nhazards'==51 { + di as error "hazard#() limit reached, or unknown options found" + exit 198 + } + local Nhazards = `Nhazards' - 1 + if `Nhazards'==1 { + di as error "hazard2() required" + exit 198 + } + + forvalues i=1/`Nhazards' { + + local 0 , `hazard`i'' + syntax , /// + [ /// + User(string) /// + Distribution(string) /// + Lambda(numlist max=1 >0) /// + Gamma(numlist max=1 >0) /// + /// + COVariates(string) /// + TDE(string) /// + TDEFUNCtion(string) /// + /// + RESET /// + /// + ] // + + if "`user'"!="" & "`distribution'"!="" { + di as error "user() and distribution() cannot both be specified in hazard`i'()" + exit 198 + } + else if "`user'"=="" & "`distribution'"=="" { + di as error "One of user() or distribution() must be specified in hazard`i'()" + exit 198 + } + + local user`i' `user' + + if "`distribution'"!="" { + local ld = length("`distribution'") + if substr("exponential",1,max(1,`ld'))=="`distribution'" { + local dist`i' "exp" + } + else if substr("gompertz",1,max(3,`ld'))=="`distribution'" { + local dist`i' "gompertz" + if "`gamma'"=="" { + di as error "gamma() required" + exit 198 + } + } + else if substr("weibull",1,max(1,`ld'))=="`distribution'" { + local dist`i' "weibull" + if "`gamma'"=="" { + di as error "gamma() required" + exit 198 + } + } + else { + di as error "Unknown distribution" + exit 198 + } + local l`i' `lambda' + local g`i' `gamma' + } + local cov`i' `covariates' + local tde`i' `tde' + local tdefunction`i' `tdefunction' + local reset`i' = "`reset'"!="" + local resetind `resetind' `reset`i'' + + } + + //====================================================================================================================// + //transition matrix + + if "`transmatrix'"!="" { + cap confirm matrix `transmatrix' + if _rc>0 { + di as error "transmatrix(`transmatrix') not found" + exit 198 + } + mata: check_transmatrix() + //leaves behind `Ntrans' + local Nstates = rowsof(`transmatrix') + + if `Ntrans'!=`Nhazards' { + di as error "Number of hazard#() specifications does not match number of transitions in transmatrix()" + exit 198 + } + + //get start and stop states for each transition + local ind = 1 + forvalues i=1/`Nstates' { + forvalues j=1/`Nstates' { + if `transmatrix'[`i',`j']!=. { + local startstate`ind' = `i' + local ind = `ind' + 1 + } + } + } + } + else { + + //competing risks + local Nstates = `Nhazards' + 1 + local Ntrans = `Nhazards' + tempname transmatrix + mat `transmatrix' = J(`Nstates',`Nstates',.) + forvalues i=2/`Nstates' { + local transind = `i' - 1 + mat `transmatrix'[1,`i'] = `transind' + local startstate`transind' = 1 + } + + } + + //====================================================================================================================// + //starting state + + if "`startstate'"!="" { + + cap confirm integer number `startstate' + if _rc { + cap confirm numeric variable `startstate' + if _rc { + di as error "Invalid startstate()" + exit 198 + } + } + else { + if `startstate'<1 { + di as error "startstate() must be >0" + exit 198 + } + } + + tempvar ssvar + gen `ssvar' = `startstate' + + } + + //====================================================================================================================// + //baseline covariates + + forvalues z = 1/`Nhazards' { + + tempvar expxb`z' + if "`cov`z''"!="" { + + tokenize `cov`z'' + local ncovlist : word count `cov`z'' + local ncovvars = `ncovlist'/2 + cap confirm integer number `ncovvars' + if _rc>0 { + di as error "Variable/number missing in covariates" + exit 198 + } + local ind = 1 + local error = 0 + forvalues i=1/`ncovvars' { + cap confirm numeric var ``ind'', exact + if _rc { + local errortxt "invalid covariates(... ``ind'' ``=`ind'+1'' ...) in hazard`z'()" + local error = 1 + } + cap confirm num ``=`ind'+1'' + if _rc { + local errortxt "invalid covariates(... ``ind'' ``=`ind'+1'' ...) in hazard`z'()" + local error = 1 + } + tempvar vareffect`z'`i' + qui gen double `vareffect`z'`i'' = ``ind''*``=`ind'+1'' + local ind = `ind' + 2 + } + if `error' { + di as error "`errortxt'" + exit 198 + } + local cov_linpred "`vareffect`z'1'" + if `ncovvars'>1 { + forvalues k=2/`ncovvars' { + local cov_linpred "`cov_linpred' + `vareffect`z'`k''" + } + } + + qui gen double `expxb`z'' = exp(`cov_linpred') + } + else qui gen byte `expxb`z'' = 1 + + local expxb `expxb' `expxb`z'' + } + + //====================================================================================================================// + // Time-dependent effects + + forvalues z = 1/`Nhazards' { + + tempvar tdexb`z' + if "`tde`z''"!="" { + tokenize `tde`z'' + local ntde : word count `tde`z'' + local ntdevars = `ntde'/2 + cap confirm integer number `ntdevars' + if _rc>0 { + di as error "Variable/number missing in tde" + exit 198 + } + + local ind = 1 + local error = 0 + forvalues i=1/`ntdevars' { + cap confirm var ``ind'', exact + if _rc { + local errortxt "invalid tde(... ``ind'' ``=`ind'+1'' ...) in hazard`z'()" + local error = 1 + } + cap confirm num ``=`ind'+1'' + if _rc { + local errortxt "invalid tde(... ``ind'' ``=`ind'+1'' ...) in hazard`z'()" + local error = 1 + } + tempvar tdeeffect`z'`i' + qui gen double `tdeeffect`z'`i'' = ``ind''*``=`ind'+1'' + + local ind = `ind' + 2 + } + if `error' { + di as error "`errortxt'" + exit 198 + } + local tde_linpred "`tdeeffect`z'1'" + if `ntdevars'>1 { + forvalues k=2/`ntdevars' { + local tde_linpred "`tde_linpred' + `tdeeffect`z'`k''" + } + } + + qui gen double `tdexb`z'' = `tde_linpred' + } + else qui gen double `tdexb`z'' = 0 + + local tdexb `tdexb' `tdexb`z'' + } + + //====================================================================================================================// + //core + + //first pass to turn distributions into user functions + forvalues z = 1/`Nhazards' { + + if "`dist`z''"!="" { + if "`dist`z''"=="exp" { + local user`z' "`l`z''" + } + else if "`dist`z''"=="weibull" { + local user`z' "`l`z'' :* `g`z'' :* {t} :^ (`g`z'' :- 1)" + } + else { + local user`z' "`l`z'' :* exp(`g`z'' :* {t})" + } + } + + } + + //mata-fy the user functions + local Nhvars = 0 + + forvalues i = 1/`Nhazards' { + + local hazard `user`i'' + + //#t0s + mata: st_local("matahazard`i'",subinstr("`hazard'","{t0}","time0[,`startstate`i'']")) + forvalues j=1/`Nhazards' { + mata: st_local("matahazard`i'",subinstr("`matahazard`i''","{t0`j'}","time0[,`startstate`j'']")) + } + + //#t + if `reset`i'' { + mata: st_local("matahazard`i'",subinstr("`matahazard`i''","{t}","(tnodes:-lt)")) + } + else { + mata: st_local("matahazard`i'",subinstr("`matahazard`i''","{t}","tnodes")) + } + + local matahazard`i' "(`matahazard`i'') :* expxb[,`i']" + + if "`tde`i''"!="" { + if "`tdefunction`i''"=="" { + if `reset`i'' { + local tdefunction tnodes :- lt + } + else { + local tdefunction tnodes + } + } + else { + //{t0}s + mata: st_local("tdefunction`i'",subinstr("`tdefunction`i''","{t0}","time0[,`startstate`i'']")) + forvalues j=1/`Nhazards' { + mata: st_local("tdefunction`i'",subinstr("`tdefunction`i''","{t0`j'}","time0[,`startstate`j'']")) + } + //{t} + if `reset`i'' { + mata: st_local("tdefunction",subinstr("`tdefunction`i''","{t}","tnodes:-lt")) + } + else { + mata: st_local("tdefunction",subinstr("`tdefunction`i''","{t}","tnodes")) + } + } + + local matahazard`i' "`matahazard`i'' :* exp(tdexb[,`i'] :* (`tdefunction'))" + } + + //push any variables found in the function into Mata + if "`dist`i''"=="" { + gettoken first rest : matahazard`i', parse("[ ,\^\*\(\)-\+/:<>=]") + while "`rest'"!="" { + if trim("`first'")!="," { + cap confirm var `first', exact + if !_rc { + local test1 = 0 + local hvarindex = 0 + foreach var in `covlist' { + local hvarindex = `hvarindex' + 1 + if "`first'"=="`var'" { + local test1 = 1 + local refhvarindex = `hvarindex' + } + } + if `test1'==0 { + local covlist `covlist' `first' //contains a list of all varnames specified, be they time-indep or time-dependent + local Nhvars = `Nhvars' + 1 + mata: st_local("matahazard`i'",subinstr("`matahazard`i''","`first'","hvars[,`Nhvars']")) + } + else { + mata: st_local("matahazard`i'",subinstr("`matahazard`i''","`first'","hvars[,`refhvarindex']")) + } + } + } + gettoken first rest : rest, parse("[ ,\^\*\(\)-\+/:<>=]") + } + } + + } + + //now get total hazards for each starting state + + forvalues i=1/`Nstates' { + + local totalhazard`i' + + forvalues j=1/`Nstates' { + + local trans = `transmatrix'[`i',`j'] + if `trans'!=. { + if "`totalhazard`i''"=="" { + local totalhazard`i' `matahazard`trans'' + } + else { + local totalhazard`i' `totalhazard`i'' :+ `matahazard`trans'' + } + } + + } +// n di "`totalhazard`i''" + } + + //====================================================================================================================// + //define Mata functions & pointers + + quietly { + + //transition-specific hazards + mata: Phfs = J(`Nhazards',1,NULL) + findfile survsim_msm_mataf.mata + local smfile `r(fn)' + forvalues i=1/`Nhazards' { + global i `i' + global matahazard`i' `matahazard`i'' + do "`smfile'" + //mata: function hf`i'(tnodes,expxb,tdexb,hvars,lt,time0) return(`matahazard`i'') + mata: Phfs[`i'] = &hf`i'() + macro drop i matahazard`i' + } + + //total hazard from each state + mata: Ptotalhfs = J(`Nstates',1,NULL) + findfile survsim_msm_mataf_total.mata + local smfile `r(fn)' + forvalues i=1/`Nstates' { + if "`totalhazard`i''"!="" { + global i `i' + global totalhazard`i' `totalhazard`i'' + do "`smfile'" + //mata: function totalhf`i'(tnodes,expxb,tdexb,hvars,lt,time0) return(`totalhazard`i'') + mata: Ptotalhfs[`i'] = &totalhf`i'() + macro drop i totalhazard`i' + } + } + + } + + gaussquad_ss, n(`nodes') //Gauss-Legendre nodes and weights + + mata: survsim_msm(Phfs,Ptotalhfs) + + //done + mata mata drop Phfs Ptotalhfs + forvalues i=1/`Nhazards' { + mata mata drop hf`i'() + } + forvalues i=1/`Nstates' { + if "`totalhazard`i''"!="" { + mata mata drop totalhf`i'() + } + } + +end + +program define gaussquad_ss, rclass + syntax [, N(int 30)] + tempname weights nodes + mata ss_gq("`weights'","`nodes'") + return matrix weights = `weights' + return matrix nodes = `nodes' +end + +local RS real scalar +local RC real colvector +local RM real matrix +local SS string scalar +local PC pointer colvector +local PS pointer scalar +local opts "o1, o2, o3, o4, o5, o6, o7, o8, o9, o10" + +mata: + +void survsim_msm(`PC' Phfs, `PC' Ptotalhfs) +{ + N = st_nobs() + tmat = st_matrix(st_local("transmatrix")) + Nstates = rows(tmat) + Nnextstates = rownonmissing(tmat) + reset = strtoreal(tokens(st_local("resetind"))) + + st_view(maxt = .,.,st_local("maxtime")) + if (st_local("ltruncated")!="") { + st_view(lt=.,.,st_local("ltruncated")) + } + else lt = J(N,1,0) + + if (st_local("startstate")!="") { + st_view(ss=.,.,st_local("ssvar")) + } + else ss = J(N,1,1) + + st_view(expxb = .,.,st_local("expxb")) + st_view(tdexb = .,.,st_local("tdexb")) + + Nhvars = strtoreal(st_local("Nhvars")) + if (Nhvars) st_view(hvars=.,.,tokens(st_local("covlist"))) + else hvars = J(N,1,.) + + //time of entry to each state: obs x state + //-> gets updated so can be used in user() functions + time0 = J(N,Nstates,.) + for (i=1;i<=Nstates;i++) { + sindex = selectindex(ss:==i) + Ns = rows(sindex) + if (Ns) time0[sindex,i] = lt[sindex] + } + + //newvarname stubs + stime = st_local("stime") + state = st_local("state") + event = st_local("event") + + nodes = st_matrix("r(nodes)")' + weights = st_matrix("r(weights)") + + done = J(N,1,0) + + states = J(1,2,ss) + times = J(1,2,lt) + coreindex = 1::N + tol = 0 + maxit = 1000 + + //initial variables + varindex = 0 + id1 = st_addvar("double", stime+strofreal(varindex)) + id2 = st_addvar("int", state+strofreal(varindex)) + st_store(.,id1,.,times[,1]) + st_store(.,id2,.,states[,1]) + + //handle time = 0 + anyt0 = selectindex(times[,1]:==0) + Nanyt0 = rows(anyt0)\cols(anyt0) + if (Nanyt0[1] & Nanyt0[2]) times[anyt0,2] = J(Nanyt0[1],1,smallestdouble()) + + while (sum(done)1) { + pmatrix = J(Nsim[1],Nnextstates[i],.) + for (s=1;s<=Nnextstates[i];s++) { + pmatrix[,s] = (*Phfs[tmat[i,stateid[s]]])(times[index,2],expxb[index,],tdexb[index,],hvars[index,],times[index,1],time0[index,]) + } + pmatrix = pmatrix :/ quadrowsum(pmatrix) + newstates = J(Nsim[1],1,.) + for (s=1;s<=Nsim[1];s++) { + newstates[s] = rdiscrete(1,1,pmatrix[s,]) + } + states[index,2] = stateid[newstates] + } + else states[index,2] = J(Nsim[1],1,stateid) + + } + + } + + // update done for those now in an absorbing state + // and new times of entry into each state + notdoneindex = selectindex(done:==0) + Nnotdone = rows(notdoneindex)\cols(notdoneindex) + + if (Nnotdone[1] & Nnotdone[2]) { + for (j=1;j<=Nstates;j++) { + //entry times + toupdate = select(notdoneindex,states[notdoneindex,2]:==j) + Ntoupdate = rows(toupdate)\cols(toupdate) + if (Ntoupdate[1] & Ntoupdate[2]) { + time0[toupdate,j] = times[toupdate,2] + //done index + if (!Nnextstates[j]) done[toupdate] = J(Ntoupdate[1],1,1) + } + + } + } + + //post new variables + varindex++ + id1 = st_addvar("double", stime+strofreal(varindex)) + id2 = st_addvar("int", state+strofreal(varindex)) + id3 = st_addvar("byte", event+strofreal(varindex)) + + st_store(.,id1,.,times[,2]) + st_store(.,id2,.,states[,2]) + st_store(.,id3,.,events) + + //now update done to missing, so not carried forward + doneindex = selectindex(done:==1) + Ndone = rows(doneindex)\cols(doneindex) + if (Ndone[1] & Ndone[2]) { + times[doneindex,2] = states[doneindex,2] = J(Ndone[1],1,.) + } + + } + + printf("variables "+stime+"0 to "+stime+strofreal(varindex)+" created\n" ) + printf("variables "+state+"0 to "+state+strofreal(varindex)+" created\n" ) + printf("variables "+event+"1 to "+event+strofreal(varindex)+" created\n" ) + +} + +function survsim_msm_sim( `RC' t, `RC' logu, `RM' nodes, `RM' weights, `PS' hfunc, /// + `RC' lt, `RM' time0, /// + `RM' expxb, `RM' tdexb, `RM' hvars) +{ + tnodes = (t :- lt) :* 0.5 :* J(rows(t),1,nodes) :+ (t :+ lt) :/ 2 + chq = (*hfunc)(tnodes,expxb,tdexb,hvars,lt,time0) + _editmissing(chq,0) + return((t :- lt) :/2 :* chq * weights :+ logu) +} + +void ss_gq(`SS' weightsname, `SS' nodesname) +{ + n = strtoreal(st_local("n")) + i = range(1,n,1)' + i1 = range(1,n-1,1)' + + muzero = 2 + a = J(1,n,0) + b = i1:/sqrt(4 :* i1:^2 :- 1) + + A= diag(a) + for(j=1;j<=n-1;j++){ + A[j,j+1] = b[j] + A[j+1,j] = b[j] + } + symeigensystem(A,vec,nodes) + weights = (vec[1,]:^2:*muzero)' + weights = weights[order(nodes',1)] + nodes = nodes'[order(nodes',1)'] + st_matrix(weightsname,weights) + st_matrix(nodesname,nodes) +} + +void check_transmatrix() +{ + tmat = st_matrix(st_local("transmatrix")) + tmat_ind = tmat:!=. //indicator matrix + + //Error checks + if (max(diagonal(tmat_ind))>0) { + errprintf("All elements on the diagonal of transmatrix() must be coded missing = .\n") + exit(198) + } + row = 1 + rtmat = rows(tmat) + trans = 1 + while (row1 & tmat[row,i]!=.) { + errprintf("Elements of transmatrix() are not unique\n") + exit(198) + } + if (tmat[row,i]!=. & tmat[row,i]!=trans){ + errprintf("Elements of transmatrix() must be sequentially numbered from 1,...,K, where K = number of transitions\n") + exit(198) + } + if (tmat[row,i]!=.) trans++ + } + row++ + } + st_local("Ntrans",strofreal(trans-1)) +} + +`RC' survsim_mm_root( transmorphic x, /// bj: will be replaced by solution + pointer(real matrix function) scalar f, /// Address of the function whose zero will be sought for + `RC' ax, /// Root will be sought for within a range [ax,bx] + `RC' bx, /// + real scalar tol, /// Acceptable tolerance for the root value (default 0) + real scalar maxit, /// maximum # of iterations (default: 1000) + `RC' index,| /// + `opts') // additional args to pass on to f +{ + transmorphic fs // setup for f + `RC' a, b, c // Abscissae, descr. see above + `RC' fa, fb, fc // f(a), f(b), f(c) + real scalar prev_step // Distance from the last but one + real scalar tol_act // Actual tolerance + real scalar p // Interpolation step is calcu- + real scalar q // lated in the form p/q; divi- + // sion operations is delayed + // until the last moment + real scalar new_step // Step at this iteration + real scalar t1, cb, t2 + real scalar itr + + `RS' nobs + + fs = mm_callf_setup(f, args()-7, `opts') // bj: prepare function call + + //x = . // bj: initialize output + + //copy everything that gets indexed and subsequently updated + tempo1 = o1 + tempo5 = o5 + tempo6 = o6 + tempo7 = o7 + tempo8 = o8 + tempo9 = o9 + nobs = rows(x) + + result = J(nobs,1,.) + x = J(nobs,1,.) + + a = ax + b = bx + + fa = mm_callf(fs, a); fb = mm_callf(fs, b) + c = a; fc = fa + + //if ( fa==. ) return(0) // bj: abort if fa missing + tempindex = selectindex(fa:==.) + nti = rows(tempindex) + if (nti & cols(tempindex)) result[tempindex,] = J(nti,1,0) + + //remove tempindex as they are done + index = select(index,fa:!=.) + + if (rows(index)) { //not done + + tempindex = select(index,((fa[index]:>0) :* (fb[index]:>0))) + if (cols(tempindex)) { + + flag1 = abs(fa[tempindex]) :< abs(fb[tempindex]) + flag2 = 1:-flag1 + tempindex2 = select(tempindex,flag1) + nti = rows(tempindex2) + if (nti) { + result[tempindex2] = J(nti,1,2) + x[tempindex2] = a[tempindex2] + } + tempindex2 = select(tempindex,flag2) + nti = rows(tempindex2) + if (nti) { + result[tempindex2] = J(nti,1,3) + x[tempindex2] = b[tempindex2] + } + //update index + index = select(index,x[index]:==.) + if (rows(index)==0) return(result) + } + + tempindex = select(index,((fa[index]:<0) :* (fb[index]:<0))) + if (cols(tempindex)) { + + flag1 = abs(fa[tempindex]) :< abs(fb[tempindex]) + flag2 = 1:-flag1 + tempindex2 = select(tempindex,flag1) + nti = rows(tempindex2) + if (nti) { + result[tempindex2] = J(nti,1,2) + x[tempindex2] = a[tempindex2] + } + + tempindex2 = select(tempindex,flag2) + nti = rows(tempindex2) + if (nti) { + result[tempindex2] = J(nti,1,3) + x[tempindex2] = b[tempindex2] + } + //update index + index = select(index,x[index]:==.) + if (rows(index)==0) return(result) + } + } + else return(result) + + for (itr=1; itr<=maxit; itr++) { + + tempindex = index[selectindex(fb[index]:==.)] + + if (cols(tempindex)) result[tempindex] = J(rows(tempindex),1,0) + + //remove tempindex as they are done + index = select(index,fb[index]:!=.) + + if (!cols(index)) return(result) + + tempindex = select(index,abs(fc[index]) :< abs(fb[index])) + + if (cols(tempindex)) { + a[tempindex] = b[tempindex]; b[tempindex] = c[tempindex]; c[tempindex] = a[tempindex]; // best approximation + fa[tempindex] = fb[tempindex]; fb[tempindex] = fc[tempindex]; fc[tempindex] = fa[tempindex] + } + + tol_act = 2 :* survsim_epsilon_vec(b[index]) :+ tol:/2 + new_step = (c[index]:-b[index]):/2 + + flag1 = (abs(new_step):<=tol_act) :+ (fb[index]:==0) + flag2 = (flag1:==0) + tempindex = select(index,flag1) + + if (cols(tempindex)) { + x[tempindex] = b[tempindex] + result[tempindex] = J(rows(tempindex),1,0) + } + + index = select(index,flag2) + if (!cols(index) | !rows(index)) return(result) + + //update stuff + tol_act = select(tol_act,flag2) + new_step = select(new_step,flag2) + + // Decide if the interpolation can be tried + prev_step = b[index]:-a[index] + + tempindex11 = (abs(prev_step) :>= tol_act) :* (abs(fa[index]) :> abs(fb[index])) + tempindex = select(index,tempindex11) + + if (cols(tempindex)) { + + cb = c[tempindex] :- b[tempindex] + + p = q = cb:*0 //fix + + flag1 = a[tempindex] :== c[tempindex] + flag2 = 1:-flag1 + tempindex2 = select(tempindex,flag1) + if (cols(tempindex2)) { + t1 = fb[tempindex2]:/fa[tempindex2] + p[selectindex(flag1)] = select(cb,flag1) :* t1 + q[selectindex(flag1)] = 1:- t1 + } + tempindex2 = select(tempindex,flag2) + if (cols(tempindex2)) { + q[selectindex(flag2)] = fa[tempindex2]:/fc[tempindex2]; t1 = fb[tempindex2]:/fc[tempindex2]; t2 = fb[tempindex2]:/fa[tempindex2] + p[selectindex(flag2)] = t2 :* ( select(cb,flag2) :* q[selectindex(flag2)] :* (q[selectindex(flag2)] :- t1) :- (b[tempindex2]:-a[tempindex2]):*(t1:-1) ) + q[selectindex(flag2)] = (q[selectindex(flag2)]:-1) :* (t1:-1) :* (t2:-1) + } + flag1 = p:>0 + flag2 = 1:-flag1 + tempindex = selectindex(flag1) + if (cols(tempindex)) q[tempindex] = -q[tempindex] + tempindex = selectindex(flag2) + if (cols(tempindex)) p[tempindex] = -p[tempindex] + + tempindex = (p :< (0.75:*cb:*q:-abs(select(tol_act,tempindex11):*q):/2)) :* (p :< abs(select(prev_step,tempindex11):*q:/2)) + if (cols(tempindex)) { + //update tempindex11 + tempindex22 = select(selectindex(tempindex11),tempindex) + if (cols(tempindex22) & rows(tempindex22)) new_step[tempindex22] = p[selectindex(tempindex)]:/q[selectindex(tempindex)] + } + + } + + tempindex = selectindex(abs(new_step) :< tol_act) + if (rows(tempindex)) { + flag1 = new_step[tempindex] :> 0 + flag2 = 1:-flag1 + tempindex2 = select(tempindex,flag1) + if (rows(tempindex2)) new_step[tempindex2] = tol_act[tempindex2] + tempindex2 = select(tempindex,flag2) + if (rows(tempindex2)) new_step[tempindex2] = -tol_act[tempindex2] + } + + a[index] = b[index]; fa[index] = fb[index] // Save the previous approx. + b[index] = b[index] :+ new_step + + o1 = tempo1[index] + o5 = tempo5[index] + o6 = tempo6[index,] + o7 = tempo7[index,] + o8 = tempo8[index,] + o9 = tempo9[index,] + + fb[index] = mm_callf(fs, b[index]) // Do step to a new approxim. + + tempindex1 = select(index,((fb[index]:>0) :* (fc[index]:>0))) + tempindex2 = select(index,((fb[index]:<0) :* (fc[index]:<0))) + + if (cols(tempindex1)) { + c[tempindex1] = a[tempindex1] + fc[tempindex1] = fa[tempindex1] + } + if (cols(tempindex2)) { + c[tempindex2] = a[tempindex2] + fc[tempindex2] = fa[tempindex2] + } + } + + x[index] = b[index] + result[index] = J(rows(index),1,0) + return(result) // bj: convergence not reached +} + + +real colvector survsim_epsilon_vec(real colvector y) +{ + res = J(rows(y),1,.) + for (i=1;i<=rows(y);i++) res[i] = epsilon(y[i]) + return(res) +} + + +end diff --git a/ssc/version_4_0_8/survsim_msm.sthlp b/ssc/version_4_0_8/survsim_msm.sthlp new file mode 100644 index 0000000..9275dcb --- /dev/null +++ b/ssc/version_4_0_8/survsim_msm.sthlp @@ -0,0 +1,337 @@ +{smcl} +{* *! version 1.0.0}{...} +{vieweralsosee "survsim" "help survsim"}{...} +{vieweralsosee "survsim parametric" "help survsim parametric"}{...} +{vieweralsosee "survsim user" "help survsim user"}{...} +{vieweralsosee "survsim model" "help survsim model"}{...} +{vieweralsosee "merlin" "help merlin"}{...} +{vieweralsosee "galahad" "help galahad"}{...} +{viewerjumpto "Syntax" "survsim msm##syntax"}{...} +{viewerjumpto "Description" "survsim msm##description"}{...} +{viewerjumpto "Options" "survsim msm##options"}{...} +{viewerjumpto "Examples" "survsim msm##examples"}{...} +{title:Title} + +{p2colset 5 16 16 2}{...} +{p2col :{cmd:survsim} {hline 2}}Simulate survival data from a parametric distribution, +a user-defined distribution, from a fitted {helpb merlin} model, from a cause-specific +hazards competing risks model, or from a general multi-state model{p_end} +{p2colreset}{...} + + +{marker syntax}{...} +{title:Syntax} + +{phang} +Syntax for simulating survival times from a multi-state model: + +{phang2} +{cmd: survsim} {it:timestub} {it:statestub} {it:eventstub} {cmd:,} {cmd:hazard1(}{help survsim cr##hazard_options:{it:haz_options}}{cmd:)} +{cmd:hazard2(}{help survsim cr##hazard_options:{it:haz_options}}{cmd:)} {opt maxt:ime(#|varname)} +[{opt transmat:rix(name)} {cmd:hazard3(}{help survsim cr##hazard_options:{it:haz_options}}{cmd:)} +{cmd: ...} {help survsim cr##commonopts:{it:options}}] + + +{synoptset 36 tabbed}{...} +{synopthdr:Transition-specific hazard options} +{synoptline} +{synopt:{cmdab:d:istribution(}{cmdab:e:xponential)}}exponential survival distribution{p_end} +{synopt:{cmdab:d:istribution(}{cmdab:gom:pertz)}}Gompertz survival distribution{p_end} +{synopt:{cmdab:d:istribution(}{cmdab:w:eibull)}}Weibull survival distribution{p_end} +{synopt:{opt l:ambda(#)}}scale parameter for exponential, Weibull or Gompertz{p_end} +{synopt:{opt g:amma(#)}}shape parameter for Weibull or Gompertz{p_end} +{synopt:{opt user(function)}}user-defined hazard function, written in Mata code; see details{p_end} +{synopt:{opt cov:ariates(varname # [# ...] ...)}}baseline covariates{p_end} +{synopt:{opt tde(varname # [# ...] ...)}}time-dependent effects{p_end} +{synopt:{opt tdefunc:tion(string)}}function of time to interact with covariates specified in {bf:tde()}; see details{p_end} +{synopt:{opt reset}}clock-reset timescale, i.e. the timescale is reset to zero on state entry{p_end} +{synoptline} + +{synopthdr:Options} +{synoptline} +{synopt:{opt transmat:rix(name)}}a transition matrix defining possible transitions between states{p_end} +{synopt:{opt maxt:ime(#|varname)}}right censoring time(s); either a common number or a {varname}{p_end} +{synopt:{opt startstate(#|varname)}}starting state for observations; either a common number or a {varname}{p_end} +{synopt:{opt lt:runcated(#|varname)}}starting time for observations, i.e. left truncation time(s); either a common number or a {varname}{p_end} +{synopt:{opt nodes(#)}}number of Gauss-Legendre quadrature nodes, default {cmd:nodes(30)}{p_end} +{synoptline} +{p2colreset}{...} + + +{marker description}{...} +{title:Description} + +{pstd} +{helpb survsim} simulates survival data from a parametric distribution, a user-defined distribution, from a fitted +{helpb merlin} model, from a cause-specific hazards competing risks model, or from a multi-state model. +This help file centres on simulating from a competing risks or multi-state model, specified using transition-specific +hazard functions. +{p_end} + +{pstd} +Each transition-specific hazard function can be specified as a standard parametric model, such as the exponential, Weibull or +Gompertz distributions, or a user-defined bespoke hazard function written in Mata code. Baseline covariates can be included +in each transition-specific hazard, with specified associated log hazard ratios. Non-proportional hazards can also be included. +Transition-specific hazard models can be as similar, or as different, as required. Up to 50 transition-specific hazard +functions may be specified. +{p_end} + +{pstd} +From each potential starting state, event times are simulated from the overall, total hazard function, using the method of +{help survsim cr##paper3:Beyersmann et al. (2009)}, i.e. the sum of all the transition-specific hazards out of the starting +state. This is implemented using a combination of numerical integration and root finding techniques using the algorithm +developed in {help survsim cr##paper2:Crowther and Lambert (2013)}. +{p_end} + +{pstd} +{it:timestub} specifies the new variable name stub to contain the generated survival times for each transition. +{it:statestub} specifies the new variable name stub to contain the generated state identifiers, i.e which state each observation +has transitioned to/is in, at the associated times. {it:eventstub} specifies the new variable name stub to contain the +event indicators, i.e. whether an event or right-censoring occurred at the associated times. When observations have +reached an absorbing state, or reached {cmd:maxtime()}, they will have missing observations in any further generated variables. +When all observations have reached an absorbing state, or are right-censored, then the simulation will cease and the generated +variables are returned. +{p_end} + + +{marker options}{...} +{marker hazard_options}{...} +{title:Transition-specific hazard options} + +{phang}{opt distribution}({it:string}) specifies the parametric survival distribution to use, including {cmd:exponential}, +{cmd:gompertz} or {cmd:weibull}.{p_end} + +{phang}{opt lambda(#)} defines the scale parameter for a exponential/Weibull/Gompertz distribution(s).{p_end} + +{phang}{opt gamma(#)} defines the shape parameter for a Weibull/Gompertz parametric distribution(s).{p_end} + +{phang}{opt user(function)} defines a custom hazard function. The function can include:{p_end} +{synoptset 15 notes}{...} +{synopt:{cmd:{t}}} which denotes the main timescale, measured on the time since starting state, {cmd:startstate()}, +timescale (which may be {cmd:ltruncated()}){p_end} +{synopt:{cmd:{t0}}} which denotes the time of entry to the current state of the associated transition hazard, measured on the +time since initial starting state timescale{p_end} +{synopt:{cmd:varname}} which denotes a variable in your dataset{p_end} +{synopt:{cmd:+-*/^}} standard Mata mathematical operators, using colon notation i.e. {cmd:2 :* {t}}, see +{helpb help [M-2] op_colon}. Colon operators must be used as behind the scenes, {cmd:{t}} gets replaced by an +{cmd:_N} x {cmd:nodes()} matrix when numerically integrating the transition hazard function.{p_end} +{synopt:{it:mata_function}} any Mata function, e.g. {cmd:log()} and {cmd:exp()}{p_end} +{p2colreset}{...} + +{phang2}For example, {cmd:dist(weibull) lambda(0.1) gamma(1.2)} is equivalent to {cmd:user(0.1:*1.2:*{t}:^(1.2:-1))}{p_end} + +{phang}{opt covariates(varname # ...)} defines baseline covariates to be included in the linear predictor of the +transition-specific hazard function, along with the value of the corresponding coefficient. For example, a treatent variable coded 0/1 can be +included, with a log hazard ratio of 0.5, by {cmd:covariates(treat 0.5)}. Variable {cmd:treat} must be in the dataset before +{cmd:survsim} is called.{p_end} + +{phang}{opt tde(varname # ...)} creates non-proportional hazards by interacting covariates with a function of time. +Covariates are interacted with {cmd:tdefunction()}, on the log hazard scale. Values should be entered as {cmd:tde(trt 0.5)}, +for example. Multiple time-dependent effects can be specified, but they will all be interacted with the same function of time. + +{phang}{opt tdefunction(string)} defines the function of time to which covariates specified in {cmd:tde()} are interacted +with, to create time-dependent effects in the transition-specific hazard function. The default is {cmd:{t}}, i.e. linear time. +The function can include:{p_end} +{synoptset 15 notes}{...} +{synopt:{cmd:{t}}} which denotes the main timescale, measured on the time since starting state, {cmd:startstate()}, +timescale (which may be {cmd:ltruncated()}){p_end} +{synopt:{cmd:+-*/^}} standard Mata mathematical operators, using colon notation i.e. {cmd:2 :* {t}}, see +{helpb help [M-2] op_colon}. Colon operators must be used as behind the scenes, {cmd:{t}} gets replaced by an +{cmd:_N} x {cmd:nodes()} matrix when numerically integrating the transition hazard function.{p_end} +{synopt:{it:mata_function}} any Mata function, e.g. {cmd:log()} and {cmd:exp()}{p_end} +{p2colreset}{...} + +{phang}{opt reset} specifies that this transition model is on a clock-reset timescale. The timescale is reset to 0 on entry, +i.e. the timescale for this transition is measured on a time since state entry timescale, rather than the default +clock-forward. If you specify a {cmd:user()} function with {cmd:reset}, then {cmd:survsim} will replace any occurences of +{cmd:{t}} with {cmd:{t}-{t0}}, including those specified in {cmd:tdefunction()}.{p_end} + + +{marker commonopts}{...} +{title:Options} + +{phang} +{opt transmatrix(matname)} specifies the transition matrix which governs the multi-state model. Transitions must be +numbered as an increasing sequence of integers from 1,...,K, from left to right, top to bottom of the matrix. +Reversible transitions are allowed. If {cmd:transmatrix()} is not specified, a competing risks model is assumed.{p_end} + +{phang}{opt maxtime(#|varname)} specifies right censoring time(s). Either a common maximum follow-up time {cmd:#} can be +specified for all observations, or observation specific censoring times can be specified by using a {varname}. {p_end} + +{phang}{opt startstate(#|varname)} specifies the state(s) in which observations begin. Either a common state {cmd:#} can +be specified for all observations, or observation specific starting states can be specified by using a {varname}. +Default is {cmd:startstate(1)}.{p_end} + +{phang}{opt ltruncated(#|varname)} specifies left truncated/delayed entry time(s), which is the time(s) at which observations +start in the initial starting state(s). Either a common time {cmd:#} can be specified for all observations, or observation +specific left truncation times can be specified by using a {varname}. Default is {cmd:ltruncated(0)}.{p_end} + +{phang}{opt nodes(#)} defines the number of Gauss-Legendre quadrature points used to evaluate the total cumulative hazard +function for each potential next transition. To simulate survival times from such a function, a combination of numerical +integration and root-finding is used. The default is {cmd:nodes(30)}.{p_end} + + +{title:Remarks} + +{pstd}When simulating multi-state event data using {helpb survsim}, numerical quadrature is used to calculate the total +cumulative hazard function, within iterations of Brent's univariate root finder. As with all model frameworks +which use numerical integration, it is important to assess the stability of the simulated survival times with an increasing +number of quadrature nodes. Any {cmd:survsim} call that requires Brent's method is executed under a tolerance of 0, to +ensure accurracy of the simulated survival times.{p_end} + +{pstd}Note that even if {cmd:reset} transitions are used in the data-generating model, the returned simulated event times +are still on the main timescale, time since initial {cmd:startstate()} (which could be {cmd:ltruncated()}).{p_end} + +{pstd}Always {helpb set seed}, to ensure reproducibility.{p_end} + + +{marker examples}{...} +{title:Examples} + +{phang}{ul:{bf:Example 1: Simulate from a competing risks model}}{p_end} + +{pstd} +We'll simulate 1000 observations, and generate a binary treatment group indicator, remembering to {helpb set seed} first. +{p_end} + +{cmd: . set obs 1000} +{cmd: . set seed 9865} +{cmd: . gen trt = runiform()>0.5} + +{pstd} +Simulate from a competing risk model with 2 competing events. The first cause-specific hazard has a Weibull distribution, +with no covariates. The second cause-specific hazard model has an exponential distribution, with a beneficial treatment effect. +Right censoring applied at 10 years. +{p_end} + +{cmd: . survsim time state event , hazard1(dist(weibull) lambda(0.1) gamma(1.2)) ///} +{cmd: hazard2(dist(exponential) lambda(0.01) covariates(trt -0.5)) ///} +{cmd: maxtime(10)} +{cmd: variables time0 to time1 created} +{cmd: variables state0 to state1 created} +{cmd: variables event1 to event1 created} + + +{phang}{ul:{bf:Example 2: Simulate from an illness-death model}}{p_end} + +{pstd} +We first define the transition matrix for an illness-death model. It has three states: +{p_end} + +{phang2}State 1 - A "healthy" state. Observations can move from state 1 to state 2 or 3.{p_end} +{phang2}State 2 - An intermediate "illness" state. Observations can come from state 1, and move on to state 3.{p_end} +{phang2}State 3 - An absorbing "death" state. Observations can come from state 1 or 2, but not leave.{p_end} + +{pstd} +This gives us three potential transitions between states: +{p_end} + +{phang2}Transition 1 - State 1 -> State 2{p_end} +{phang2}Transition 2 - State 1 -> State 3{p_end} +{phang2}Transition 3 - State 2 -> State 3{p_end} + +{pstd} +which is defined by the following matrix: +{p_end} + +{cmd: . matrix tmat = (.,1,2\.,.,3\.,.,.)} + +{pstd} +The key is to think of the column/row numbers as the states, and the elements of the matrix as the transition numbers. +Any transitions indexed with a missing value {cmd:.} means that the transition between the row state and the column state +is not possible. Let's make it obvious, sticking with our "healthy", "ill" and "dead" names for the states: +{p_end} + +{cmd: . mat colnames tmat = "healthy" "ill" "dead"} +{cmd: . mat rownames tmat = "healthy" "ill" "dead"} +{cmd: . mat list tmat} + +{cmd: tmat[3,3]} +{cmd: healthy ill dead} +{cmd: healthy . 1 2} +{cmd: ill . . 3} +{cmd: dead . . .} + +{pstd} +Now we've defined the transition matrix, we can use {cmd:survsim} to simulate some data. We'll simulate 1000 observations, and +generate a binary treatment group indicator, remembering to {helpb set seed} first. +{p_end} + +{cmd: . set obs 1000} +{cmd: . set seed 9865} +{cmd: . gen trt = runiform()>0.5} + +{pstd} +The first transition-specific hazard has a user defined baseline hazard function, with a harmful treatment effect. +The second transition-specific hazard model has a Weibull distribition, with a beneficial treatment effect. The +third transition-specific hazard has a user-defined baseline hazard function, with an initially beneficial treatment +effect that reduces linearly with respect to log time. Right censoring is applied at 3 years. +{p_end} + +{cmd: . survsim time state event, transmatrix(tmat) ///} +{cmd: hazard1(user(exp(-2 :+ 0.2:* log({t}) :+ 0.1:*{t})) covariates(trt 0.1)) ///} +{cmd: hazard2(dist(weibull) lambda(0.01) gamma(1.3) covariates(trt -0.5)) ///} +{cmd: hazard3(user(0.1 :* {t} :^ 1.5) covariates(trt -0.5) tde(trt 0.1) ///} +{cmd: tdefunction(log({t}))) ///} +{cmd: maxtime(3)} +{cmd: variables time0 to time2 created} +{cmd: variables state0 to state2 created} +{cmd: variables event1 to event2 created} + +{pstd} +{cmd:survsim} creates variables storing the times at which states were entered, with the associated state number, and whether +an event or right-censoring occured. It begins by creating the {cmd:0} variables, which represents the time at which +observatations entered the inital state, {cmd:time0}, and the associated state number, {cmd:state0}. As {cmd:ltruncated()} +and {cmd:startstate()} were not specified, all observations are assumed to start in state 1 at time 0. Subsequent transitions +are simulated until all observations have either entered an absorbing state, or are right-censored at their {cmd:maxtime()}. +For simplicity, I will assume time is measured in years. We can see what {cmd:survsim} has created: +{p_end} + +{cmd: . list if inlist(_n,1,4,16,112)} + +{cmd: +----------------------------------------------------------------------------------+} +{cmd: | trt time0 state0 time1 state1 event1 time2 state2 event2 |} +{cmd: |----------------------------------------------------------------------------------|} +{cmd: 1. | 0 0 1 3 1 0 . . . |} +{cmd: 4. | 1 0 1 .95636156 2 1 3 2 0 |} +{cmd: 16. | 0 0 1 1.0755764 2 1 2.4401409 3 1 |} +{cmd: 112. | 1 0 1 2.3290322 3 1 . . . |} +{cmd: +----------------------------------------------------------------------------------+} + +{pstd} +All observations start initially in state 1 at time 0, which are stored in {cmd:state0} and {cmd:time0}, respectively. Then, +{p_end} + +{phang2}{cmd:Observation 1} is right-censored at 3 years, remaining in state 1{p_end} +{phang2}{cmd:Observation 4} moves to state 2 at 0.956 years, and is subsequently right-censored at 3 years, still in +state 2{p_end} +{phang2}{cmd:Observation 16} moves to state 2 at 1.076 years, and then moves to state 3 at 2.440 years. Since state 3 +is an absorbing state, there are no further transitions.{p_end} +{phang2}{cmd:Observation 112} moves to state 3 at 2.329 years. Again, since state 3 is absorbing, there are no further +transitions{p_end} + + +{title:Author} + +{pstd}{cmd:Michael J. Crowther}{p_end} +{pstd}Red Door Analytics{p_end} +{pstd}E-mail: {browse "mailto:michael@reddooranalytics.se":michael@reddooranalytics.se}{p_end} + +{phang}Please report any errors you may find.{p_end} + + +{title:References} + +{phang}Bender R, Augustin T and Blettner M. Generating survival times to simulate Cox proportional hazards models. {it:Statistics in Medicine} 2005;24:1713-1723.{p_end} + +{marker paper3}{...} +{phang}Beyersmann J, Latouche A, Buchholz A and Schumacher M. Simulating competing risks data in survival analysis. {it:Statistics in Medicine} 2009;28:956-971.{p_end} + +{marker paper1}{...} +{phang}Crowther MJ and Lambert PC. {browse "http://www.stata-journal.com/article.html?article=st0275":Simulating complex survival data.}{it: The Stata Journal} 2012;12(4):674-687.{p_end} + +{marker paper2}{...} +{phang}Crowther MJ and Lambert PC. {browse "http://onlinelibrary.wiley.com/doi/10.1002/sim.5823/abstract":Simulating biologically plausible complex survival data.} {it:Statistics in Medicine} 2013;32(23):4118-4134.{p_end} + +{phang}Jann, B. 2005. moremata: Stata module (Mata) to provide various functions. Available from http://ideas.repec.org/c/boc/bocode/s455001.html.{p_end} + diff --git a/ssc/version_4_0_8/survsim_msm_mataf.mata b/ssc/version_4_0_8/survsim_msm_mataf.mata new file mode 100644 index 0000000..fca2ba9 --- /dev/null +++ b/ssc/version_4_0_8/survsim_msm_mataf.mata @@ -0,0 +1,7 @@ +mata: +function hf${i}(tnodes,expxb,tdexb,hvars,lt,time0) +{ + hf = ${matahazard${i}} + return(hf) +} +end diff --git a/ssc/version_4_0_8/survsim_msm_mataf_total.mata b/ssc/version_4_0_8/survsim_msm_mataf_total.mata new file mode 100644 index 0000000..5ffd994 --- /dev/null +++ b/ssc/version_4_0_8/survsim_msm_mataf_total.mata @@ -0,0 +1,11 @@ +mata: +function totalhf${i}(tnodes,expxb,tdexb,hvars,lt,time0) +{ + hf = ${totalhazard${i}} + if (min(hf)<0) { + errprintf("Negative hazard\n") + exit(198) + } + return(hf) +} +end diff --git a/ssc/version_4_0_8/survsim_parametric.sthlp b/ssc/version_4_0_8/survsim_parametric.sthlp new file mode 100644 index 0000000..7a2771d --- /dev/null +++ b/ssc/version_4_0_8/survsim_parametric.sthlp @@ -0,0 +1,179 @@ +{smcl} +{* *! version 1.0.0}{...} +{vieweralsosee "survsim" "help survsim"}{...} +{vieweralsosee "survsim user" "help survsim user"}{...} +{vieweralsosee "survsim model" "help survsim model"}{...} +{vieweralsosee "survsim msm" "help survsim msm"}{...} +{vieweralsosee "merlin" "help merlin"}{...} +{vieweralsosee "galahad" "help galahad"}{...} +{viewerjumpto "Syntax" "survsim parametric##syntax"}{...} +{viewerjumpto "Description" "survsim parametric##description"}{...} +{viewerjumpto "Options" "survsim parametric##options"}{...} +{viewerjumpto "Examples" "survsim parametric##examples"}{...} +{title:Title} + +{p2colset 5 16 16 2}{...} +{p2col :{cmd:survsim} {hline 2}}Simulate survival data from a parametric distribution, +a user-defined distribution, from a fitted {helpb merlin} model, from a cause-specific +hazards competing risks model, or from a general multi-state model{p_end} +{p2colreset}{...} + + +{marker syntax}{...} +{title:Syntax} + +{phang} +Syntax for simulating survival times from a parametric distribution: + +{phang2} +{cmd: survsim} {it:newvarname1} [{it:newvarname2}] {cmd:,} {opt distribution(string)} [{help survsim parametric##options:{it:options}}] + +{phang} +See {helpb survsim:help survsim} for more on simulating survival times in other settings. + + +{synoptset 36 tabbed}{...} +{marker options}{...} +{synopthdr:Options} +{synoptline} +{synopt:{cmdab:d:istribution(}{cmdab:e:xponential)}}exponential survival distribution{p_end} +{synopt:{cmdab:d:istribution(}{cmdab:gom:pertz)}}Gompertz survival distribution{p_end} +{synopt:{cmdab:d:istribution(}{cmdab:w:eibull)}}Weibull survival distribution{p_end} +{synopt:{opt l:ambdas(numlist)}}scale parameters{p_end} +{synopt:{opt g:ammas(numlist)}}shape parameters{p_end} +{synopt:{opt cov:ariates(varname # [# ...] ...)}}baseline covariates{p_end} +{synopt:{opt tde(varname # [# ...] ...)}}time-dependent effects{p_end} +{synopt:{opt maxt:ime(#|varname)}}right censoring time(s); either a common number or a {varname}{p_end} +{synopt:{opt lt:runcated(#|varname)}}left truncation time(s) (delayed entry); either a common number or a {varname}{p_end} + +{syntab:2-component mixture} +{synopt:{opt mix:ture}}simulate survival times from a 2-component mixture model{p_end} +{synopt:{opt pm:ix(real)}}mixture parameter, default is 0.5{p_end} +{synopt:{opt nodes(#)}}number of Gauss-Legendre quadrature nodes, default 30; see details{p_end} +{synoptline} +{p2colreset}{...} + + +{marker description}{...} +{title:Description} + +{pstd} +{helpb survsim} simulates survival data from a parametric distribution, a user-defined distribution, from a fitted +{helpb merlin} model, from a cause-specific hazards competing risks model, or from a Markov multi-state model. +This help file centres on the parametric distribution setting. +{p_end} +{pstd} +Survival times can be simulated from standard parametric distributions including the exponential, Gompertz and Weibull, +and 2-component mixtures of them. Baseline covariates can be included, with specified associated log hazard ratios. +Non-proportional hazards can also be included with all models; under an exponential or Weibull model covariates are interacted +with log time, under a Gompertz model covariates are interacted with time. See +{help survsim parametric##paper1:Crowther and Lambert (2012)} for more details. +{p_end} + +{pstd} +{it:newvarname1} specifies the new variable name to contain the generated survival times. {it:newvarname2} is required when +the {cmd:maxtime()} option is specified which defines the time(s) of right censoring. +{p_end} + + +{marker options}{...} +{title:Options} + +{phang}{opt distribution}({it:string}) specifies the parametric survival distribution to use, including {cmd:exponential}, +{cmd:gompertz} or {cmd:weibull}. All listed distributions are parameterised in the proportional hazards metric.{p_end} + +{phang}{opt lambdas(numlist)} defines the scale parameters in the exponential/Weibull/Gompertz distribution(s). The number of +values required depends on the model choice. Default is a single number corresponding to a standard parametric distribution. +Under a {cmd:mixture} model 2 values are required.{p_end} + +{phang}{opt gammas(numlist)} defines the shape parameters of the Weibull/Gompertz parametric distribution(s). Number of +entries must be equal to that of {cmd:lambdas}.{p_end} + +{phang}{opt covariates(varname # ...)} defines baseline covariates to be included in the linear predictor of the +survival model, along with the value of the corresponding coefficient. For example, a treatent variable coded 0/1 can be +included, with a log hazard ratio of 0.5, by {cmd:covariates(treat 0.5)}. Variable {cmd:treat} must be in the dataset before +{cmd:survsim} is called. {p_end} + +{phang}{opt tde(varname # ...)} creates non-proportional hazards by interacting covariates with log time for an +exponential or Weibull model, or time under a Gompertz model or mixture model. Values should be entered as +{cmd:tde(trt 0.5)}, for example. Multiple time-dependent effects can be specified, but they will all be interacted with +the same function of time. + +{phang}{opt maxtime(#|varname)} specifies the right censoring time(s). Either a common maximum follow-up time {cmd:#} can be +specified for all observations, or observation specific censoring times can be specified by using a {varname}. {p_end} + +{phang}{opt ltruncated(#|varname)} specifies the left truncated/delayed entry time(s). Either a common time {cmd:#} can be +specified for all observations, or observation specific left truncation times can be specified by using a {varname}. {p_end} + +{dlgtab:Mixture model} + +{phang}{opt mixture} specifies that survival times are simulated from a 2-component mixture model, with mixture component +distributions defined by {cmd:distribution()}. {cmd:lambdas()} and {cmd:gammas()} must be of length 2.{p_end} + +{phang}{opt pmix(#)} defines the mixture parameter. Default is 0.5.{p_end} + +{phang}{opt nodes(#)} defines the number of Gauss-Legendre quadrature points used to evaluate the cumulative hazard function +when {cmd:mixture} {it:and} {cmd:tde()} are specified together. To simulate survival times from such a mixture model, a combination +of numerical integration and root-finding is used. The default is {cmd:nodes(30)}. {p_end} + + +{title:Remarks} + +{pstd} +When simulating from a {cmd:mixture} model, covariate effects are additive on the log hazard scale, and are applied to the overall +hazard function, rather than each mixture component. +{p_end} + +{pstd}When simulating from a {cmd:mixture} model with time-dependent effects, numerical integration is used to evaluate the +cumulative hazard function, within iterations of Brent's univariate root finder. As with all model frameworks which use +numerical integration, it is important to assess the stability of the simulated survival times with an increasing number +of integration points, through use of the {cmd:nodes()} option. Any {cmd:survsim} call that requires Brent's method is +executed under a tolerance of 1e-08, to ensure accurracy of the simulated survival times. {p_end} + +{pstd}Always {helpb set seed}, to ensure reproducibility.{p_end} + + +{marker examples}{...} +{title:Examples} + +{pstd}Generate times from a Weibull model including a binary treatment variable, with log hazard ratio = -0.5, and censoring +after 5 years:{p_end} +{phang}{stata "set obs 1000":. set obs 1000}{p_end} +{phang}{stata "gen trt = rbinomial(1,0.5)":. gen trt = rbinomial(1,0.5)}{p_end} +{phang}{stata "survsim stime1 died1, distribution(weibull) lambdas(0.1) gammas(1.5) covariates(trt -0.5) maxtime(5)":. survsim stime1 died1, distribution(weibull) lambdas(0.1) gammas(1.5) covariates(trt -0.5) maxtime(5)}{p_end} +{phang}{stata "stset stime1, failure(died1 = 1)":. stset stime1, failure(died1 = 1)}{p_end} +{phang}{stata "streg trt, distribution(weibull) nohr":. streg trt, distribution(weibull) nohr}{p_end} + +{pstd}Generate times from a Gompertz model:{p_end} +{phang}{stata "survsim stime2, distribution(gompertz) lambdas(0.1) gammas(0.05)":. survsim stime2, distribution(gompertz) lambdas(0.1) gammas(0.05)}{p_end} + +{pstd}Generate times from a 2-component mixture Weibull model:{p_end} +{phang}{stata "survsim stime3 died3, mixture distribution(weibull) lambdas(0.1 0.05) gammas(1 1.5) pmix(0.5) maxtime(5)":. survsim stime3 died3, mixture distribution(weibull) lambdas(0.1 0.05) gammas(1 1.5) pmix(0.5) maxtime(5)}{p_end} + +{pstd}Generate times from a Weibull model with diminishing treatment effect:{p_end} +{phang}{stata "survsim stime4, distribution(weibull) lambdas(0.1) gammas(1.5) covariates(trt -0.5) tde(trt 0.05)":. survsim stime5, distribution(weibull) lambdas(0.1) gammas(1.5) covariates(trt -0.5) tde(trt 0.05)}{p_end} + +{pstd}For more examples please see {help survsim parametric##paper2:Crowther and Lambert (2013)}.{p_end} + + +{title:Author} + +{pstd}{cmd:Michael J. Crowther}{p_end} +{pstd}Red Door Analytics{p_end} +{pstd}E-mail: {browse "mailto:michael@reddooranalytics.se":michael@reddooranalytics.se}{p_end} + +{phang}Please report any errors you may find.{p_end} + + +{title:References} + +{phang}Bender R, Augustin T and Blettner M. Generating survival times to simulate Cox proportional hazards models. {it:Statistics in Medicine} 2005;24:1713-1723.{p_end} + +{marker paper1}{...} +{phang}Crowther MJ and Lambert PC. {browse "http://www.stata-journal.com/article.html?article=st0275":Simulating complex survival data.}{it: The Stata Journal} 2012;12(4):674-687.{p_end} + +{marker paper2}{...} +{phang}Crowther MJ and Lambert PC. {browse "http://onlinelibrary.wiley.com/doi/10.1002/sim.5823/abstract":Simulating biologically plausible complex survival data.} {it:Statistics in Medicine} 2013;32(23):4118-4134.{p_end} + +{phang}Jann, B. 2005. moremata: Stata module (Mata) to provide various functions. Available from http://ideas.repec.org/c/boc/bocode/s455001.html.{p_end} + diff --git a/ssc/version_4_0_8/survsim_user.ado b/ssc/version_4_0_8/survsim_user.ado new file mode 100644 index 0000000..6c80005 --- /dev/null +++ b/ssc/version_4_0_8/survsim_user.ado @@ -0,0 +1,453 @@ + +program define survsim_user + version 14.2 + + syntax newvarname(min=2 max=2), /// + MAXTime(string) /// + /// + [ /// + LTruncated(string) /// + /// + LOGHazard(string) /// + Hazard(string) /// + LOGCHazard(string) /// + CHazard(string) /// + NODES(int 30) /// + /// + MIXTURE /// + Distribution(string) /// + Lambdas(numlist) /// + Gammas(numlist) /// + PMIX(real 0.5) /// + /// + COVariates(string) /// + TDE(string) /// + TDEFUNCtion(string) /// + /// + MARGinal /// + ] // + + local stime : word 1 of `varlist' + local died : word 2 of `varlist' + + cap which lmoremata.mlib + if _rc { + display in yellow "You need to install the moremata package. This can be installed using," + display in yellow ". {stata ssc install moremata}" + exit 198 + } + + //====================================================================================================================// + //mixture - build hazard/chazard and continue as user + + if "`mixture'"!="" { + + local Nls : word count `lambdas' + local Ngs : word count `gammas' + + if `Nls'!=2 | `Ngs'!=2 { + di as error "Number of lambdas() and gammas() must be 2 under a mixture model" + exit 198 + } + + local l = length("`distribution'") + if `l'==0 { + di as error "distribution() required" + exit 198 + } + if substr("exponential",1,max(1,`l'))=="`distribution'" local dist "exp" + else if substr("gompertz",1,max(3,`l'))=="`distribution'" local dist "gompertz" + else if substr("weibull",1,max(1,`l'))=="`distribution'" local dist "weibull" + else { + di as error "Unknown distribution" + exit 198 + } + + local l1 : word 1 of `lambdas' + local l2 : word 2 of `lambdas' + if "`dist'"=="weibull" | "`dist'"=="gompertz" { + local g1 : word 1 of `gammas' + local g2 : word 2 of `gammas' + } + + if "`tde'"=="" { + + if "`dist'"=="exp" { + local chazard "-log(`pmix':*exp(-`l1':*{t}) :+ (1:-`pmix'):*exp(-`l2':*{t}))" + } + else if "`dist'"=="weibull" { + local chazard "-log(`pmix':*exp(-`l1':*{t}:^(`g1')) :+ (1:-`pmix'):*exp(-`l2':*{t}:^(`g2')))" + } + else { + local chazard "-log(`pmix':*exp((`l1':/`g1'):*(1:-exp(`g1':*{t}))) :+ (1:-`pmix'):*exp((`l2':/`g2'):*(1:-exp(`g2':*{t}))))" + } + + } + else { + + if "`dist'"=="exp" { + local base_surv "(`pmix':*exp(-`l1':*{t}) :+ (1:-`pmix'):*exp(-`l2':*{t}))" + local numer "(`l1':*`pmix':*exp(-`l1':*{t}) :+ `l2':*(1:-`pmix'):*exp(-`l2':*{t}))" + local hazard "`numer' :/ `base_surv'" + } + else if "`dist'"=="weibull" { + local base_surv "(`pmix':*exp(-`l1':*{t}:^(`g1')) :+ (1:-`pmix'):*exp(-`l2':*{t}:^(`g2')))" + local numer "(`l1':*`g1':*`pmix':*{t}:^(`g1':-1):*exp(-`l1':*{t}:^(`g1')) :+ `l2':*`g2':*(1:-`pmix'):*{t}:^(`g2':-1):*exp(-`l2':*{t}:^(`g1')))" + local hazard "`numer' :/ `base_surv'" + } + else { + local base_surv "(`pmix':*exp((`l1':/`g1'):*(1:-exp(`g1':*{t}))) :+ (1:-`pmix'):*exp((`l2':/`g2'):*(1:-exp(`g2':*{t}))))" + local numer "`pmix':*exp((`l1':/`g1'):*(1:-exp(`g1':*{t}))) :* (-`l1':*exp(`g1':*{t})) + (1:-`pmix'):*exp((`l2':/`g2'):*(1:-exp(`g2':*{t}))) :* (-`l2':*exp(`g2':*{t}))" + local hazard "(`numer') :/ `base_surv'" + } + + } + + //now continues as user function + } + + + //====================================================================================================================// + //baseline covariates + + tempvar expxb + if "`covariates'"!="" { + + tokenize `covariates' + local ncovlist : word count `covariates' + local ncovvars = `ncovlist'/2 + cap confirm integer number `ncovvars' + if _rc>0 { + di as error "Variable/number missing in covariates" + exit 198 + } + local ind = 1 + local error = 0 + forvalues i=1/`ncovvars' { + cap confirm numeric var ``ind'', exact + if _rc { + local errortxt "invalid covariates(... ``ind'' ``=`ind'+1'' ...)" + local error = 1 + } + cap confirm num ``=`ind'+1'' + if _rc { + local errortxt "invalid covariates(... ``ind'' ``=`ind'+1'' ...)" + local error = 1 + } + tempvar vareffect`i' + qui gen double `vareffect`i'' = ``ind''*``=`ind'+1'' + local ind = `ind' + 2 + } + if `error' { + di as error "`errortxt'" + exit 198 + } + local cov_linpred "`vareffect1'" + if `ncovvars'>1 { + forvalues k=2/`ncovvars' { + local cov_linpred "`cov_linpred' + `vareffect`k''" + } + } + + qui gen double `expxb' = exp(`cov_linpred') + } + else qui gen byte `expxb' = 1 + + //====================================================================================================================// + // Time-dependent effects + + tempvar tdexb + if "`tde'"!="" { + tokenize `tde' + local ntde : word count `tde' + local ntdevars = `ntde'/2 + cap confirm integer number `ntdevars' + if _rc>0 { + di as error "Variable/number missing in tde" + exit 198 + } + + local ind = 1 + local error = 0 + forvalues i=1/`ntdevars' { + cap confirm var ``ind'', exact + if _rc { + local errortxt "invalid tde(... ``ind'' ``=`ind'+1'' ...)" + local error = 1 + } + cap confirm num ``=`ind'+1'' + if _rc { + local errortxt "invalid tde(... ``ind'' ``=`ind'+1'' ...)" + local error = 1 + } + tempvar tdeeffect`i' + qui gen double `tdeeffect`i'' = ``ind''*``=`ind'+1'' + + local ind = `ind' + 2 + } + if `error' { + di as error "`errortxt'" + exit 198 + } + local tde_linpred "`tdeeffect1'" + if `ntdevars'>1 { + forvalues k=2/`ntdevars' { + local tde_linpred "`tde_linpred' + `tdeeffect`k''" + } + } + + qui gen double `tdexb' = `tde_linpred' + } + else qui gen double `tdexb' = 0 + + //====================================================================================================================// + //setup stuff + + quietly { + + gen double `stime' = . + + tempvar logu + gen double `logu' = log(runiform()) + + cap drop _survsim_rc + gen _survsim_rc = 0 + + } + + //====================================================================================================================// + //core + + if "`loghazard'"!="" | "`hazard'"!="" { + + gaussquad_ss, n(`nodes') //Gauss-Legendre nodes and weights + + if "`loghazard'"!="" { + local hazard exp(`loghazard') + } + + mata: st_local("matahazard",subinstr("`hazard'","{t}","tnodes")) + + local matahazard "(`matahazard') :* expxb" + + if "`tde'"!="" { + if "`tdefunction'"=="" { + local tdefunction tnodes + } + else { + mata: st_local("tdefunction",subinstr("`tdefunction'","{t}","tnodes")) + } + + local matahazard "`matahazard' :* exp(tdexb :* (`tdefunction'))" + } + + //push variables found in loghazard()/hazard() into Mata + local Nhvars = 0 + if "`mixture'"=="" { + + macro drop overallsyntax1 overallsyntax2 mmrootsyntax1 mmrootsyntax2 + + gettoken first rest : matahazard, parse("[ ,\^\*\(\)-\+/:<>=]") + while "`rest'"!="" { + if trim("`first'")!="," { + cap confirm var `first', exact + if !_rc { + local test1 = 0 + local hvarindex = 0 + foreach var in `covlist' { + local hvarindex = `hvarindex' + 1 + if "`first'"=="`var'" { + local test1 = 1 + local refhvarindex = `hvarindex' + } + } + if `test1'==0 { + local covlist `covlist' `first' //contains a list of all varnames specified, be they time-indep or time-dependent + local Nhvars = `Nhvars' + 1 + mata: st_local("matahazard",subinstr("`matahazard'","`first'","hvars[,`Nhvars']")) + } + else { + mata: st_local("matahazard",subinstr("`matahazard'","`first'","hvars[,`refhvarindex']")) + } + } + } + gettoken first rest : rest, parse("[ ,\^\*\(\)-\+/:<>=]") + } + if `Nhvars' { + mata: st_view(hvars=.,.,tokens("`covlist'")) + global overallsyntax1 real matrix hvars + global overallsyntax2 hvars + if "`marginal'"=="" global mmrootsyntax1 , hvars[i,] + else global mmrootsyntax1 , hvars + global mmrootsyntax2 , hvars + } + } + + //test the hazard function + if `Nhvars'==0 { + mata: tnodes = expxb = tdexb = 0.1 + cap mata: test1 = `matahazard' + if _rc { + di as error "Error in loghazard()/hazard()" + exit 198 + } + mata mata drop tnodes expxb tdexb test1 + } + + //push function into global + global chaz `matahazard' + cap pr drop survsim_user_core + + survsim_user_core, stime(`stime') /// + maxtime(`maxtime') /// + logu(`logu') /// + expxb(`expxb') /// + tdexb(`tdexb') /// + ltruncated(`ltruncated') /// + `marginal' // + + //tidy up + cap macro drop cumhaz + if `Nhvars' { + cap macro drop overallsyntax1 overallsyntax2 mmrootsyntax1 mmrootsyntax2 + } + } + + //cumulative hazard + else { + + if "`logchazard'"!="" { + local chazard exp(`logchazard') + } + mata: st_local("matachazard",subinstr("`chazard'","{t}","t")) + local matachazard "(`matachazard') :* expxb" + if "`ltruncated'"!="" { + mata: st_local("matachazard0",subinstr("`chazard'","{t}","lt")) + local matachazard0 "(`matachazard0') :* expxb" + } + + //tde's + if "`tde'"!="" { + if "`tdefunction'"=="" { + local tdefunction1 t + } + else { + mata: st_local("tdefunction1",subinstr("`tdefunction'","{t}","t")) + } + local matachazard "`matachazard' :* exp(tdexb :* (`tdefunction1'))" + if "`ltruncated'"!="" { + if "`tdefunction'"=="" { + local tdefunction0 lt + } + else { + mata: st_local("tdefunction0",subinstr("`tdefunction'","{t}","lt")) + } + local matachazard0 "`matachazard0' :* exp(tdexb :* (`tdefunction0'))" + } + } + + //any variables in ch() or logch() + local nhazvars = 0 + macro drop overallsyntax1 overallsyntax2 mmrootsyntax1 mmrootsyntax2 + gettoken first rest : matachazard, parse("[ ,\^\*\(\)-\+/:<>=]") + while "`rest'"!="" { + if trim("`first'")!="," { + cap confirm var `first', exact + if !_rc { + local test1 = 0 + foreach var in `covlist' { + if "`first'"=="`var'" local test1 = 1 + } + if `test1'==0 { + local covlist `covlist' `first' //contains a list of all varnames specified, be they time-indep or time-dependent + local nhazvars = `nhazvars' + 1 + mata: st_local("matachazard",subinstr(st_local("matachazard"),"`first'","hvars[1,`nhazvars']")) + if "`ltruncated'"!="" { + mata: st_local("matachazard0",subinstr(st_local("matachazard0"),"`first'","hvars[1,`nhazvars']")) + } + } + } + } + gettoken first rest : rest, parse("[ ,\^\*\(\)-\+/:<>=]") + } + if `nhazvars' { + mata: st_view(hvars=.,.,tokens("`covlist'")) + global overallsyntax1 real matrix hvars + global overallsyntax2 hvars + if "`marginal'"=="" global mmrootsyntax1 , hvars[i,] + else global mmrootsyntax1 , hvars + global mmrootsyntax2 , hvars + } + + //test + if `nhazvars'==0 { + mata: t = expxb = tdexb = 0.1 + cap mata: test1 = `matachazard' + if _rc { + di as error "Error in logchazard()/chazard()" + exit 198 + } + mata mata drop t expxb tdexb test1 + } + + //push function into global + global chaz `matachazard' + if "`ltruncated'"!="" { + global chaz ${chaz} :- `matachazard0' + } + cap pr drop survsim_user_core + + survsim_user_core, stime(`stime') /// + maxtime(`maxtime') /// + logu(`logu') /// + expxb(`expxb') /// + tdexb(`tdexb') /// + ltruncated(`ltruncated') /// + chazard /// + `marginal' // + + //tidy up + cap macro drop chaz + if `nhazvars' { + cap macro drop overallsyntax1 overallsyntax2 mmrootsyntax1 mmrootsyntax2 + } + + } + + //done + +end + +program define gaussquad_ss, rclass + syntax [, N(int 30)] + tempname weights nodes + mata ss_gq("`weights'","`nodes'") + return matrix weights = `weights' + return matrix nodes = `nodes' +end + +mata: +void ss_gq(string scalar weightsname, string scalar nodesname) +{ + n = strtoreal(st_local("n")) + i = range(1,n,1)' + i1 = range(1,n-1,1)' + + muzero = 2 + a = J(1,n,0) + b = i1:/sqrt(4 :* i1:^2 :- 1) + + A= diag(a) + for(j=1;j<=n-1;j++){ + A[j,j+1] = b[j] + A[j+1,j] = b[j] + } + symeigensystem(A,vec,nodes) + weights = (vec[1,]:^2:*muzero)' + weights = weights[order(nodes',1)] + nodes = nodes'[order(nodes',1)'] + st_matrix(weightsname,weights) + st_matrix(nodesname,nodes) +} + +end diff --git a/ssc/version_4_0_8/survsim_user.sthlp b/ssc/version_4_0_8/survsim_user.sthlp new file mode 100644 index 0000000..4177eb1 --- /dev/null +++ b/ssc/version_4_0_8/survsim_user.sthlp @@ -0,0 +1,202 @@ +{smcl} +{* *! version 1.0.0}{...} +{vieweralsosee "survsim" "help survsim"}{...} +{vieweralsosee "survsim parametric" "help survsim parametric"}{...} +{vieweralsosee "survsim model" "help survsim model"}{...} +{vieweralsosee "survsim msm" "help survsim msm"}{...} +{vieweralsosee "merlin" "help merlin"}{...} +{vieweralsosee "galahad" "help galahad"}{...} +{viewerjumpto "Syntax" "survsim user##syntax"}{...} +{viewerjumpto "Description" "survsim user##description"}{...} +{viewerjumpto "Options" "survsim user##options"}{...} +{viewerjumpto "Examples" "survsim user##examples"}{...} +{title:Title} + +{p2colset 5 16 16 2}{...} +{p2col :{cmd:survsim} {hline 2}}Simulate survival data from a parametric distribution, +a user-defined distribution, from a fitted {helpb merlin} model, from a cause-specific +hazards competing risks model, or from a general multi-state model{p_end} +{p2colreset}{...} + + +{marker syntax}{...} +{title:Syntax} + +{phang} +Syntax for simulating survival times from a user-defined distribution: + +{phang2} +{cmd: survsim} {it:newvarname1} {it:newvarname2} {cmd:,} {opt maxt:ime(#|varname)} [{help survsim user##options:{it:options}}] + +{phang} +See {helpb survsim:help survsim} for more on simulating survival times in other settings. + + +{synoptset 36 tabbed}{...} +{synopthdr:Options} +{synoptline} +{synopt:{opt logh:azard(string)}}user-defined log baseline hazard function; see details{p_end} +{synopt:{opt h:azard(string)}}user-defined baseline hazard function; see details{p_end} +{synopt:{opt logch:azard(string)}}user-defined log cumulative baseline hazard function; see details{p_end} +{synopt:{opt ch:azard(string)}}user-defined baseline cumulative hazard function; see details{p_end} +{synopt:{opt cov:ariates(varname # [# ...] ...)}}baseline covariates{p_end} +{synopt:{opt tde(varname # [# ...] ...)}}time-dependent effects{p_end} +{synopt:{opt tdefunc:tion(string)}}function of time to interact with covariates specified in {bf:tde()}; see details{p_end} +{synopt:{opt maxt:ime(#|varname)}}right censoring time(s); either a common number or a {varname}{p_end} +{synopt:{opt lt:runcated(#|varname)}}left truncation time(s) (delayed entry); either a common number or a {varname}{p_end} +{synopt:{opt nodes(#)}}number of Gauss-Legendre quadrature nodes, default 30{p_end} +{synoptline} +{p2colreset}{...} + + +{marker description}{...} +{title:Description} + +{pstd} +{helpb survsim} simulates survival data from a parametric distribution, a user-defined distribution, from a fitted +{helpb merlin} model, from a cause-specific hazards competing risks model, or from a Markov multi-state model. +This help file centres on the user-defined distribution setting. +{p_end} +{pstd} +Survival times can be simulated from bespoke, user-defined [log] [cumulative] hazard functions. The function must be specified +in Mata code (using colon operators), with survival times generated using a combination of numerical integration and root +finding techniques. Baseline covariates can be included, with specified associated log hazard ratios. Time-dependent effects +can also be specified with a user-defined function of time. See {help survsim user##paper2:Crowther and Lambert (2013)} for more +details. +{p_end} + +{pstd} +{it:newvarname1} specifies the new variable name to contain the generated survival times. {it:newvarname2} specifies the +new variable name to contain the generated event indicator. +{p_end} + + +{marker options}{...} +{title:Options} + +{phang}{opt loghazard(string)} is the user-defined log hazard function. The function can include:{p_end} +{synoptset 15 notes}{...} +{synopt:{cmd:{t}}} which denotes the main timescale, measured on the time since starting state, {cmd:startstate()}, +timescale (which may be {cmd:ltruncated()}){p_end} +{synopt:{cmd:varname}} which denotes a variable in your dataset{p_end} +{synopt:{cmd:+-*/^}} standard Mata mathematical operators, using colon notation i.e. {cmd:2 :* {t}}, see +{helpb help [M-2] op_colon}. Colon operators must be used as behind the scenes, {cmd:{t}} gets replaced by an +{cmd:_N} x {cmd:nodes()} matrix when numerically integrating the hazard function.{p_end} +{synopt:{it:mata_function}} any Mata function, e.g. {cmd:log()} and {cmd:exp()}{p_end} +{p2colreset}{...} + +{phang2}See {help survsim user##examples:examples} below. + +{phang}{opt hazard(string)} is the user-defined baseline hazard function. See {cmd:loghazard()} for more details, and +{help survsim user##examples:examples} below.{p_end} + +{phang}{opt logchazard(string)} is the user-defined log cumulative baseline hazard function. See {cmd:loghazard()} for +more details, and {help survsim user##examples:examples} below.{p_end} + +{phang}{opt chazard(string)} is the user-defined baseline cumulative hazard function. See {cmd:loghazard()} for more +details, and {help survsim user##examples:examples} below.{p_end} + +{phang}{opt covariates(varname # [# ...] ...)} defines baseline covariates to be included in the linear predictor of the +survival model, along with the value of the corresponding coefficient. For example, a treatent variable coded 0/1 can be +included, with a log hazard ratio of 0.5, by {cmd:covariates(treat 0.5)}. Variable {cmd:treat} must be in the dataset before +{cmd:survsim} is called. If {cmd:chazard()} or {cmd:logchazard()} are used, then {cmd:covariates()} effects are additive +on the log cumulative hazard scale.{p_end} + +{phang}{opt tde(varname # [# ...] ...)} creates non-proportional hazards by interacting covariates with a function of time, +defined by {cmd:tdefunction()}, on the appropriate log hazard or log cumulative hazard scale. Values should be entered as +{cmd:tde(trt 0.5)}, for example. Multiple time-dependent effects can be specified, but they will all +be interacted with the same {cmd:tdefunction()}. To circumvent this, you can directly specify them in your user function. + +{phang}{opt tdefunction(string)} defines the function of time to which covariates specified in {cmd:tde()} are interacted +with, to create time-dependent effects. The default is {cmd:{t}}, i.e. linear time. +The function can include:{p_end} +{synoptset 15 notes}{...} +{synopt:{cmd:{t}}} which denotes the main timescale, measured on the time since starting state, {cmd:startstate()}, +timescale (which may be {cmd:ltruncated()}){p_end} +{synopt:{cmd:+-*/^}} standard Mata mathematical operators, using colon notation i.e. {cmd:2 :* {t}}, see +{helpb help [M-2] op_colon}. Colon operators must be used as behind the scenes, {cmd:{t}} gets replaced by an +{cmd:_N} x {cmd:nodes()} matrix when numerically integrating the hazard function.{p_end} +{synopt:{it:mata_function}} any Mata function, e.g. {cmd:log()} and {cmd:exp()}{p_end} +{p2colreset}{...} + + +{phang}{opt maxtime(#|varname)} specifies the right censoring time(s). Either a common maximum follow-up time {cmd:#} can be +specified for all observations, or observation specific censoring times can be specified by using a {varname}. {p_end} + +{phang}{opt ltruncated(#|varname)} specifies the left truncated/delayed entry time(s). Either a common time {cmd:#} can be +specified for all observations, or observation specific left truncation times can be specified by using a {varname}. {p_end} + +{phang}{opt nodes(#)} defines the number of Gauss-Legendre quadrature points used to evaluate the cumulative hazard function +when {cmd:loghazard()} or {cmd:hazard()} is specified. To simulate survival times from such a function, a combination +of numerical integration and root-finding is used. The default is {cmd:nodes(30)}.{p_end} + + +{title:Remarks} + +{pstd}When simulating from a user-defined {cmd:loghazard()} or {cmd:hazard()} function, numerical integration is used to +evaluate the cumulative hazard function, within iterations of Brent's univariate root finder. As with all model frameworks +which use numerical integration, it is important to assess the stability of the simulated survival times with an increasing +number of integration points, through use of the {cmd:nodes()} option. Any {cmd:survsim} call that requires Brent's method +is executed under a tolerance of 0, to ensure accurracy of the simulated survival times. {p_end} + +{pstd}Always {helpb set seed}, to ensure reproducibility.{p_end} + + +{marker examples}{...} +{title:Examples} + +{pstd}Simulate 1000 observations and generate a binary treatment group:{p_end} +{phang}{cmd:. set obs 1000}{p_end} +{phang}{cmd:. gen trt = rbinomial(1,0.5)}{p_end} + +{pstd}Generate times from user-defined log hazard function:{p_end} +{phang}{cmd:. survsim stime1 died1, loghazard(-1 :+ 0.02:*{t} :- 0.03:*{t}:^2 :+ 0.005:*{t}:^3) maxtime(1.5)}{p_end} + +{pstd}Generate times from user-defined log hazard function with diminishing treatment effect:{p_end} +{phang}{cmd:. survsim stime2 died2, loghazard(-1 :+ 0.02:*{t} :- 0.03:*{t}:^2 :+ 0.005:*{t}:^3) covariates(trt -0.5) tde(trt 0.03) maxtime(1.5)}{p_end} + +{pstd}Generate survival times from a joint longitudinal-survival model:{p_end} +{phang}{cmd:. clear}{p_end} +{phang}{cmd:. set obs 1000}{p_end} +{phang}{cmd:. gen trt = rbinomial(1,0.5)}{p_end} +{phang}{cmd:. gen age = rnormal(65,12)}{p_end} +{pstd}Define the association between the biomarker and survival{p_end} +{phang}{cmd:. local alpha = 0.25}{p_end} +{pstd}Generate the random intercept and random slopes for the longitudinal submodel{p_end} +{phang}{cmd:. gen b0 = rnormal(0,1)}{p_end} +{phang}{cmd:. gen b1 = rnormal(1,0.5)}{p_end} +{pstd}Generate survival times from an exponential baseline hazard{p_end} +{phang}{cmd:. survsim st1 event, loghazard(`=log(0.1)' :+ `alpha' :* (b0 :+ b1 :* {t})) maxtime(5) covariates(trt -0.5 age 0.02)}{p_end} +{pstd}Generate observed biomarker values at times 0, 1, 2, 3 , 4 years{p_end} +{phang}{cmd:. gen id = _n}{p_end} +{phang}{cmd:. expand 5}{p_end} +{phang}{cmd:. bys id: gen meastime = _n-1}{p_end} +{pstd}Remove observations after event or censoring time{p_end} +{phang}{cmd:. bys id: drop if meastime >= st1}{p_end} +{pstd}Generate observed biomarker values incorporating measurement error{p_end} +{phang}{cmd:. gen response = b0 + b1*meastime + rnormal(0,0.5)}{p_end} + +{pstd}For more examples please see {help survsim user##paper2:Crowther and Lambert (2013)}.{p_end} + + +{title:Author} + +{pstd}{cmd:Michael J. Crowther}{p_end} +{pstd}Red Door Analytics{p_end} +{pstd}E-mail: {browse "mailto:michael@reddooranalytics.se":michael@reddooranalytics.se}{p_end} + +{phang}Please report any errors you may find.{p_end} + + +{title:References} + +{phang}Bender R, Augustin T and Blettner M. Generating survival times to simulate Cox proportional hazards models. {it:Statistics in Medicine} 2005;24:1713-1723.{p_end} + +{marker paper1}{...} +{phang}Crowther MJ and Lambert PC. {browse "http://www.stata-journal.com/article.html?article=st0275":Simulating complex survival data.}{it: The Stata Journal} 2012;12(4):674-687.{p_end} + +{marker paper2}{...} +{phang}Crowther MJ and Lambert PC. {browse "http://onlinelibrary.wiley.com/doi/10.1002/sim.5823/abstract":Simulating biologically plausible complex survival data.} {it:Statistics in Medicine} 2013;32(23):4118-4134.{p_end} + +{phang}Jann, B. 2005. moremata: Stata module (Mata) to provide various functions. Available from http://ideas.repec.org/c/boc/bocode/s455001.html.{p_end} + diff --git a/ssc/version_4_0_8/survsim_user_core.ado b/ssc/version_4_0_8/survsim_user_core.ado new file mode 100644 index 0000000..cbefe94 --- /dev/null +++ b/ssc/version_4_0_8/survsim_user_core.ado @@ -0,0 +1,133 @@ + +local RS real scalar +local RM real matrix + +program define survsim_user_core + version 14.2 + syntax , stime(varname) /// + maxtime(string) /// + logu(varname) /// + expxb(varlist) /// + tdexb(varlist) /// + [ /// + ltruncated(varname) /// + CHazard /// + MARGinal /// + ] // + + mata: survsim_user($overallsyntax2) +end + +mata: + +void survsim_user($overallsyntax1) +{ + N = st_nobs() + ismarginal = st_local("marginal")!="" + st_view(time = .,.,st_local("stime")) + st_view(maxt = .,.,st_local("maxtime")) + + st_view(expxb = .,.,st_local("expxb")) + st_view(tdexb = .,.,st_local("tdexb")) + st_view(rc = .,.,"_survsim_rc") + st_view(logu = .,.,st_local("logu")) + + if (st_local("ltruncated")!="") { + st_view(lt=.,.,st_local("ltruncated")) + } + else lt = J(N,1,smallestdouble()) + + hazard = st_local("chazard")=="" + if (hazard) { + nodes = st_matrix("r(nodes)")' + weights = st_matrix("r(weights)") + } + maxit = 1000 + tol = 0 + + if (hazard) { + if (ismarginal) { + for (i=1;i<=N;i++){ + rc[i] = mm_root( /// + t=.,&survsim_user_hazard_marg(),lt[i],maxt[i],tol,maxit, /// + logu[i],nodes,weights,i,expxb,lt[i],tdexb,N /// + ${mmrootsyntax1} /// + ) // + time[i] = t + } + } + else { + for (i=1;i<=N;i++){ + rc[i] = mm_root( /// + t=.,&survsim_user_hazard(),lt[i],maxt[i],tol,maxit, /// + logu[i],nodes,weights,i,expxb[i,],lt[i],tdexb[i,] /// + ${mmrootsyntax1} /// + ) // + time[i] = t + } + } + } + else { + for (i=1;i<=N;i++){ + rc[i] = mm_root( /// + t=.,&survsim_user_chazard(),lt[i],maxt[i],tol,maxit, /// + logu[i],i,expxb[i,],lt[i],tdexb[i,] /// + ${mmrootsyntax1} /// + ) // + time[i] = t + } + } +} + +function survsim_user_hazard( `RS' t, /// + `RS' logu, /// + `RM' nodes, /// + `RM' weights, /// + `RS' i, /// + `RM' expxb, /// + `RS' lt, /// + `RM' tdexb /// + ${mmrootsyntax2} /// + ) // +{ + tnodes = (t :- lt) :* 0.5 :* nodes :+ (t :+ lt) :/ 2 + tweights = (t :- lt) :* weights :/ 2 + chq = $chaz + if (min(chq)<0) { + errprintf("hazard() gives negative values\n") + exit(198) + } + return(chq * tweights :+ logu) +} + +function survsim_user_chazard( `RS' t, /// + `RS' logu, /// + `RS' i, /// + `RM' expxb, /// + `RS' lt, /// + `RM' tdexb /// + ${mmrootsyntax2} /// + ) // +{ + return((${chaz}) :+ logu) +} + +function survsim_user_hazard_marg( `RS' t, /// + `RS' logu, /// + `RM' nodes, /// + `RM' weights, /// + `RS' i, /// + `RM' expxb, /// + `RS' lt, /// + `RM' tdexb, /// + `RS' N /// + ${mmrootsyntax2} /// + ) // +{ + tnodes = (t :- lt) :* 0.5 :* J(N,1,nodes) :+ (t :+ lt) :/ 2 + tweights = (t :- lt) :* weights :/ 2 + chq = $chaz + return(log(mean(exp(-chq * tweights))) :- logu) +} + +end