Skip to content

Commit 9b36870

Browse files
Set staging bucket to be deleted a specified number of days after deletion of cluster (#792)
* works? * Well that was silly * lets see * debugging * what * catching up * one more unit test to go * wat * unit tests are good * cleaning up PR * no magic numbers * some cleaning up * well that was silly * comment update
1 parent e352353 commit 9b36870

File tree

13 files changed

+85
-20
lines changed

13 files changed

+85
-20
lines changed

src/main/resources/logback.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
<appender-ref ref="console"/>
6262
</logger>
6363
-->
64+
6465
<!--
6566
<logger name="slick.jdbc.JdbcBackend.benchmark" level="DEBUG" additivity="false">
6667
<appender-ref ref="file"/>

src/main/resources/reference.conf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,8 @@ zombieClusterMonitor {
121121
leoExecutionMode {
122122
backLeo = true
123123
}
124+
125+
clusterBucket {
126+
# how long the staging bucket should continue to exist after its cluster is deleted (a duration, e.g. "10 days")
127+
stagingBucketExpiration = 10 days
128+
}

src/main/scala/org/broadinstitute/dsde/workbench/leonardo/Boot.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import org.broadinstitute.dsde.workbench.google.GoogleCredentialModes.{Pem, Toke
1010
import org.broadinstitute.dsde.workbench.google.{GoogleStorageDAO, HttpGoogleIamDAO, HttpGoogleProjectDAO, HttpGoogleStorageDAO}
1111
import org.broadinstitute.dsde.workbench.leonardo.api.{LeoRoutes, StandardUserInfoDirectives}
1212
import org.broadinstitute.dsde.workbench.leonardo.auth.{LeoAuthProviderHelper, ServiceAccountProviderHelper}
13-
import org.broadinstitute.dsde.workbench.leonardo.config.{AutoFreezeConfig, ClusterDefaultsConfig, ClusterDnsCacheConfig, ClusterFilesConfig, ClusterResourcesConfig, DataprocConfig, LeoExecutionModeConfig, MonitorConfig, ProxyConfig, SamConfig, SwaggerConfig, ZombieClusterConfig}
13+
import org.broadinstitute.dsde.workbench.leonardo.config.{AutoFreezeConfig, ClusterDefaultsConfig, ClusterDnsCacheConfig, ClusterFilesConfig, ClusterResourcesConfig, DataprocConfig, LeoExecutionModeConfig, MonitorConfig, ProxyConfig, SamConfig, SwaggerConfig, ZombieClusterConfig, ClusterBucketConfig}
1414
import org.broadinstitute.dsde.workbench.leonardo.dao.{HttpJupyterDAO, HttpSamDAO}
1515
import org.broadinstitute.dsde.workbench.leonardo.dao.google.{HttpGoogleComputeDAO, HttpGoogleDataprocDAO}
1616
import org.broadinstitute.dsde.workbench.leonardo.db.DbReference
@@ -51,6 +51,7 @@ object Boot extends App with LazyLogging {
5151
val zombieClusterMonitorConfig = config.as[ZombieClusterConfig]("zombieClusterMonitor")
5252
val clusterDnsCacheConfig = config.as[ClusterDnsCacheConfig]("clusterDnsCache")
5353
val leoExecutionModeConfig = config.as[LeoExecutionModeConfig]("leoExecutionMode")
54+
val clusterBucketConfig = config.as[ClusterBucketConfig]("clusterBucket")
5455

5556
// we need an ActorSystem to host our application in
5657
implicit val system = ActorSystem("leonardo")
@@ -86,7 +87,7 @@ object Boot extends App with LazyLogging {
8687
if(leoExecutionModeConfig.backLeo) {
8788
val googleProjectDAO = new HttpGoogleProjectDAO(dataprocConfig.applicationName, Pem(leoServiceAccountEmail, leoServiceAccountPemFile), "google")
8889
val jupyterDAO = new HttpJupyterDAO(clusterDnsCache)
89-
val clusterMonitorSupervisor = system.actorOf(ClusterMonitorSupervisor.props(monitorConfig, dataprocConfig, gdDAO, googleComputeDAO, googleIamDAO, googleStorageDAO, dbRef, authProvider, autoFreezeConfig, jupyterDAO, leonardoService))
90+
val clusterMonitorSupervisor = system.actorOf(ClusterMonitorSupervisor.props(monitorConfig, dataprocConfig, clusterBucketConfig, gdDAO, googleComputeDAO, googleIamDAO, googleStorageDAO, dbRef, authProvider, autoFreezeConfig, jupyterDAO, leonardoService))
9091
val zombieClusterMonitor = system.actorOf(ZombieClusterMonitor.props(zombieClusterMonitorConfig, gdDAO, googleProjectDAO, dbRef))
9192
}
9293

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
package org.broadinstitute.dsde.workbench.leonardo.config
2+
3+
import scala.concurrent.duration.FiniteDuration
4+
5+
case class ClusterBucketConfig(
6+
stagingBucketExpiration: FiniteDuration
7+
)

src/main/scala/org/broadinstitute/dsde/workbench/leonardo/config/package.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,4 +125,10 @@ package object config {
125125
config.getBoolean("backLeo")
126126
)
127127
}
128+
129+
implicit val clusterBucketConfig: ValueReader[ClusterBucketConfig] = ValueReader.relative { config =>
130+
ClusterBucketConfig(
131+
toScalaDuration(config.getDuration("stagingBucketExpiration"))
132+
)
133+
}
128134
}

src/main/scala/org/broadinstitute/dsde/workbench/leonardo/db/ClusterComponent.scala

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,18 @@ trait ClusterComponent extends LeoComponent {
245245
.map { recs => recs.headOption.flatten.flatMap(head => parseGcsPath(head).toOption) }
246246
}
247247

248+
def getStagingBucket(project: GoogleProject, name: ClusterName): DBIO[Option[GcsPath]] = {
249+
250+
clusterQuery
251+
.filter { _.googleProject === project.value }
252+
.filter { _.clusterName === name.value }
253+
.map(_.stagingBucket)
254+
.result
255+
// the staging bucket is stored as a bare bucket name rather than a full path, so wrap it as gs://<name>/ before parsing
256+
.map { recs => recs.headOption.flatten.flatMap(head => parseGcsPath("gs://" + head + "/").toOption)
257+
}
258+
}
259+
248260
def getServiceAccountKeyId(project: GoogleProject, name: ClusterName): DBIO[Option[ServiceAccountKeyId]] = {
249261
clusterQuery
250262
.filter { _.googleProject === project.value }

src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/ClusterMonitorActor.scala

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
package org.broadinstitute.dsde.workbench.leonardo.monitor
22

33
import java.time.Instant
4+
import java.time.temporal.ChronoUnit
45

56
import akka.actor.Status.Failure
6-
import akka.actor.{Actor, ActorSystem, Props}
7+
import akka.actor.{Actor, Props}
78
import akka.pattern.pipe
89
import cats.data.OptionT
910
import cats.implicits._
1011
import com.google.api.client.googleapis.json.GoogleJsonResponseException
1112
import com.typesafe.scalalogging.LazyLogging
1213
import io.grpc.Status.Code
1314
import org.broadinstitute.dsde.workbench.google.{GoogleIamDAO, GoogleStorageDAO}
14-
import org.broadinstitute.dsde.workbench.leonardo.config.{DataprocConfig, MonitorConfig}
15+
import org.broadinstitute.dsde.workbench.leonardo.config.{DataprocConfig, MonitorConfig, ClusterBucketConfig}
1516
import org.broadinstitute.dsde.workbench.leonardo.dao.JupyterDAO
1617
import org.broadinstitute.dsde.workbench.leonardo.dao.google.{GoogleComputeDAO, GoogleDataprocDAO}
1718
import org.broadinstitute.dsde.workbench.leonardo.db.DbReference
@@ -20,6 +21,7 @@ import org.broadinstitute.dsde.workbench.leonardo.model.google.ClusterStatus._
2021
import org.broadinstitute.dsde.workbench.leonardo.model.google.{ClusterStatus, IP, _}
2122
import org.broadinstitute.dsde.workbench.leonardo.monitor.ClusterMonitorActor._
2223
import org.broadinstitute.dsde.workbench.leonardo.monitor.ClusterMonitorSupervisor.{ClusterDeleted, ClusterSupervisorMessage, RemoveFromList}
24+
import org.broadinstitute.dsde.workbench.model.google.GcsLifecycleTypes
2325
import org.broadinstitute.dsde.workbench.util.{Retry, addJitter}
2426
import slick.dbio.DBIOAction
2527

@@ -31,8 +33,8 @@ object ClusterMonitorActor {
3133
/**
3234
* Creates a Props object used for creating a {{{ClusterMonitorActor}}}.
3335
*/
34-
def props(cluster: Cluster, monitorConfig: MonitorConfig, dataprocConfig: DataprocConfig, gdDAO: GoogleDataprocDAO, googleComputeDAO: GoogleComputeDAO, googleIamDAO: GoogleIamDAO, googleStorageDAO: GoogleStorageDAO, dbRef: DbReference, authProvider: LeoAuthProvider, jupyterProxyDAO: JupyterDAO): Props =
35-
Props(new ClusterMonitorActor(cluster, monitorConfig, dataprocConfig, gdDAO, googleComputeDAO, googleIamDAO, googleStorageDAO, dbRef, authProvider, jupyterProxyDAO))
36+
def props(cluster: Cluster, monitorConfig: MonitorConfig, dataprocConfig: DataprocConfig, clusterBucketConfig: ClusterBucketConfig, gdDAO: GoogleDataprocDAO, googleComputeDAO: GoogleComputeDAO, googleIamDAO: GoogleIamDAO, googleStorageDAO: GoogleStorageDAO, dbRef: DbReference, authProvider: LeoAuthProvider, jupyterProxyDAO: JupyterDAO): Props =
37+
Props(new ClusterMonitorActor(cluster, monitorConfig, dataprocConfig, clusterBucketConfig, gdDAO, googleComputeDAO, googleIamDAO, googleStorageDAO, dbRef, authProvider, jupyterProxyDAO))
3638

3739
// ClusterMonitorActor messages:
3840

@@ -58,6 +60,7 @@ object ClusterMonitorActor {
5860
class ClusterMonitorActor(val cluster: Cluster,
5961
val monitorConfig: MonitorConfig,
6062
val dataprocConfig: DataprocConfig,
63+
val clusterBucketConfig: ClusterBucketConfig,
6164
val gdDAO: GoogleDataprocDAO,
6265
val googleComputeDAO: GoogleComputeDAO,
6366
val googleIamDAO: GoogleIamDAO,
@@ -230,6 +233,9 @@ class ClusterMonitorActor(val cluster: Cluster,
230233
// delete the init bucket so we don't continue to accrue costs after cluster is deleted
231234
_ <- deleteInitBucket
232235

236+
// set the staging bucket to be deleted after the configured expiration period so that logs are still accessible until then
237+
_ <- setStagingBucketLifecycle
238+
233239
// delete instances in the DB
234240
_ <- persistInstances(Set.empty)
235241

@@ -395,14 +401,28 @@ class ClusterMonitorActor(val cluster: Cluster,
395401
dbRef.inTransaction { dataAccess =>
396402
dataAccess.clusterQuery.getInitBucket(cluster.googleProject, cluster.clusterName)
397403
} flatMap {
398-
case None => Future.successful( logger.warn(s"Could not lookup bucket for cluster ${cluster.projectNameString}: cluster not in db") )
404+
case None => Future.successful( logger.warn(s"Could not lookup init bucket for cluster ${cluster.projectNameString}: cluster not in db") )
399405
case Some(bucketPath) =>
400406
googleStorageDAO.deleteBucket(bucketPath.bucketName, recurse = true) map { _ =>
401407
logger.debug(s"Deleted init bucket $bucketPath for cluster ${cluster.googleProject}/${cluster.clusterName}")
402408
}
403409
}
404410
}
405411

412+
private def setStagingBucketLifecycle: Future[Unit] = {
413+
// Look up the staging bucket path for this cluster, then set its lifecycle age to the days elapsed since the cluster was created plus the configured expiration, so that it is deleted the configured number of days after the cluster's deletion.
414+
dbRef.inTransaction { dataAccess =>
415+
dataAccess.clusterQuery.getStagingBucket(cluster.googleProject, cluster.clusterName)
416+
} flatMap {
417+
case None => Future.successful( logger.warn(s"Could not lookup staging bucket for cluster ${cluster.projectNameString}: cluster not in db") )
418+
case Some(bucketPath) =>
419+
val ageToDelete = cluster.auditInfo.createdDate.until(Instant.now(), ChronoUnit.DAYS).toInt + clusterBucketConfig.stagingBucketExpiration.toDays.toInt
420+
googleStorageDAO.setBucketLifecycle(bucketPath.bucketName, ageToDelete, GcsLifecycleTypes.Delete) map { _ =>
421+
logger.debug(s"Set staging bucket $bucketPath for cluster ${cluster.googleProject}/${cluster.clusterName} to be deleted in ${ageToDelete} days.")
422+
}
423+
}
424+
}
425+
406426
private def removeCredentialsFromMetadata: Future[Unit] = {
407427
cluster.serviceAccountInfo.notebookServiceAccount match {
408428
// No notebook service account: don't remove creds from metadata! We need them.

src/main/scala/org/broadinstitute/dsde/workbench/leonardo/monitor/ClusterMonitorSupervisor.scala

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import akka.actor.SupervisorStrategy.Restart
44
import akka.actor.{Actor, ActorRef, OneForOneStrategy, Props, Timers}
55
import com.typesafe.scalalogging.LazyLogging
66
import org.broadinstitute.dsde.workbench.google.{GoogleIamDAO, GoogleStorageDAO}
7-
import org.broadinstitute.dsde.workbench.leonardo.config.{AutoFreezeConfig, DataprocConfig, MonitorConfig}
7+
import org.broadinstitute.dsde.workbench.leonardo.config.{AutoFreezeConfig, ClusterBucketConfig, DataprocConfig, MonitorConfig}
88
import org.broadinstitute.dsde.workbench.leonardo.dao.JupyterDAO
99
import org.broadinstitute.dsde.workbench.leonardo.dao.google.{GoogleComputeDAO, GoogleDataprocDAO}
1010
import org.broadinstitute.dsde.workbench.leonardo.db.DbReference
@@ -19,8 +19,8 @@ import scala.concurrent.{ExecutionContext, Future}
1919
import scala.util.{Failure, Success}
2020

2121
object ClusterMonitorSupervisor {
22-
def props(monitorConfig: MonitorConfig, dataprocConfig: DataprocConfig, gdDAO: GoogleDataprocDAO, googleComputeDAO: GoogleComputeDAO, googleIamDAO: GoogleIamDAO, googleStorageDAO: GoogleStorageDAO, dbRef: DbReference, authProvider: LeoAuthProvider, autoFreezeConfig: AutoFreezeConfig, jupyterProxyDAO: JupyterDAO, leonardoService: LeonardoService): Props =
23-
Props(new ClusterMonitorSupervisor(monitorConfig, dataprocConfig, gdDAO, googleComputeDAO, googleIamDAO, googleStorageDAO, dbRef, authProvider, autoFreezeConfig, jupyterProxyDAO, leonardoService))
22+
def props(monitorConfig: MonitorConfig, dataprocConfig: DataprocConfig, clusterBucketConfig: ClusterBucketConfig, gdDAO: GoogleDataprocDAO, googleComputeDAO: GoogleComputeDAO, googleIamDAO: GoogleIamDAO, googleStorageDAO: GoogleStorageDAO, dbRef: DbReference, authProvider: LeoAuthProvider, autoFreezeConfig: AutoFreezeConfig, jupyterProxyDAO: JupyterDAO, leonardoService: LeonardoService): Props =
23+
Props(new ClusterMonitorSupervisor(monitorConfig, dataprocConfig, clusterBucketConfig, gdDAO, googleComputeDAO, googleIamDAO, googleStorageDAO, dbRef, authProvider, autoFreezeConfig, jupyterProxyDAO, leonardoService))
2424

2525
sealed trait ClusterSupervisorMessage
2626

@@ -49,7 +49,7 @@ object ClusterMonitorSupervisor {
4949
private case object CheckForClusters extends ClusterSupervisorMessage
5050
}
5151

52-
class ClusterMonitorSupervisor(monitorConfig: MonitorConfig, dataprocConfig: DataprocConfig, gdDAO: GoogleDataprocDAO, googleComputeDAO: GoogleComputeDAO, googleIamDAO: GoogleIamDAO, googleStorageDAO: GoogleStorageDAO, dbRef: DbReference, authProvider: LeoAuthProvider, autoFreezeConfig: AutoFreezeConfig, jupyterProxyDAO: JupyterDAO, leonardoService: LeonardoService)
52+
class ClusterMonitorSupervisor(monitorConfig: MonitorConfig, dataprocConfig: DataprocConfig, clusterBucketConfig: ClusterBucketConfig, gdDAO: GoogleDataprocDAO, googleComputeDAO: GoogleComputeDAO, googleIamDAO: GoogleIamDAO, googleStorageDAO: GoogleStorageDAO, dbRef: DbReference, authProvider: LeoAuthProvider, autoFreezeConfig: AutoFreezeConfig, jupyterProxyDAO: JupyterDAO, leonardoService: LeonardoService)
5353
extends Actor with Timers with LazyLogging {
5454
import context.dispatcher
5555

@@ -149,7 +149,7 @@ class ClusterMonitorSupervisor(monitorConfig: MonitorConfig, dataprocConfig: Dat
149149
}
150150

151151
def createChildActor(cluster: Cluster): ActorRef = {
152-
context.actorOf(ClusterMonitorActor.props(cluster, monitorConfig, dataprocConfig, gdDAO, googleComputeDAO, googleIamDAO, googleStorageDAO, dbRef, authProvider, jupyterProxyDAO))
152+
context.actorOf(ClusterMonitorActor.props(cluster, monitorConfig, dataprocConfig, clusterBucketConfig, gdDAO, googleComputeDAO, googleIamDAO, googleStorageDAO, dbRef, authProvider, jupyterProxyDAO))
153153
}
154154

155155
def startClusterMonitorActor(cluster: Cluster, watchMessageOpt: Option[ClusterSupervisorMessage] = None): Unit = {

src/test/scala/org/broadinstitute/dsde/workbench/leonardo/CommonTestData.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import net.ceedubs.ficus.Ficus._
99
import org.broadinstitute.dsde.workbench.google.mock.MockGoogleDataprocDAO
1010
import org.broadinstitute.dsde.workbench.leonardo.auth.WhitelistAuthProvider
1111
import org.broadinstitute.dsde.workbench.leonardo.auth.sam.MockPetClusterServiceAccountProvider
12-
import org.broadinstitute.dsde.workbench.leonardo.config.{AutoFreezeConfig, ClusterDefaultsConfig, ClusterDnsCacheConfig, ClusterFilesConfig, ClusterResourcesConfig, DataprocConfig, MonitorConfig, ProxyConfig, SwaggerConfig, ZombieClusterConfig}
12+
import org.broadinstitute.dsde.workbench.leonardo.config.{AutoFreezeConfig, ClusterBucketConfig, ClusterDefaultsConfig, ClusterDnsCacheConfig, ClusterFilesConfig, ClusterResourcesConfig, DataprocConfig, MonitorConfig, ProxyConfig, SwaggerConfig, ZombieClusterConfig}
1313
import org.broadinstitute.dsde.workbench.leonardo.dao.google.MockGoogleComputeDAO
1414
import org.broadinstitute.dsde.workbench.leonardo.dao.{MockJupyterDAO, MockSamDAO}
1515
import org.broadinstitute.dsde.workbench.leonardo.db.TestComponent
@@ -60,6 +60,7 @@ trait CommonTestData{ this: ScalaFutures =>
6060
val clusterUrlBase = dataprocConfig.clusterUrlBase
6161
val serviceAccountsConfig = config.getConfig("serviceAccounts.config")
6262
val monitorConfig = config.as[MonitorConfig]("monitor")
63+
val clusterBucketConfig = config.as[ClusterBucketConfig]("clusterBucket")
6364
val contentSecurityPolicy = config.as[Option[String]]("jupyterConfig.contentSecurityPolicy").getOrElse("default-src: 'self'")
6465
val mockJupyterDAO = new MockJupyterDAO
6566
val singleNodeDefaultMachineConfig = MachineConfig(Some(clusterDefaultsConfig.numberOfWorkers), Some(clusterDefaultsConfig.masterMachineType), Some(clusterDefaultsConfig.masterDiskSize))
@@ -85,7 +86,7 @@ trait CommonTestData{ this: ScalaFutures =>
8586
val rstudioImage = ClusterImage(RStudio, "rocker/tidyverse:latest", Instant.now)
8687

8788
def makeDataprocInfo(index: Int): DataprocInfo = {
88-
DataprocInfo(Option(UUID.randomUUID()), Option(OperationName("operationName" + index.toString)), Option(GcsBucketName("stagingBucketName" + index.toString)), Some(IP("numbers.and.dots")))
89+
DataprocInfo(Option(UUID.randomUUID()), Option(OperationName("operationName" + index.toString)), Option(GcsBucketName("stagingbucketname" + index.toString)), Some(IP("numbers.and.dots")))
8990
}
9091

9192
def makeCluster(index: Int): Cluster = {

src/test/scala/org/broadinstitute/dsde/workbench/leonardo/model/LeonardoModelSpec.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class LeonardoModelSpec extends TestComponent with FlatSpecLike with Matchers wi
5252
| "createdDate": "2018-08-07T10:12:35Z",
5353
| "labels": {},
5454
| "jupyterExtensionUri": "gs://extension_bucket/extension_path",
55-
| "stagingBucket": "stagingBucketName1",
55+
| "stagingBucket": "stagingbucketname1",
5656
| "errors": [],
5757
| "instances": [],
5858
| "dateAccessed": "2018-08-07T10:12:35Z",

0 commit comments

Comments
 (0)