Skip to content

Commit

Permalink
cd: workflow for deploying ml pipeline (#64)
Browse files Browse the repository at this point in the history
* cd: workflow for deploying ml pipeline

* reset database on every run

* remove gpu usage for now because we do not have quota

* pass correct chromadb host to pipeline
  • Loading branch information
Charlie-XIAO authored Dec 7, 2024
1 parent 91bb3ce commit efca2b0
Show file tree
Hide file tree
Showing 33 changed files with 828 additions and 447 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/deploy-app.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@ jobs:
make gh-actions command=./deploy-app.sh
rm /tmp/veritas-trial-service.json
- name: Create PR to update Docker tag
- name: Create PR
id: create-pr
uses: peter-evans/create-pull-request@v7
with:
token: ${{ secrets.CHARLIE_XIAO_PAT }}
labels: app, ci/cd
title: "cd: Update Docker tag for app"
commit-message: "cd: Update Docker tag for app"
title: "cd: updates of app deployment"
commit-message: "cd: updates of app deployment"
author: "github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>"
committer: "github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>"
body: |
Expand Down
62 changes: 62 additions & 0 deletions .github/workflows/deploy-chromadb.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Manually triggered workflow: redeploys the ChromaDB instance, then the
# pipeline and the app, and finally opens a pull request for the resulting
# repository changes and enables auto-merge on it.
name: Deploy ChromaDB

on:
  workflow_dispatch:

jobs:
  deploy-chromadb:
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Google authentication
        uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}

      - name: Set up gcloud CLI
        uses: google-github-actions/setup-gcloud@v2

      - name: Configure Docker client
        run: gcloud auth configure-docker

      - name: Build deployment container
        run: cd deploy && make build

      # Redeploying the ChromaDB instance would require redeploying the pipeline
      # and the app as well
      - name: Deploy ChromaDB
        env:
          VERITAS_TRIAL_SERVICE_CREDENTIALS: /tmp/veritas-trial-service.json
        run: |
          cd deploy
          # Remove the key file on exit, including when a deployment command
          # fails and the step aborts early
          trap 'rm -f /tmp/veritas-trial-service.json' EXIT
          printf "%s" "${{ secrets.VERITAS_TRIAL_SERVICE_KEY }}" > /tmp/veritas-trial-service.json
          make gh-actions command=./deploy-chromadb.sh
          make gh-actions command=./deploy-pipeline.sh
          make gh-actions command=./deploy-app.sh

      - name: Create PR
        id: create-pr
        uses: peter-evans/create-pull-request@v7
        with:
          token: ${{ secrets.CHARLIE_XIAO_PAT }}
          labels: app, ci/cd
          title: "cd: updates of chromadb deployment"
          commit-message: "cd: updates of chromadb deployment"
          author: "github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>"
          committer: "github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>"
          body: |
            This PR was triggered by @${{ github.actor }} in ${{ github.workflow }}.
            Check the workflow run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}

      # Skip when the previous step produced no pull request; otherwise
      # `gh pr merge` would be called with an empty PR number
      - name: Enable auto-merge
        if: steps.create-pr.outputs.pull-request-number != ''
        run: gh pr merge --squash --auto "$PR_NUMBER"
        env:
          GH_TOKEN: ${{ secrets.CHARLIE_XIAO_PAT }}
          PR_NUMBER: ${{ steps.create-pr.outputs.pull-request-number }}
58 changes: 58 additions & 0 deletions .github/workflows/deploy-pipeline.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Manually triggered workflow: deploys the pipeline, then opens a pull request
# for the resulting repository changes and enables auto-merge on it.
name: Deploy pipeline

on:
  workflow_dispatch:

jobs:
  deploy-pipeline:
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Google authentication
        uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}

      - name: Set up gcloud CLI
        uses: google-github-actions/setup-gcloud@v2

      - name: Configure Docker client
        run: gcloud auth configure-docker

      - name: Build deployment container
        run: cd deploy && make build

      - name: Deploy pipeline
        env:
          VERITAS_TRIAL_SERVICE_CREDENTIALS: /tmp/veritas-trial-service.json
        run: |
          cd deploy
          # Remove the key file on exit, including when the deployment command
          # fails and the step aborts early
          trap 'rm -f /tmp/veritas-trial-service.json' EXIT
          printf "%s" "${{ secrets.VERITAS_TRIAL_SERVICE_KEY }}" > /tmp/veritas-trial-service.json
          make gh-actions command=./deploy-pipeline.sh

      - name: Create PR
        id: create-pr
        uses: peter-evans/create-pull-request@v7
        with:
          token: ${{ secrets.CHARLIE_XIAO_PAT }}
          labels: ci/cd
          title: "cd: updates of pipeline deployment"
          commit-message: "cd: updates of pipeline deployment"
          author: "github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>"
          committer: "github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>"
          body: |
            This PR was triggered by @${{ github.actor }} in ${{ github.workflow }}.
            Check the workflow run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}

      # Skip when the previous step produced no pull request; otherwise
      # `gh pr merge` would be called with an empty PR number
      - name: Enable auto-merge
        if: steps.create-pr.outputs.pull-request-number != ''
        run: gh pr merge --squash --auto "$PR_NUMBER"
        env:
          GH_TOKEN: ${{ secrets.CHARLIE_XIAO_PAT }}
          PR_NUMBER: ${{ steps.create-pr.outputs.pull-request-number }}
6 changes: 4 additions & 2 deletions app/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
all: build run

CHROMADB_HOST := $(shell cat $(PWD)/../deploy/chromadb/.instance-ip)

build:
docker-compose build
CHROMADB_HOST=$(CHROMADB_HOST) docker-compose build

run:
docker-compose up
CHROMADB_HOST=$(CHROMADB_HOST) docker-compose up

# [DEV] Update lock files and development environment on the host machine
devlockbackend:
Expand Down
11 changes: 1 addition & 10 deletions app/docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,25 +1,16 @@
services:
# ChromaDB
chromadb:
image: chromadb/chroma:latest
ports:
- "8000:8000"
volumes:
- chroma-data:/chroma/chroma
# Backend service
backend:
build:
context: ./backend
environment:
- GOOGLE_APPLICATION_CREDENTIALS=/secrets/veritas-trial-service.json
- FRONTEND_URL=http://localhost:8080
- CHROMADB_HOST=chromadb
- CHROMADB_HOST=${CHROMADB_HOST}
ports:
- "8001:8001"
volumes:
- ../secrets:/secrets
depends_on:
- chromadb
# Frontend service
frontend:
build:
Expand Down
1 change: 1 addition & 0 deletions deploy/.docker-tag-pipeline
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
20241206223116
4 changes: 4 additions & 0 deletions deploy/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ RUN set -ex; \
python3.11 -m pip install ansible google-auth kubernetes requests && \
ansible-galaxy collection install community.docker google.cloud kubernetes.core

# Install other Python dependencies
RUN set -ex; \
python3.11 -m pip install google-cloud-aiplatform kfp

# Clean up
RUN set -ex; \
rm -rf /var/lib/apt/lists/*
Expand Down
10 changes: 10 additions & 0 deletions deploy/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,19 @@ run:
$(DOCKER_CMD) container run $(if $(command),,-it) --rm \
--env GOOGLE_APPLICATION_CREDENTIALS=/secrets/veritas-trial-deployment.json \
--env VERITAS_TRIAL_SERVICE_CREDENTIALS=/secrets/veritas-trial-service.json \
--env GCP_PIPELINE_SERVICE_ACCOUNT_EMAIL=veritas-trial-deployment@veritastrial.iam.gserviceaccount.com \
--env GCP_PROJECT_ID=veritastrial \
--env GCP_REGION=us-central1 \
--env GCP_ZONE=us-central1-a \
--volume $(PWD)/.docker-tag-app:/veritastrial/.docker-tag-app \
--volume $(PWD)/.docker-tag-pipeline:/veritastrial/.docker-tag-pipeline \
--volume $(PWD)/chromadb/.instance-ip:/veritastrial/chromadb/.instance-ip \
--volume $(PWD)/../secrets/veritas-trial-deployment.json:/secrets/veritas-trial-deployment.json:ro \
--volume $(PWD)/../secrets/veritas-trial-service.json:/secrets/veritas-trial-service.json:ro \
--volume $(PWD)/../app/backend:/app/backend:ro \
--volume $(PWD)/../app/frontend:/app/frontend:ro \
--volume $(PWD)/../src/data-pipeline:/src/data-pipeline:ro \
--volume $(PWD)/../src/embedding-model:/src/embedding-model:ro \
--volume /var/run/docker.sock:/var/run/docker.sock \
--name $(CONTAINER_NAME) $(IMAGE_NAME) $(command)

Expand All @@ -30,14 +35,19 @@ gh-actions:
$(DOCKER_CMD) container run --rm \
--env GOOGLE_APPLICATION_CREDENTIALS=/secrets/veritas-trial-deployment.json \
--env VERITAS_TRIAL_SERVICE_CREDENTIALS=/secrets/veritas-trial-service.json \
--env GCP_PIPELINE_SERVICE_ACCOUNT_EMAIL=veritas-trial-deployment@veritastrial.iam.gserviceaccount.com \
--env GCP_PROJECT_ID=veritastrial \
--env GCP_REGION=us-central1 \
--env GCP_ZONE=us-central1-a \
--volume $(PWD)/.docker-tag-app:/veritastrial/.docker-tag-app \
--volume $(PWD)/.docker-tag-pipeline:/veritastrial/.docker-tag-pipeline \
--volume $(PWD)/chromadb/.instance-ip:/veritastrial/chromadb/.instance-ip \
--volume $(GOOGLE_APPLICATION_CREDENTIALS):/secrets/veritas-trial-deployment.json:ro \
--volume $(VERITAS_TRIAL_SERVICE_CREDENTIALS):/secrets/veritas-trial-service.json:ro \
--volume $(PWD)/../app/backend:/app/backend:ro \
--volume $(PWD)/../app/frontend:/app/frontend:ro \
--volume $(PWD)/../src/data-pipeline:/src/data-pipeline:ro \
--volume $(PWD)/../src/embedding-model:/src/embedding-model:ro \
--volume /var/run/docker.sock:/var/run/docker.sock \
--name $(CONTAINER_NAME) $(IMAGE_NAME) $(command)

Expand Down
23 changes: 14 additions & 9 deletions deploy/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Deploy

Make sure you have `veritas-trial-deployment.json` and `veritas-trial-service.json` under the `/secrets/` directory if you will deploy manually. Now enter this directory, then build and run the container:
The deployment commands are automated with GitHub Actions. It is preferred to trigger the corresponding workflow instead of running the command manually. The following are only for demonstration purposes. Make sure you have `veritas-trial-deployment.json` and `veritas-trial-service.json` under the `/secrets/` directory if you are deploying manually. Now enter this directory, then build and run the container:

```bash
make build
Expand All @@ -9,22 +9,27 @@ make run

## App

The deployment uses Ansible. Inside the container:
The deployment uses Ansible. It will deploy the Docker images of the application and create/update the Kubernetes cluster to run the application. Inside the container, run:

```bash
./deploy-app.sh # Deploy app images and K8S cluster
./destroy-app.sh # Destroy K8S cluster
./deploy-app.sh # Deploy app (preferred to trigger GitHub Actions workflow)
./destroy-app.sh # Destroy app
```

## Pipeline

The deployment uses Ansible and Vertex AI pipeline. It will deploy the Docker images of the pipeline and run `/src/data-pipeline/` and `/src/embedding-model/` steps. Inside the container, run:

```bash
./deploy-pipeline.sh # Deploy pipeline (preferred to trigger GitHub Actions workflow)
```

The deployment command is automated with GitHub Actions. It is preferred to trigger the corresponding workflow instead of running the command manually. The destruction command is not automated and needs to be run manually when necessary.

## ChromaDB

The deployment uses Terraform, as suggested in [ChromaDB docs](https://docs.trychroma.com/deployment/gcp). Inside the container:
The deployment uses Terraform, as suggested in [ChromaDB docs](https://docs.trychroma.com/deployment/gcp). It will deploy a VM instance that runs ChromaDB service. Note that redeploying ChromaDB requires redeploying the app and the pipeline as well. The following script will not do that, but the corresponding workflow in GitHub Actions will. Inside the container, run:

```bash
./deploy-chromadb.sh # Deploy ChromaDB instance
./deploy-chromadb.sh # Deploy ChromaDB instance (preferred to trigger GitHub Actions workflow)
./destroy-chromadb.sh # Destroy ChromaDB instance
```

Both commands are not automated and need to be run manually when necessary. After a new deployment, one must update the `CHROMADB_HOST` environment variable in the "Create backend deployment" step in `app/deploy-k8s.yaml` and trigger the app deployment workflow.
13 changes: 8 additions & 5 deletions deploy/app/deploy-k8s.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,13 @@
- name: Import GCP service account credentials as secret
when: cluster_state == "present"
shell: |
kubectl create secret generic veritas-trial-service-key \
--from-file=veritas-trial-service.json={{ veritas_trial_service_credentials }} \
--namespace="{{ cluster_name }}-namespace"
ignore_errors: true # Ignore "secret already exists" error
if kubectl get secret veritas-trial-service-key --namespace="{{ cluster_name }}-namespace"; then
echo "Secret already exists"
else
kubectl create secret generic veritas-trial-service-key \
--from-file=veritas-trial-service.json={{ veritas_trial_service_credentials }} \
--namespace="{{ cluster_name }}-namespace"
fi
- name: Create frontend deployment
when: cluster_state == "present"
Expand Down Expand Up @@ -157,7 +160,7 @@
- name: GOOGLE_APPLICATION_CREDENTIALS
value: /secrets/veritas-trial-service.json
- name: CHROMADB_HOST
value: 35.226.13.117
value: "{{ lookup('env', 'CHROMADB_HOST') }}"
- name: SERVER_ROOT_PATH
value: /api

Expand Down
1 change: 1 addition & 0 deletions deploy/chromadb/.instance-ip
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
35.226.13.117
2 changes: 1 addition & 1 deletion deploy/deploy-app.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash

ansible-playbook app/deploy-images.yaml -i inventory.yaml
ansible-playbook app/deploy-k8s.yaml -i inventory.yaml --extra-vars cluster_state=present
CHROMADB_HOST=$(cat chromadb/.instance-ip) ansible-playbook app/deploy-k8s.yaml -i inventory.yaml --extra-vars cluster_state=present
1 change: 1 addition & 0 deletions deploy/deploy-chromadb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ terraform import \
veritastrial/chroma-allow-ssh-http || true

terraform apply -var-file chroma.tfvars
echo "$(terraform output -raw chroma_instance_ip)" > .instance-ip
4 changes: 4 additions & 0 deletions deploy/deploy-pipeline.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Build and push the pipeline Docker images, then run pipeline/pipeline.py.

# Stop at the first failure so that pipeline.py is never run after a failed
# image build/push, and so that the failure is reflected in the exit status
set -euo pipefail

ansible-playbook pipeline/deploy-images.yaml -i inventory.yaml
./pipeline/pipeline.py
2 changes: 2 additions & 0 deletions deploy/destroy-app.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/bin/bash
# Run the app K8S playbook with cluster_state=absent.

# CHROMADB_HOST is placed in the command's environment (same form as in
# deploy-app.sh). A plain assignment on its own line is not exported, so the
# playbook's env lookup of CHROMADB_HOST would not see it.
CHROMADB_HOST=$(cat chromadb/.instance-ip) ansible-playbook app/deploy-k8s.yaml -i inventory.yaml --extra-vars cluster_state=absent
1 change: 1 addition & 0 deletions deploy/destroy-chromadb.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ terraform import \
veritastrial/chroma-allow-ssh-http || true

terraform destroy -var-file chroma.tfvars
echo -n > .instance-ip
58 changes: 58 additions & 0 deletions deploy/pipeline/deploy-images.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
---
# Builds the data pipeline and embedding model images, pushes each to the
# registry under a shared timestamp tag, removes the local copies, and writes
# the tag to ../.docker-tag-pipeline.
# gcp_region and gcp_project_id are not defined in this file; presumably they
# come from the inventory - TODO confirm.
- name: Build and push Docker images to Artifact Registry
  hosts: localhost
  gather_facts: false

  vars:
    registry_base: "{{ gcp_region }}-docker.pkg.dev/{{ gcp_project_id }}/docker"

  tasks:
    - name: Set timestamp as Docker image tag
      # No shell features are needed to call date, so use the command module
      ansible.builtin.command: date +%Y%m%d%H%M%S
      register: docker_tag
      # Reading the clock does not modify the host
      changed_when: false

    - name: Build data pipeline image
      community.docker.docker_image:
        build:
          path: /src/data-pipeline
          platform: linux/amd64/v2
        name: "{{ registry_base }}/veritas-trial-data-pipeline:{{ docker_tag.stdout }}"
        source: build

    - name: Push data pipeline image
      community.docker.docker_image:
        name: "{{ registry_base }}/veritas-trial-data-pipeline:{{ docker_tag.stdout }}"
        repository: "{{ registry_base }}/veritas-trial-data-pipeline:{{ docker_tag.stdout }}"
        push: true
        source: local

    - name: Remove data pipeline image locally
      community.docker.docker_image:
        name: "{{ registry_base }}/veritas-trial-data-pipeline:{{ docker_tag.stdout }}"
        source: local
        state: absent
        force_absent: true

    - name: Build embedding model image
      community.docker.docker_image:
        build:
          path: /src/embedding-model
          platform: linux/amd64/v2
        name: "{{ registry_base }}/veritas-trial-embedding-model:{{ docker_tag.stdout }}"
        source: build

    - name: Push embedding model image
      community.docker.docker_image:
        name: "{{ registry_base }}/veritas-trial-embedding-model:{{ docker_tag.stdout }}"
        repository: "{{ registry_base }}/veritas-trial-embedding-model:{{ docker_tag.stdout }}"
        push: true
        source: local

    - name: Remove embedding model image locally
      community.docker.docker_image:
        name: "{{ registry_base }}/veritas-trial-embedding-model:{{ docker_tag.stdout }}"
        source: local
        state: absent
        force_absent: true

    # The redirection requires a shell, so this task keeps the shell module
    - name: Save Docker tag
      ansible.builtin.shell: echo {{ docker_tag.stdout }} > ../.docker-tag-pipeline
Loading

0 comments on commit efca2b0

Please sign in to comment.