name: Feathr E2E Tests

on:
  push:
    branches: [main]
    paths-ignore:
      - "docs/**"
      - "ui/**"
      - "**/README.md"
  pull_request_target:
    branches: [main]
    types: [labeled, edited, opened, synchronize]
    # 'paths-ignore' is disabled below to guarantee that the 'test_status' jobs can be triggered;
    # with 'paths-ignore' enabled, the test jobs needed by 'test_status' would be skipped.
    # Please do not set the 'safe to test' label for changes limited to the paths below.
    # paths-ignore:
    #   - "docs/**"
    #   - "ui/**"
    #   - "**/README.md"
  schedule:
    # Runs daily at 1 PM UTC (9 PM CST); a notification is sent to TEAMS_WEBHOOK
    - cron: '00 13 * * *'

# Cancel any in-progress workflow run from the same PR, branch, or tag when a new workflow is triggered.
# Ref: https://stackoverflow.com/questions/66335225/how-to-cancel-previous-runs-in-the-pr-when-you-push-new-commitsupdate-the-curre
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true
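
# Illustrative resolution of the concurrency group key above (not part of the config):
#   pull_request_target for PR #123 -> "Feathr E2E Tests-123"
#   push to main                    -> "Feathr E2E Tests-refs/heads/main"
#   scheduled run                   -> "Feathr E2E Tests-refs/heads/main"
# A newly triggered run cancels any still-in-progress run that shares its group key.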
jobs:
  paths_filter:
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request_target'
    outputs:
      # output1 - whether the changes in a PR contain client source files
      output1: ${{ steps.filter_src.outputs.src }}
      # output2 - whether the changes in a PR contain registry files
      output2: ${{ steps.filter_registry.outputs.registry }}
    steps:
      - name: Checkout
        uses: actions/checkout@v3
      - uses: dorny/paths-filter@v2
        id: filter_src
        with:
          filters: |
            src:
              - "!((registry|docs|ui)/**)**"
      - uses: dorny/paths-filter@v2
        id: filter_registry
        with:
          filters: |
            registry:
              - "registry/**"
      - name: client tests
        if: steps.filter_src.outputs.src == 'true'
        run: echo "Changes of this PR contain client source files. Please set the label 'safe to test'."
      - name: registry tests
        if: steps.filter_registry.outputs.registry == 'true'
        run: echo "Changes of this PR contain registry files. Please set the label 'registry test'."
      - name: no tests
        if: steps.filter_registry.outputs.registry != 'true' && steps.filter_src.outputs.src != 'true'
        run: echo "Changes of this PR contain neither registry files nor client source files. Do not set the labels 'safe to test' or 'registry test'."
  databricks_test:
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Set up JDK 8
        uses: actions/setup-java@v2
        with:
          java-version: "8"
          distribution: "temurin"
      - name: Gradle build
        run: |
          ./gradlew build
          # remote folder for the CI upload
          echo "CI_SPARK_REMOTE_JAR_FOLDER=feathr_jar_github_action_$(date +"%H_%M_%S")" >> $GITHUB_ENV
          # local jar file name without its path, so version changes won't affect it
          echo "FEATHR_LOCAL_JAR_NAME=$(ls build/libs/*.jar | xargs -n 1 basename)" >> $GITHUB_ENV
          # full path of the local jar
          echo "FEATHR_LOCAL_JAR_FULL_NAME_PATH=$(ls build/libs/*.jar)" >> $GITHUB_ENV
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Install Feathr Package
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ./feathr_project/[all]
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Set env variable and upload jars
        env:
          # set these for the databricks CLI
          DATABRICKS_HOST: ${{secrets.DATABRICKS_HOST}}
          DATABRICKS_TOKEN: ${{secrets.DATABRICKS_WORKSPACE_TOKEN_VALUE}}
        run: |
          # upload the built jar to DBFS with the databricks CLI
          # (assumes there is only one jar in the build output folder)
          databricks fs cp ${{ env.FEATHR_LOCAL_JAR_FULL_NAME_PATH}} dbfs:/${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}/${{ env.FEATHR_LOCAL_JAR_NAME}} --overwrite
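          # Illustrative expansion of the command above (hypothetical jar name and timestamp):
          #   databricks fs cp build/libs/feathr_2.12-0.9.0.jar \
          #     dbfs:/feathr_jar_github_action_10_30_00/feathr_2.12-0.9.0.jar --overwrite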
      - name: Run Feathr with Databricks
        env:
          PROJECT_CONFIG__PROJECT_NAME: "feathr_github_ci_databricks"
          SPARK_CONFIG__SPARK_CLUSTER: databricks
          SPARK_CONFIG__DATABRICKS__WORKSPACE_INSTANCE_URL: ${{secrets.DATABRICKS_HOST}}
          DATABRICKS_WORKSPACE_TOKEN_VALUE: ${{secrets.DATABRICKS_WORKSPACE_TOKEN_VALUE}}
          SPARK_CONFIG__DATABRICKS__CONFIG_TEMPLATE: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"11.3.x-scala2.12","num_workers":1,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"${{secrets.DATABRICKS_INSTANCE_POOL_ID}}"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}'
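          # In the template above, the FEATHR_FILL_IN placeholders are intended to be
          # filled in by the Feathr client at job-submission time (run name, spark conf,
          # jar location, main class, parameters); the Spark version, worker count, and
          # instance pool stay as pinned here.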
          REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}}
          AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
          AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
          AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}}
          S3_ACCESS_KEY: ${{secrets.S3_ACCESS_KEY}}
          S3_SECRET_KEY: ${{secrets.S3_SECRET_KEY}}
          ADLS_ACCOUNT: ${{secrets.ADLS_ACCOUNT}}
          ADLS_KEY: ${{secrets.ADLS_KEY}}
          BLOB_ACCOUNT: ${{secrets.BLOB_ACCOUNT}}
          BLOB_KEY: ${{secrets.BLOB_KEY}}
          JDBC_SF_PASSWORD: ${{secrets.JDBC_SF_PASSWORD}}
          KAFKA_SASL_JAAS_CONFIG: ${{secrets.KAFKA_SASL_JAAS_CONFIG}}
          MONITORING_DATABASE_SQL_PASSWORD: ${{secrets.MONITORING_DATABASE_SQL_PASSWORD}}
          SPARK_CONFIG__DATABRICKS__FEATHR_RUNTIME_LOCATION: dbfs:/${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}/${{ env.FEATHR_LOCAL_JAR_NAME}}
          COSMOS1_KEY: ${{secrets.COSMOS1_KEY}}
          SQL1_USER: ${{secrets.SQL1_USER}}
          SQL1_PASSWORD: ${{secrets.SQL1_PASSWORD}}
        run: |
          # run only the Databricks tests, in 6 parallel jobs
          pytest -n 6 --cov-report term-missing --cov=feathr_project/feathr feathr_project/test --cov-config=.github/workflows/.coveragerc_db --cov-fail-under=70
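          # Note: -n 6 comes from the pytest-xdist plugin and spreads the suite over 6
          # worker processes; --cov-fail-under=70 makes this step fail if total coverage
          # of the measured package drops below 70%.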
  azure_synapse_test:
    # Setting up both the azure_synapse and databricks tests duplicates some steps, but
    # for now we keep them as separate jobs to speed up the overall test run.
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Set up JDK 8
        uses: actions/setup-java@v2
        with:
          java-version: "8"
          distribution: "temurin"
      - name: Gradle build
        run: |
          ./gradlew build
          # remote folder for the CI upload
          echo "CI_SPARK_REMOTE_JAR_FOLDER=feathr_jar_github_action_$(date +"%H_%M_%S")" >> $GITHUB_ENV
          # local jar file name without its path, so version changes won't affect it
          echo "FEATHR_LOCAL_JAR_NAME=$(ls build/libs/*.jar | xargs -n 1 basename)" >> $GITHUB_ENV
          # full path of the local jar
          echo "FEATHR_LOCAL_JAR_FULL_NAME_PATH=$(ls build/libs/*.jar)" >> $GITHUB_ENV
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Azure Blob Storage Upload (Overwrite)
        uses: fixpoint/azblob-upload-artifact@v4
        with:
          connection-string: ${{secrets.SPARK_JAR_BLOB_CONNECTION_STRING}}
          name: ${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}
          path: ${{ env.FEATHR_LOCAL_JAR_FULL_NAME_PATH}}
          container: ${{secrets.SPARK_JAR_BLOB_CONTAINER}}
          cleanup: "true"
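      # The jar uploaded here should land at <container>/<CI_SPARK_REMOTE_JAR_FOLDER>/<jar name>
      # in the storage account, which is what the abfss:// runtime location below points to;
      # 'cleanup: "true"' removes previously uploaded blobs under the same artifact name first.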
      - name: Install Feathr Package
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ./feathr_project/[all]
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Run Feathr with Azure Synapse
        env:
          PROJECT_CONFIG__PROJECT_NAME: "feathr_github_ci_synapse"
          SPARK_CONFIG__SPARK_CLUSTER: azure_synapse
          REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}}
          AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
          AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
          AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}}
          S3_ACCESS_KEY: ${{secrets.S3_ACCESS_KEY}}
          S3_SECRET_KEY: ${{secrets.S3_SECRET_KEY}}
          ADLS_ACCOUNT: ${{secrets.ADLS_ACCOUNT}}
          ADLS_KEY: ${{secrets.ADLS_KEY}}
          BLOB_ACCOUNT: ${{secrets.BLOB_ACCOUNT}}
          BLOB_KEY: ${{secrets.BLOB_KEY}}
          JDBC_TABLE: ${{secrets.JDBC_TABLE}}
          JDBC_USER: ${{secrets.JDBC_USER}}
          JDBC_PASSWORD: ${{secrets.JDBC_PASSWORD}}
          JDBC_DRIVER: ${{secrets.JDBC_DRIVER}}
          JDBC_SF_PASSWORD: ${{secrets.JDBC_SF_PASSWORD}}
          KAFKA_SASL_JAAS_CONFIG: ${{secrets.KAFKA_SASL_JAAS_CONFIG}}
          MONITORING_DATABASE_SQL_PASSWORD: ${{secrets.MONITORING_DATABASE_SQL_PASSWORD}}
          SPARK_CONFIG__AZURE_SYNAPSE__FEATHR_RUNTIME_LOCATION: "abfss://${{secrets.SPARK_JAR_BLOB_CONTAINER}}@feathrazuretest3storage.dfs.core.windows.net/${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}/${{ env.FEATHR_LOCAL_JAR_NAME}}"
          COSMOS1_KEY: ${{secrets.COSMOS1_KEY}}
          SQL1_USER: ${{secrets.SQL1_USER}}
          SQL1_PASSWORD: ${{secrets.SQL1_PASSWORD}}
        run: |
          # skip the databricks-related tests, since they just ran in databricks_test; keeping
          # the databricks and synapse tests separate also avoids write conflicts
          # run in 6 parallel jobs to shorten the run time
          pytest -n 6 -m "not databricks" --cov-report term-missing --cov=feathr_project/feathr feathr_project/test --cov-config=.github/workflows/.coveragerc_sy --cov-fail-under=70
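          # Note: -m "not databricks" deselects tests carrying the pytest marker
          # 'databricks' (e.g. tests decorated with @pytest.mark.databricks).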
  local_spark_test:
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test'))
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Set up JDK 8
        uses: actions/setup-java@v2
        with:
          java-version: "8"
          distribution: "temurin"
      - name: Gradle build
        run: |
          ./gradlew build
          # remote folder for the CI upload
          echo "CI_SPARK_REMOTE_JAR_FOLDER=feathr_jar_github_action_$(date +"%H_%M_%S")" >> $GITHUB_ENV
          # local jar file name without its path, so version changes won't affect it
          echo "FEATHR_LOCAL_JAR_NAME=$(ls build/libs/*.jar | xargs -n 1 basename)" >> $GITHUB_ENV
          # full path of the local jar
          echo "FEATHR_LOCAL_JAR_FULL_NAME_PATH=$(ls build/libs/*.jar)" >> $GITHUB_ENV
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Install Feathr Package
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ./feathr_project/[all]
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Run Feathr with Local Spark
        env:
          PROJECT_CONFIG__PROJECT_NAME: "feathr_github_ci_local"
          SPARK_CONFIG__SPARK_CLUSTER: local
          REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}}
          AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
          AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
          AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}}
          S3_ACCESS_KEY: ${{secrets.S3_ACCESS_KEY}}
          S3_SECRET_KEY: ${{secrets.S3_SECRET_KEY}}
          ADLS_ACCOUNT: ${{secrets.ADLS_ACCOUNT}}
          ADLS_KEY: ${{secrets.ADLS_KEY}}
          BLOB_ACCOUNT: ${{secrets.BLOB_ACCOUNT}}
          BLOB_KEY: ${{secrets.BLOB_KEY}}
          JDBC_TABLE: ${{secrets.JDBC_TABLE}}
          JDBC_USER: ${{secrets.JDBC_USER}}
          JDBC_PASSWORD: ${{secrets.JDBC_PASSWORD}}
          JDBC_DRIVER: ${{secrets.JDBC_DRIVER}}
          JDBC_SF_PASSWORD: ${{secrets.JDBC_SF_PASSWORD}}
          KAFKA_SASL_JAAS_CONFIG: ${{secrets.KAFKA_SASL_JAAS_CONFIG}}
          MONITORING_DATABASE_SQL_PASSWORD: ${{secrets.MONITORING_DATABASE_SQL_PASSWORD}}
          COSMOS1_KEY: ${{secrets.COSMOS1_KEY}}
          SQL1_USER: ${{secrets.SQL1_USER}}
          SQL1_PASSWORD: ${{secrets.SQL1_PASSWORD}}
        run: |
          # skip cloud-related tests
          pytest --cov-report term-missing --cov=feathr_project/feathr/spark_provider feathr_project/test/test_local_spark_e2e.py --cov-config=.github/workflows/.coveragerc_local
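          # Note: this run targets only the local-Spark test module and measures coverage
          # for feathr_project/feathr/spark_provider, so no --cov-fail-under gate is set here.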
  registry_test:
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule' || github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'registry test'))
    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          ref: ${{ github.event.pull_request.head.sha }}
      - name: Set up JDK 8
        uses: actions/setup-java@v2
        with:
          java-version: "8"
          distribution: "temurin"
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          python-version: 3.8
      - name: Install Feathr Package
        run: |
          python -m pip install --upgrade pip
          if [ -f ./registry/test/requirements.txt ]; then pip install -r ./registry/test/requirements.txt; fi
          if [ -f ./registry/purview-registry/requirements.txt ]; then pip install -r ./registry/purview-registry/requirements.txt; fi
          if [ -f ./registry/sql-registry/requirements.txt ]; then pip install -r ./registry/sql-registry/requirements.txt; fi
      - name: Run Registry Test Cases
        env:
          AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}}
          AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}}
          AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}}
          PURVIEW_NAME: "feathrazuretest3-purview1"
          CONNECTION_STR: ${{secrets.CONNECTION_STR}}
        run: |
          pytest --cov-report term-missing --cov=registry/sql-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_sql_registry.py --cov-fail-under=20
          pytest --cov-report term-missing --cov=registry/purview-registry/registry --cov-config=registry/test/.coveragerc registry/test/test_purview_registry.py --cov-fail-under=20
  client_test_status:
    # Status check that marks whether any required test job failed for a PR.
    # If it fails, the PR is blocked from merging.
    runs-on: ubuntu-latest
    if: always() && github.event_name == 'pull_request_target'
    needs: [databricks_test, azure_synapse_test, local_spark_test, paths_filter]
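    # Decision table for the three steps below (illustrative summary of the if-conditions):
    #   any needed job failed                              -> fail, block the merge
    #   a needed job was skipped AND the PR changed src    -> fail, 'safe to test' label missing
    #   everything passed, or skipped with no src changes  -> success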
    steps:
      - name: Test status failure
        if: contains(needs.*.result, 'failure')
        run: |
          echo "Merging is blocked because one or more test jobs failed."
          exit 1
      - name: Required test cases skipped
        if: contains(needs.*.result, 'skipped') && needs.paths_filter.outputs.output1 == 'true'
        run: |
          echo "Merging is blocked because some required test jobs didn't run. Please set the label 'safe to test'."
          exit 1
      - name: Test status success
        if: ${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'skipped') || ( contains(needs.*.result, 'skipped') && needs.paths_filter.outputs.output1 != 'true' )}}
        run: |
          echo "All required CI test jobs for client source passed."
  registry_test_status:
    # Status check that marks whether any required registry test cases failed for a PR.
    # If it fails, the PR is blocked from merging.
    runs-on: ubuntu-latest
    if: always() && github.event_name == 'pull_request_target'
    needs: [registry_test, paths_filter]
    steps:
      - name: Test status failure
        if: contains(needs.*.result, 'failure')
        run: |
          echo "Merging is blocked because the registry test job failed."
          exit 1
      - name: Required test cases skipped
        if: needs.registry_test.result == 'skipped' && needs.paths_filter.outputs.output2 == 'true'
        run: |
          echo "Merging is blocked because the required 'registry_test' job didn't run. Please set the label 'registry test'."
          exit 1
      - name: Test status success
        if: needs.registry_test.result == 'success' || needs.registry_test.result == 'skipped' && needs.paths_filter.outputs.output2 != 'true'
        run: |
          echo "All required CI test cases for registry passed."
  failure_notification:
    # If any test job fails, a warning message is sent
    needs: [databricks_test, azure_synapse_test, local_spark_test, registry_test]
    runs-on: ubuntu-latest
    if: failure() && github.event_name == 'schedule'
    steps:
      - name: Warning
        run: |
          curl -H 'Content-Type: application/json' -d '{"text": "[WARNING] Daily CI has a failure, please check: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' ${{ secrets.TEAMS_WEBHOOK }}
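
  # Both notification jobs post to a Teams incoming webhook, which accepts a minimal
  # JSON payload of the form {"text": "..."}. Illustrative (hypothetical webhook URL):
  #   curl -H 'Content-Type: application/json' -d '{"text": "hello"}' https://example.webhook.office.com/...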
  notification:
    # Final daily report with the status of every test job
    needs: [databricks_test, azure_synapse_test, local_spark_test, registry_test]
    runs-on: ubuntu-latest
    if: always() && github.event_name == 'schedule'
    steps:
      - name: Get Date
        run: echo "NOW=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
      - name: Notification
        run: |
          curl -H 'Content-Type: application/json' -d '{"text": "${{env.NOW}} Daily Report: 1. Databricks Test ${{needs.databricks_test.result}}, 2. Synapse Test ${{needs.azure_synapse_test.result}}, 3. Local Spark Test ${{needs.local_spark_test.result}}. Link: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' ${{ secrets.TEAMS_WEBHOOK }}