Skip to content

Commit

Permalink
Initialize example-actions
Browse files Browse the repository at this point in the history
  • Loading branch information
rdenholm authored Nov 26, 2024
0 parents commit 46e55fe
Show file tree
Hide file tree
Showing 19 changed files with 280 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Set the default behavior, in case people don't have core.autocrlf set.
* text=auto

# ensure unix line endings on windows for files that need them.
*.sh eol=lf
codelists/* eol=lf
43 changes: 43 additions & 0 deletions .github/workflows/setup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# One-shot bootstrap workflow for repositories created from this template.
# It fills the repository name into README.md, deletes this workflow file,
# and (unless running in the template repository itself) amends the initial
# commit with those changes and force-pushes it.
name: Setup repository
on:
  workflow_dispatch:
  push:
    branches: [main]
jobs:
  setup:
    name: Initialise OpenSAFELY project.
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Update README.md and remove action
        shell: bash
        run: |
          # split "owner/name" so README.md placeholders can use either part
          export GITHUB_REPOSITORY_OWNER="$(echo "$GITHUB_REPOSITORY" | awk -F/ '{print $1}')"
          export GITHUB_REPOSITORY_NAME="$(echo "$GITHUB_REPOSITORY" | awk -F/ '{print $2}')"
          # expand the ${...} placeholders in README.md in place
          envsubst < README.md > tmp && mv tmp README.md
          # this workflow is only needed once, so it removes itself
          rm .github/workflows/setup.yaml
      - name: Do not run on template repository
        id: is_template
        # The only way to trigger this to run when used as a template is on
        # push to main. But that means it would also trigger when we push to
        # the template repo itself, which we do not want. So, check if we are
        # in a template repo
        run: |
          is_template=false
          # jq exits non-zero unless .is_template is exactly false, so any
          # other answer (or a failed request) is treated as "template"
          curl --silent -X GET \
            -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" \
            -H "Accept: application/vnd.github.baptiste-preview+json" \
            "https://api.github.com/repos/$GITHUB_REPOSITORY" \
            | jq --exit-status '.is_template == false' || is_template=true
          # output true/false so later actions can be skipped
          # (written to $GITHUB_OUTPUT; the ::set-output command is deprecated)
          echo "is_template=$is_template" >> "$GITHUB_OUTPUT"
      - name: Commit changes
        # only actually commit the changes if this is not a template repo
        if: steps.is_template.outputs.is_template == 'false'
        run: |
          # use the same author as the initial commit
          git config user.email "$(git log -1 --pretty=format:'%ae')"
          git config user.name "$(git log -1 --pretty=format:'%an')"
          git add .
          # fold the README/workflow changes into the existing commit
          git commit --amend --no-edit
          git push origin "$GITHUB_REF" --force
14 changes: 14 additions & 0 deletions .github/workflows/test_runner.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# CI check: runs the OpenSAFELY research action on every push and on manual
# dispatch, with the repository secrets below exposed as environment variables.
name: Test that the project is runnable

on:
  - push
  - workflow_dispatch

env:
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  DOCKER_RO_TOKEN: ${{ secrets.DOCKER_RO_TOKEN }}
  STATA_LICENSE: ${{ secrets.STATA_LICENSE }}

jobs:
  test:
    name: Test the project can run, using dummy data
    runs-on: ubuntu-latest
    steps:
      - name: Test that the project is runnable
        uses: opensafely-core/research-action@v1
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
*~
model.log
*/input.csv
__pycache__
.python-version
/output/*
metadata/*
venv/
.DS_Store
.Rhistory
.Rproj.user/
44 changes: 44 additions & 0 deletions .gitpod.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
---
# Start-up tasks for the workspace.
# Reference: https://www.gitpod.io/docs/config-start-tasks/
tasks:
  - name: Install opensafely
    # prebuild step: runs in the background on every commit
    init: |
      # create the venv under /workspace so it persists in the prebuild image
      python -m venv /workspace/venv
      # install the opensafely CLI into that venv
      /workspace/venv/bin/pip install --progress-bar off opensafely
      # make sure the essential docker images are preloaded
      /workspace/venv/bin/opensafely pull cohortextractor
    # runs every time a workspace is started
    command: |
      # limit action concurrency so as not to exhaust gitpod's RAM
      export MAX_WORKERS=2
      # put the venv's opensafely on the PATH
      export PATH=/workspace/venv/bin:$PATH
      opensafely upgrade
      opensafely pull --project project.yaml

github:
  prebuilds:
    # prebuild the default branch (defaults to true)
    master: true
    # prebuild every branch in this repo (defaults to false)
    branches: true
    # prebuild pull requests opened from this repo (defaults to true)
    pullRequests: true
    # prebuild pull requests opened from forks (defaults to false)
    pullRequestsFromForks: true
    # attach a check to pull requests (defaults to true)
    addCheck: true
    # post a "Review in Gitpod" button as a pull request comment (defaults to false)
    addComment: true
    # put a "Review in Gitpod" button in the pull request description (defaults to false)
    addBadge: false

vscode:
  extensions:
    - ms-python.python
    - vscode.html-language-features
    - redhat.vscode-yaml
    - ikuyadeu.r
    - randomfractalsinc.vscode-data-preview
14 changes: 14 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
  "python.linting.pylintEnabled": false,
  "python.linting.flake8Enabled": true,
  "python.linting.enabled": true,
  "data.preview.create.json.schema": false,
  "files.associations": {
    "*.feather": "arrow"
  },
  "files.autoSave": "afterDelay",
  "files.autoSaveDelay": 1000,
  "window.autoDetectColorScheme": true,
  "extensions.ignoreRecommendations": true,
  "data.preview.theme": "light"
}
25 changes: 25 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
  // See https://go.microsoft.com/fwlink/?LinkId=733558
  // for the documentation about the tasks.json format
  "version": "2.0.0",
  "tasks": [
    {
      // Default build task: runs the project's run_all action through the
      // opensafely CLI in a shell.
      "label": "OpenSAFELY run project",
      "type": "shell",
      "command": "opensafely run run_all -f",
      "problemMatcher": [],
      "group": {
        "kind": "build",
        "isDefault": true
      },
      "presentation": {
        "echo": true,
        "reveal": "always",
        "focus": true,
        "panel": "new",
        "showReuseMessage": false,
        "clear": true
      }
    }
  ]
}
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) {{organisation}}

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# ${GITHUB_REPOSITORY_NAME}

This is the code and configuration for ${GITHUB_REPOSITORY_NAME}.

You can run this project via [Gitpod](https://gitpod.io) in a web browser by clicking on this badge: [![Gitpod ready-to-code](https://img.shields.io/badge/Gitpod-ready--to--code-908a85?logo=gitpod)](https://gitpod.io/#https://github.com/${GITHUB_REPOSITORY})

* The paper is [here]()
* Raw model outputs, including charts, crosstabs, etc, are in `released_outputs/`
* If you are interested in how we defined our variables, take a look at the [study definition](analysis/study_definition.py); this is written in `python`, but non-programmers should be able to understand what is going on there
* If you are interested in how we defined our code lists, look in the [codelists folder](./codelists/).
* Developers and epidemiologists interested in the framework should review [the OpenSAFELY documentation](https://docs.opensafely.org)

# About the OpenSAFELY framework

The OpenSAFELY framework is a Trusted Research Environment (TRE) for electronic
health records research in the NHS, with a focus on public accountability and
research quality.

Read more at [OpenSAFELY.org](https://opensafely.org).

# Licences
As standard, research projects have an MIT license.
13 changes: 13 additions & 0 deletions analysis/example.do
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// Example Stata action: reads the cohort csv from output/ and saves it back
// out as a gzip-compressed .dta file.

// stata cannot handle compressed csv files directly, so unzip first to a plain csv file
// (the leading `!` passes the rest of the line to the operating system shell;
// gunzip is called without -k, so by default it replaces the .gz file with
// the uncompressed output/input.csv)
!gunzip output/input.csv.gz

// now import the uncompressed csv using delimited
import delimited using output/input.csv


// your analysis code goes here


// all dta file outputs should be saved using `gzsave` and a .dta.gz extension
// In subsequent actions, use `gzuse` to load them.
gzsave output/stata.dta.gz
10 changes: 10 additions & 0 deletions analysis/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import pandas as pd
import pyarrow.feather

# Load the gzip-compressed cohort CSV from the output directory.
cohort = pd.read_csv("output/input.csv.gz")

# Write the same table as feather three times. The first call relies on the
# default compression (feather files are compressed by default in python).
cohort.to_feather("output/python.feather.lz4")

# The remaining two variants spell out the compression codec explicitly.
for target, codec in (
    ("output/python.feather.raw", "uncompressed"),
    ("output/python.feather.zstd", "zstd"),
):
    pyarrow.feather.write_feather(cohort, target, compression=codec)
7 changes: 7 additions & 0 deletions analysis/example.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Load the gzip-compressed cohort CSV from the output directory.
cohort <- readr::read_csv("output/input.csv.gz")

# Save the table as feather three times: once with the writer's default
# compression, once explicitly uncompressed, and once with zstd.
arrow::write_feather(cohort, "output/r.feather.lz4")
arrow::write_feather(cohort, "output/r.feather.raw", compression = "uncompressed")
arrow::write_feather(cohort, "output/r.feather.zstd", compression = "zstd")
13 changes: 13 additions & 0 deletions analysis/study_definition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from cohortextractor import StudyDefinition, patients, codelist, codelist_from_csv # NOQA


# Study definition: sets the default expectations used for generated values
# and selects the population by practice registration over a fixed date range.
study = StudyDefinition(
    default_expectations=dict(
        date=dict(earliest="1900-01-01", latest="today"),
        rate="uniform",
        incidence=0.5,
    ),
    population=patients.registered_with_one_practice_between(
        "2019-02-01",
        "2020-02-01",
    ),
)
3 changes: 3 additions & 0 deletions codelists/codelists.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"files": {}
}
Empty file added codelists/codelists.txt
Empty file.
1 change: 1 addition & 0 deletions docs/.gitkeep
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Empty file added logs/.gitkeep
Empty file.
Empty file added output/.gitkeep
Empty file.
33 changes: 33 additions & 0 deletions project.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Pipeline definition: one cohort-extraction action followed by three example
# analysis actions (Python, Stata, R) that each depend on its output.
version: "3.0"

expectations:
  population_size: 1000000

actions:
  # Extracts the cohort as a gzip-compressed CSV.
  generate_study_population:
    run: >-
      cohortextractor:latest generate_cohort
      --output-format csv.gz
      --study-definition study_definition
    outputs:
      highly_sensitive:
        cohort: output/input.csv.gz

  # Python example; the wildcard collects every feather variant it writes.
  python_example:
    run: python:latest analysis/example.py
    needs:
      - generate_study_population
    outputs:
      highly_sensitive:
        cohort: output/python.feather*

  # Stata example; writes a single compressed .dta file.
  stata_example:
    run: stata-mp:latest analysis/example.do
    needs:
      - generate_study_population
    outputs:
      highly_sensitive:
        cohort: output/stata.dta.gz

  # R example; the wildcard collects every feather variant it writes.
  r_example:
    run: r:latest analysis/example.r
    needs:
      - generate_study_population
    outputs:
      highly_sensitive:
        cohort: output/r.feather*

0 comments on commit 46e55fe

Please sign in to comment.