
Commit

Merge branch 'release/v0.1.0'
MischaPanch committed Jun 23, 2021
2 parents de4f675 + 2e9c2c4 commit a4320f0
Showing 84 changed files with 5,992 additions and 553 deletions.
22 changes: 22 additions & 0 deletions .bumpversion.cfg
@@ -0,0 +1,22 @@
[bumpversion]
current_version = 0.1.0
commit = False
tag = False
allow_dirty = False
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
serialize =
    {major}.{minor}.{patch}-{release}{build}
    {major}.{minor}.{patch}

[bumpversion:part:release]
optional_value = prod
first_value = dev
values =
    dev
    prod

[bumpversion:part:build]

[bumpversion:file:setup.py]

[bumpversion:file:src/kyle/__init__.py]
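For reference, the effect of this configuration can be previewed without modifying any files by using bump2version's dry-run mode (the GitLab pipeline further down in this commit installs bump2version for exactly this config). A minimal sketch; the exact new version printed depends on how the optional release/build parts are resolved:

```bash
# Sketch: preview version bumps driven by .bumpversion.cfg (no files are modified).
# Run from a clean checkout, since allow_dirty = False.
pip install bump2version
bump2version --dry-run --list patch    # e.g. 0.1.0 -> 0.1.1
bump2version --dry-run --list build    # bumps the optional -dev<N> build counter
bump2version --dry-run --list release  # dev -> prod (prod is the optional value)
```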
2 changes: 2 additions & 0 deletions .coveragerc
@@ -0,0 +1,2 @@
[run]
source = src
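With `source = src`, only code under `src/` is measured. A minimal local sketch, assuming pytest is the test runner (as in the tox setup used by the pipelines):

```bash
# Sketch: collect and render coverage locally; tox normally does this during the CI run.
pip install coverage pytest
coverage run -m pytest   # .coveragerc restricts measurement to src/
coverage html            # writes the htmlcov/ directory published by the pipelines
```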
48 changes: 48 additions & 0 deletions .github/workflows/release.yaml
@@ -0,0 +1,48 @@
on:
  push:
    # Sequence of patterns matched against refs/tags
    tags:
      - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10

name: Create Release

jobs:
  build:
    name: Create GitHub Release
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # This token is provided by Actions, you do not need to create your own token
        with:
          tag_name: ${{ github.ref }}
          release_name: Release ${{ github.ref }}
          body: |
            Changes in this Release
            - First Change
            - Second Change
          draft: false
          prerelease: false
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python for PyPI Release
        uses: actions/setup-python@v1
        with:
          python-version: '3.8'
      - name: Install dependencies for PyPI Release
        run: |
          python -m pip install --upgrade pip
          pip install setuptools wheel twine
      - name: Build and publish to PyPI
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
        run: |
          python setup.py sdist bdist_wheel
          twine upload dist/*
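The workflow above only runs on tag pushes matching `v*`; cutting a release therefore amounts to pushing a tag, for example:

```bash
# Pushing a matching tag triggers both the GitHub release and the PyPI upload jobs
git tag v0.1.0
git push origin v0.1.0
```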
58 changes: 58 additions & 0 deletions .github/workflows/tox.yaml
@@ -0,0 +1,58 @@
name: Merge develop, run tests and build documentation

on:
  pull_request:
    branches: [develop]
  push:
    branches: [develop, master]
  workflow_dispatch:
    inputs:
      reason:
        description: Why did you trigger the pipeline?
        required: False
        default: Check if it runs again due to external changes

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      # pandoc needed for docu, see https://nbsphinx.readthedocs.io/en/0.7.1/installation.html?highlight=pandoc#pandoc
      - name: Install Non-Python Packages
        run: sudo apt-get update -yq && sudo apt-get -yq install pandoc
      - uses: actions/[email protected]
        with:
          fetch-depth: 0
          lfs: true
          persist-credentials: false
      # lfs=true is not enough, see https://stackoverflow.com/questions/61463578/github-actions-actions-checkoutv2-lfs-true-flag-not-converting-pointers-to-act
      - name: Checkout LFS Objects
        run: git lfs pull
      - name: Merge develop into current branch
        if: github.ref != 'refs/heads/develop'
        run: |
          git fetch origin develop:develop --update-head-ok
          git merge develop
      - name: Setup Python 3.8
        uses: actions/setup-python@v1
        with:
          python-version: "3.8"
      - name: Install Tox and Python Packages
        run: pip install tox
      - name: Run Tox
        run: tox
      - name: Prepare Pages
        if: github.ref == 'refs/heads/develop'
        run: |
          mv docs/_build/html/* public/docs
          mv htmlcov/* public/coverage
      - name: Deploy Pages
        uses: JamesIves/[email protected]
        if: github.ref == 'refs/heads/develop'
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          BRANCH: gh-pages
          FOLDER: public
          TARGET_FOLDER: .
          CLEAN: true
          SINGLE_COMMIT: true
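Besides pushes and pull requests, the `workflow_dispatch` trigger allows manual runs. A sketch using the GitHub CLI, assuming `gh` is installed and authenticated; the same checks can also be run locally with tox:

```bash
# Manually trigger the workflow, passing the 'reason' input defined above
gh workflow run tox.yaml -f reason="re-run after external changes"

# Or run the equivalent checks locally
pip install tox
tox
```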
5 changes: 5 additions & 0 deletions .gitignore
100755 → 100644
@@ -131,3 +131,8 @@ dmypy.json

# Pyre type checker
.pyre/

# reports
pylint.html

data
122 changes: 119 additions & 3 deletions .gitlab-ci.yml
@@ -2,21 +2,137 @@ image: "python:3.8-buster"

stages:
  - tox
  - package
  - documentation
  - build
  - publish
  - update-tox-cache

tox:
variables:
  PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"

cache: &global_cache
  paths:
    - .cache/pip
    - .venv/
    - .tox
    - apt-cache/
  key: ${CI_COMMIT_REF_SLUG}

# Pip's cache doesn't store the python packages
# https://pip.pypa.io/en/stable/reference/pip_install/#caching
before_script:
  - mkdir -p apt-cache
  # pandoc needed for docu, see https://nbsphinx.readthedocs.io/en/0.7.1/installation.html?highlight=pandoc#pandoc
  - apt-get update -yq && apt-get -o dir::cache::archives="$(pwd)/apt-cache" -yq install pandoc
  - if [ -e $LOCAL_CONFIG ]; then mv $CONFIG_LOCAL ./config_local.json && echo "retrieved local config"; fi
  - pip install virtualenv
  - virtualenv .venv
  - source .venv/bin/activate

.tox_job: &tox_job
  stage: tox
  script:
    - pip install tox
    - tox
  artifacts:
    paths:
      - badges
      - docs/_build
      - htmlcov
      - pylint.html

tox_recreate:
  only:
    changes:
      - requirements.txt
  cache:
    # push cache if dependencies have changed
    <<: *global_cache
    policy: push
  <<: *tox_job

tox_use_cache:
  except:
    changes:
      - requirements.txt
  cache:
    # use cache if dependencies haven't changed
    <<: *global_cache
    policy: pull
  <<: *tox_job

pages:
  cache: {}
  stage: documentation
  script:
    - mv docs/_build/html/* public/docs
    - mv pylint.html public/pylint/index.html
    - mv htmlcov/* public/coverage
  artifacts:
    paths:
      - public
  only:
    - develop

package:
  stage: package
  cache:
    paths:
      - .cache/pip
      - .venv/
    key: "$CI_JOB_NAME-$CI_COMMIT_REF_SLUG"
  stage: build
  script:
    - |
      # Bump version number of develop branch
      if [ "$CI_COMMIT_BRANCH" = "develop" ]; then
        # Git config
        git config user.name "Gitlab CI"
        git config user.email "[email protected]"
        chmod 0600 $GITLAB_DEPLOY_KEY
        # HTTPS clone URL -> git+ssh URL for pushing
        export GIT_REPO_URL_SSH=$(echo -n $CI_REPOSITORY_URL | sed -r 's%https?://.*@([^/]+)/%git@\1:%' -)
        git remote set-url origin $GIT_REPO_URL_SSH
        export GIT_SSH_COMMAND='ssh -i $GITLAB_DEPLOY_KEY -o IdentitiesOnly=yes -o StrictHostKeyChecking=no'
        pip install bump2version
        apt-get update && apt-get -o dir::cache::archives="$(pwd)/apt-cache" -yq install git-lfs
        bump2version build --commit
        git push -o ci.skip origin HEAD:develop
      fi
    - pip install setuptools wheel
    - python setup.py sdist bdist_wheel
  artifacts:
    paths:
      - dist/*.tar.gz
      - dist/*.whl

publish_package:
  cache: {}
  only:
    - tags
    - develop
  stage: publish
  needs: [package]
  script:
    - pip install twine
    - export TWINE_REPOSITORY_URL=$PYPI_REPO_URL
    - export TWINE_USERNAME=$PYPI_REPO_USER
    - export TWINE_PASSWORD=$PYPI_REPO_PASS
    - twine upload dist/*

update_tox_cache:
  needs: []
  except:
    changes:
      - requirements.txt
  when: manual
  allow_failure: true
  cache:
    <<: *global_cache
    policy: push
  stage: update-tox-cache
  script:
    - pip install tox
    - tox -r
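The publish stage boils down to a standard twine upload; a rough local equivalent of the `package` and `publish_package` jobs (the `PYPI_REPO_*` values are CI variables, not defined in this repository):

```bash
# Sketch: build and upload the package the same way the publish_package job does
pip install setuptools wheel twine
python setup.py sdist bdist_wheel
export TWINE_REPOSITORY_URL=$PYPI_REPO_URL
export TWINE_USERNAME=$PYPI_REPO_USER
export TWINE_PASSWORD=$PYPI_REPO_PASS
twine upload dist/*
```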
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,6 @@
repos:
  - repo: https://github.com/psf/black
    rev: 20.8b1
    hooks:
      - id: black
        language_version: python3
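To have the black hook run automatically on each commit, the hooks need to be installed once per clone; a minimal sketch:

```bash
# Install the pre-commit hooks defined in .pre-commit-config.yaml
pip install pre-commit
pre-commit install
pre-commit run --all-files   # optional one-off run over the whole repository
```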
9 changes: 9 additions & 0 deletions .pylintrc
@@ -0,0 +1,9 @@
[MESSAGE CONTROL]
disable =
    I0011 # reasoning

[MASTER]
load-plugins=pylint_json2html

[REPORTS]
output-format=jsonextended
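The `jsonextended` output format, together with the `pylint_json2html` plugin loaded above, is what produces the `pylint.html` report published by the CI jobs. A sketch of generating it locally; the exact pylint-json2html flags are an assumption based on its usual command-line interface:

```bash
# Sketch: generate pylint.html from the jsonextended output configured above
pip install pylint pylint-json2html
pylint src/kyle > pylint.json || true   # pylint exits non-zero when issues are found
pylint-json2html -f jsonextended -o pylint.html pylint.json
```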
21 changes: 21 additions & 0 deletions LICENSE.txt
@@ -0,0 +1,21 @@
kyle - a python library for classifier calibration

Copyright 2021-2021 by appliedAI

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
69 changes: 18 additions & 51 deletions README.md
@@ -1,51 +1,18 @@
# kale

This repository contains a library template with utilities for building, testing, documentation
and configuration management.

## Workflow
Automated builds, tests, generation of documentation and publishing should be handled by CI/CD pipelines.
You might already have an initial version of the pipeline here. Below you will find further details on testing
and documentation.

Before pushing your changes to the remote it is often useful to execute `tox` locally in order to
detect mistakes early on.

We strongly suggest using some form of virtual environment for working with the library, e.g. with conda:
```shell script
conda create -n kale python=3.8
conda activate kale
pip install -r requirements.txt
```

### Testing and packaging
The library is tested with tox which will build and install the package and run pytest and doctest.
You can run it locally by installing tox into your virtual environment
(e.g. with `pip install tox`) and executing `tox`.

For creating a package locally run
```shell script
python setup.py sdist bdist_wheel
```

### Documentation
Documentation is built with sphinx every time tox is executed.
There is a helper script for updating documentation files automatically. It is called by tox on build and can
also be invoked as
```bash
python scripts/update_docs.py
```
See the code documentation in the script for more details.

### Note
You might wonder why the requirements.txt already contains numpy. The reason is that tox seems to have a problem with empty
requirements files. Feel free to remove numpy once you have non-trivial requirements.

## Configuration Management
The repository also includes configuration utilities that are often helpful when using data-related libraries.
They do not form part of the resulting package; you can (and probably should) adjust them to your needs.

## CI/CD
Depending on the provider you chose for CI/CD, this repo might already contain a rudimentary CI/CD pipeline.
The pipelines serve for building and testing the library and for publishing the resulting package and documentation.
You will probably have to further adjust it to your needs.
# Kyle - a Calibration Toolkit

This library contains utilities for measuring and visualizing the calibration of probabilistic classifiers, as well as
for recalibrating them. Currently, only methods for recalibration through post-processing are supported, although we
plan to include calibration-specific training algorithms in the future.

Kyle is model agnostic: any probabilistic classifier can be wrapped with a thin wrapper called `CalibratableModel`,
which supports multiple calibration algorithms. For a quick overview of the API, have a look at the calibration demo
notebook (the notebook with executed cells can be found in the documentation).

Apart from tools for analysing models, kyle also offers support for developing and testing custom calibration metrics
and algorithms. To avoid having to rely on evaluation data sets and trained models for obtaining labels and confidence
vectors, kyle lets you construct custom samplers based on [fake classifiers](our paper/review). These samplers can
also be fit to an existing data set if you want to mimic it. Using the fake classifiers, an arbitrary number of ground
truth labels and miscalibrated confidence vectors can be generated to help you analyse your algorithms (common use cases
are analysing the variance and bias of calibration metrics and benchmarking recalibration algorithms). Several
pre-configured fake classifiers mimicking common models, e.g. vision models trained on MNIST and CIFAR10, are implemented
in kyle and can be used out of the box.
2 changes: 2 additions & 0 deletions badges/.gitignore
@@ -0,0 +1,2 @@
*
!.gitignore