diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index e6a448b8..71c24edb 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -172,9 +172,18 @@ build:thredds:
     CONTEXT_DIR: $CI_PROJECT_DIR/images/thredds
   needs: ["build:jdk", "build:tomcat"]
 
+
+build:wget-api:
+  extends: .docker-build
+  stage: build-4
+  variables:
+    CONTEXT_DIR: $CI_PROJECT_DIR/images/wget-api
+  needs: ["build:base"]
+
 build:auth-service:
   extends: .docker-build
   stage: build-4
   variables:
     CONTEXT_DIR: $CI_PROJECT_DIR/images/auth-service
   needs: ["build:python-build", "build:django"]
+
diff --git a/deploy/ansible/roles/index/defaults/main.yml b/deploy/ansible/roles/index/defaults/main.yml
index aa10c192..6ef0bd44 100644
--- a/deploy/ansible/roles/index/defaults/main.yml
+++ b/deploy/ansible/roles/index/defaults/main.yml
@@ -68,3 +68,30 @@ search_image_prefix: "{{ image_prefix }}"
 search_image_tag: "{{ image_tag }}"
 search_image_pull: "{{ image_pull }}"
 search_image_repository: search
+
+###
+# wgetApi configuration
+###
+wget_api_enabled: true
+wget_api_image_prefix: "{{ image_prefix }}"
+wget_api_image_tag: "{{ image_tag }}"
+wget_api_image_pull: "{{ image_pull }}"
+wget_api_image_repository: wget_api
+# Settings consumed by the wget_api tasks and templates of this role.
+# Defaults here must be literal values: an entry that reads wget_api.* from
+# inside wget_api refers to itself and can never be resolved.
+# With Ansible's default hash behaviour an inventory-level wget_api replaces
+# this whole mapping, so it must carry every key, including the site-specific
+# ones that have no default below: secret_key, solr_url and max_dir_length.
+wget_api:
+  debug: false
+  allowed_hosts: "*"
+  config_path: /esg/esgf_wget
+  config: wget_api_config
+  allowed_projects_json: esgf_allowed_projects.json
+  allowed_projects: []
+  solr_shards: []
+  script_file_default_limit: 1000
+  script_file_max_limit: 100000
+  data_upload_max_number_fields: 1024
+
diff --git a/deploy/ansible/roles/index/tasks/main.yml b/deploy/ansible/roles/index/tasks/main.yml
index 7cd8eade..642d6ca2 100644
--- a/deploy/ansible/roles/index/tasks/main.yml
+++ b/deploy/ansible/roles/index/tasks/main.yml
@@ -22,3 +22,11 @@
 - name: Uninstall search application
   include: search_uninstall.yml
   when: "'index' not in group_names or not search_enabled"
+
+- name: Install wget_api application
+  include: wget_api_install.yml
+  when: "'index' in group_names and wget_api_enabled"
+
+- name: Uninstall wget_api application
+  include: wget_api_uninstall.yml
+  when: "'index' not in group_names or not wget_api_enabled"
diff --git a/deploy/ansible/roles/index/tasks/wget_api_install.yml b/deploy/ansible/roles/index/tasks/wget_api_install.yml
new file mode 100644
index 00000000..e6ac2eb1
--- /dev/null
+++ b/deploy/ansible/roles/index/tasks/wget_api_install.yml
@@ -0,0 +1,44 @@
+---
+
+- name: Create Docker network
+  docker_network:
+    name: esgf
+
+- name: Make wget api config directory
+  file:
+    path: /tmp/esgf_wget
+    state: directory
+
+- name: Write wget api local settings
+  template:
+    src: "esgf_wgetapi_config.j2"
+    dest: "/tmp/esgf_wget/{{ wget_api.config }}"
+
+- name: Write wget api allowed projects
+  template:
+    src: "esgf_wgetapi_allowed_projects.json.j2"
+    dest: "/tmp/esgf_wget/{{ wget_api.allowed_projects_json }}"
+
+- name: Write XML file containing Solr shards
+  template:
+    src: "esgf_wgetapi_solr_shards_static.xml.j2"
+    dest: "/tmp/esgf_wget/esgf_wgetapi_solr_shards_static.xml"
+
+- name: Start wget_api container
+  docker_container:
+    name: wget_api
+    env:
+      ESGF_WGET_CONFIG: "{{ wget_api.config_path }}/{{ wget_api.config }}"
+      ESGF_WGET_SECRET_KEY: "{{ wget_api.secret_key }}"
+    image: "{{ wget_api_image_prefix }}/{{ wget_api_image_repository }}:{{ wget_api_image_tag }}"
+    pull: "{{ wget_api_image_pull }}"
+    detach: yes
+    restart_policy: unless-stopped
+    exposed_ports:
+      - "8000"
+    networks:
+      - name: esgf
+    networks_cli_compatible: yes
+    volumes: ["/tmp/esgf_wget:{{ wget_api.config_path }}:ro"]
+    state: started
+    restart: yes
diff --git a/deploy/ansible/roles/index/tasks/wget_api_uninstall.yml b/deploy/ansible/roles/index/tasks/wget_api_uninstall.yml
new file mode 100644
index 00000000..54224539
--- /dev/null
+++ b/deploy/ansible/roles/index/tasks/wget_api_uninstall.yml
@@ -0,0 +1,12 @@
+---
+
+- name: Stop wget_api container
+  docker_container:
+    name: wget_api
+    state: absent
+
+- name: Remove wget api config directory
+  file:
+    path: /tmp/esgf_wget
+    state: absent
+
diff --git a/deploy/ansible/roles/index/templates/esgf_wgetapi_allowed_projects.json.j2 b/deploy/ansible/roles/index/templates/esgf_wgetapi_allowed_projects.json.j2
new file mode 100644
index 00000000..847779fe
--- /dev/null
+++ b/deploy/ansible/roles/index/templates/esgf_wgetapi_allowed_projects.json.j2
@@ -0,0 +1,3 @@
+{
+  "allowed_projects": {{ wget_api.allowed_projects | to_json }}
+}
diff --git a/deploy/ansible/roles/index/templates/esgf_wgetapi_config.j2 b/deploy/ansible/roles/index/templates/esgf_wgetapi_config.j2
new file mode 100644
index 00000000..13fc36f5
--- /dev/null
+++ b/deploy/ansible/roles/index/templates/esgf_wgetapi_config.j2
@@ -0,0 +1,29 @@
+[django]
+
+# SECURITY WARNING: don't run with debug turned on in production!
+DEBUG = {{ wget_api.debug }}
+
+ALLOWED_HOSTS = {{ wget_api.allowed_hosts }}
+
+# Expand the number of fields allowed for wget API
+DATA_UPLOAD_MAX_NUMBER_FIELDS = {{ wget_api.data_upload_max_number_fields | default(1024) }}
+
+[wget]
+# Address of ESGF Solr
+ESGF_SOLR_URL = {{ wget_api.solr_url }}
+
+# Path to XML file containing Solr shards
+ESGF_SOLR_SHARDS_XML = {{ wget_api.config_path }}/esgf_wgetapi_solr_shards_static.xml
+
+# Path to JSON file containing allowed projects to access for datasets
+ESGF_ALLOWED_PROJECTS_JSON = {{ wget_api.config_path }}/{{ wget_api.allowed_projects_json }}
+
+# Default limit on the number of files allowed in a wget script
+WGET_SCRIPT_FILE_DEFAULT_LIMIT = {{ wget_api.script_file_default_limit }}
+
+# Maximum number of files allowed in a wget script
+WGET_SCRIPT_FILE_MAX_LIMIT = {{ wget_api.script_file_max_limit }}
+
+# Maximum length for facet values used in the wget directory structure
+WGET_MAX_DIR_LENGTH = {{ wget_api.max_dir_length }}
+
diff --git a/deploy/ansible/roles/index/templates/esgf_wgetapi_solr_shards_static.xml.j2 b/deploy/ansible/roles/index/templates/esgf_wgetapi_solr_shards_static.xml.j2
new file mode 100644
index 00000000..fa393b25
--- /dev/null
+++ b/deploy/ansible/roles/index/templates/esgf_wgetapi_solr_shards_static.xml.j2
@@ -0,0 +1,6 @@
+
+
+    {% for item in wget_api.solr_shards %}
+    {{ item }}
+    {% endfor %}
+
diff --git a/deploy/ansible/roles/proxy/defaults/main.yml b/deploy/ansible/roles/proxy/defaults/main.yml
index a7d389b7..4221b17e 100644
--- a/deploy/ansible/roles/proxy/defaults/main.yml
+++ b/deploy/ansible/roles/proxy/defaults/main.yml
@@ -43,3 +43,8 @@ solr_enabled: true
 
 # Indicates if search is enabled
 search_enabled: true
+
+# Indicates if wget_api is enabled
+wget_api_enabled: true
+
+
diff --git a/deploy/ansible/roles/proxy/templates/proxy.conf.j2 b/deploy/ansible/roles/proxy/templates/proxy.conf.j2
index 4c06cd2e..37ee8a8b 100644
--- a/deploy/ansible/roles/proxy/templates/proxy.conf.j2
+++ b/deploy/ansible/roles/proxy/templates/proxy.conf.j2
@@ -43,5 +43,13 @@ server {
             proxy_pass http://search:8080;
         }
     {% endif %}
+
+    {% if wget_api_enabled %}
+        location /wget {
+            include /etc/nginx/includes/proxy_params.conf;
+            proxy_pass http://wget_api:8000;
+        }
+    {% endif %}
 {% endif %}
 }
+
diff --git a/deploy/kubernetes/chart/templates/ingress/ingress.yaml b/deploy/kubernetes/chart/templates/ingress/ingress.yaml
index c6bba107..37e5ac1e 100644
--- a/deploy/kubernetes/chart/templates/ingress/ingress.yaml
+++ b/deploy/kubernetes/chart/templates/ingress/ingress.yaml
@@ -68,4 +68,12 @@ spec:
                 port:
                   number: 8080
           {{- end }}
+          {{- if .Values.index.wgetApi.enabled }}
+          - path: /wget
+            backend:
+              service:
+                name: {{ include "esgf.component.fullname" (list . "wget-api") }}
+                port:
+                  number: 8080
+          {{- end }}
   {{- end }}
diff --git a/deploy/kubernetes/chart/templates/wgetApi/Deployment.yaml b/deploy/kubernetes/chart/templates/wgetApi/Deployment.yaml
new file mode 100644
index 00000000..816bb9be
--- /dev/null
+++ b/deploy/kubernetes/chart/templates/wgetApi/Deployment.yaml
@@ -0,0 +1,49 @@
+{{- $wgetApi := .Values.index.wgetApi -}}
+{{- if (and .Values.index.enabled $wgetApi.enabled) -}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "esgf.component.fullname" (list . "wget-api") }}
+  labels: {{ include "esgf.component.labels" (list . "wget-api" $wgetApi.labels) | nindent 4 }}
+spec:
+  replicas: {{ $wgetApi.replicaCount }}
+  selector:
+    matchLabels: {{ include "esgf.component.selectorLabels" (list . "wget-api") | nindent 6 }}
+  template:
+    metadata:
+      labels: {{ include "esgf.component.selectorLabels" (list . "wget-api") | nindent 8 }}
+    spec:
+      {{- with $wgetApi.podSecurityContext }}
+      securityContext: {{ toYaml . | nindent 8 }}
+      {{- end }}
+      containers:
+        - name: wget-api
+          {{ include "esgf.deployment.image" (list . $wgetApi.image) }}
+          env:
+            - name: ESGF_WGET_CONFIG
+              value: {{ $wgetApi.config_path }}/{{ $wgetApi.config }}
+            - name: ESGF_WGET_SECRET_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: wget-api-secret
+                  key: secret
+          ports:
+            - name: http
+              containerPort: 8000
+          readinessProbe: &probe
+            httpGet:
+              path: /wget
+              port: 8000
+            initialDelaySeconds: 20
+            periodSeconds: 20
+          livenessProbe:
+            <<: *probe
+            initialDelaySeconds: 30
+          volumeMounts:
+            - name: config
+              mountPath: {{ $wgetApi.config_path }}
+      volumes:
+        - name: config
+          configMap:
+            name: {{ include "esgf.component.fullname" (list . "wget-api") }}
+{{- end -}}
diff --git a/deploy/kubernetes/chart/templates/wgetApi/configmap.yaml b/deploy/kubernetes/chart/templates/wgetApi/configmap.yaml
new file mode 100644
index 00000000..64f8fd87
--- /dev/null
+++ b/deploy/kubernetes/chart/templates/wgetApi/configmap.yaml
@@ -0,0 +1,45 @@
+{{- $wgetApi := .Values.index.wgetApi -}}
+{{- if (and .Values.index.enabled $wgetApi.enabled) -}}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "esgf.component.fullname" (list . "wget-api") }}
+  labels: {{ include "esgf.component.labels" (list . "wget-api" $wgetApi.labels) | nindent 4 }}
+data:
+  {{ $wgetApi.config }}: |
+    [django]
+    DEBUG = {{ $wgetApi.settings.debug }}
+
+    ALLOWED_HOSTS = {{ $wgetApi.settings.allowedHosts }}
+
+    # Expand the number of fields allowed for wget API
+    DATA_UPLOAD_MAX_NUMBER_FIELDS = {{ $wgetApi.settings.dataUploadMaxNumberFields }}
+
+    [wget]
+    # Address of ESGF Solr
+    ESGF_SOLR_URL = {{ $wgetApi.settings.esgfSolrUrl }}
+
+    # Path to XML file containing Solr shards
+    ESGF_SOLR_SHARDS_XML = {{ $wgetApi.settings.esgfSolrShardsXml | default (printf "%s/esgf_solr_shards.xml" $wgetApi.config_path) }}
+
+    # Path to JSON file containing allowed projects to access for datasets
+    ESGF_ALLOWED_PROJECTS_JSON = {{ $wgetApi.config_path }}/esgf_allowed_projects.json
+
+    # Default limit on the number of files allowed in a wget script
+    WGET_SCRIPT_FILE_DEFAULT_LIMIT = {{ $wgetApi.settings.wgetScriptFileDefaultLimit }}
+
+    # Maximum number of files allowed in a wget script
+    WGET_SCRIPT_FILE_MAX_LIMIT = {{ $wgetApi.settings.wgetScriptFileMaxLimit }}
+  esgf_allowed_projects.json: |
+    {
+      "allowed_projects": {{ $wgetApi.settings.allowed_projects | default (list) | toJson }}
+    }
+  esgf_solr_shards.xml: |
+
+
+
+    {{ range $wgetApi.settings.shards }}
+    {{ . }}
+    {{ end }}
+
+{{- end -}}
diff --git a/deploy/kubernetes/chart/templates/wgetApi/secret.yaml b/deploy/kubernetes/chart/templates/wgetApi/secret.yaml
new file mode 100644
index 00000000..ea5a997a
--- /dev/null
+++ b/deploy/kubernetes/chart/templates/wgetApi/secret.yaml
@@ -0,0 +1,13 @@
+{{- $wgetApi := .Values.index.wgetApi -}}
+{{- if (and .Values.index.enabled $wgetApi.enabled) -}}
+{{- /* Reuse the key already stored in the cluster so an upgrade does not rotate it */ -}}
+{{- $existing := lookup "v1" "Secret" .Release.Namespace "wget-api-secret" -}}
+{{- $secretKey := get ($existing.data | default (dict)) "secret" | default (randAlphaNum 50 | b64enc) -}}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: wget-api-secret
+type: Opaque
+data:
+  secret: {{ $secretKey }}
+{{- end -}}
diff --git a/deploy/kubernetes/chart/templates/wgetApi/service.yaml b/deploy/kubernetes/chart/templates/wgetApi/service.yaml
new file mode 100644
index 00000000..6526bfe5
--- /dev/null
+++ b/deploy/kubernetes/chart/templates/wgetApi/service.yaml
@@ -0,0 +1,16 @@
+{{- $wgetApi := .Values.index.wgetApi -}}
+{{- if (and .Values.index.enabled $wgetApi.enabled) -}}
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "esgf.component.fullname" (list . "wget-api") }}
+  labels: {{ include "esgf.component.labels" (list . "wget-api" $wgetApi.labels) | nindent 4 }}
+spec:
+  type: ClusterIP
+  ports:
+    - name: http
+      port: 8080
+      targetPort: 8000
+      protocol: TCP
+  selector: {{ include "esgf.component.selectorLabels" (list . "wget-api") | nindent 4 }}
+{{- end -}}
diff --git a/deploy/kubernetes/chart/values.yaml b/deploy/kubernetes/chart/values.yaml
index bbb6706f..e0064648 100644
--- a/deploy/kubernetes/chart/values.yaml
+++ b/deploy/kubernetes/chart/values.yaml
@@ -352,3 +352,47 @@ index:
     # The tolerations for the search pods
     # See https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
     tolerations:
+
+  # Configuration for wgetApi
+  wgetApi:
+    # Indicates if wgetApi component should be deployed
+    enabled: true
+    # Image overrides for the wgetApi image
+    image:
+      # prefix:
+      repository: esgf-wget
+      # tag:
+    # The pod security context for all wgetApi pods
+    podSecurityContext:
+      runAsUser: 1000
+      runAsGroup: 1000
+      fsGroup: 1000
+    # The container security context for the wgetApi container
+    # NOTE: templates/wgetApi/Deployment.yaml does not consume this value yet
+    securityContext:
+      # Run with a read-only root filesystem by default
+      readOnlyRootFilesystem: true
+    # The absolute path where wget api config file will be stored.
+    config_path: /esg/esgf_wget
+    # The wget api config file name to be created under config_path
+    config: wget_api_config
+    settings:
+      debug: false
+      allowedHosts: "*"
+      # ESGF solr url. For ex: https://esgf-node.llnl.gov/solr
+      esgfSolrUrl: ""
+      # Path to XML file containing Solr shards
+      # Leave empty to use the esgf_solr_shards.xml rendered from "shards" below
+      esgfSolrShardsXml: ""
+      # Entries written to esgf_solr_shards.xml
+      shards: []
+      # Projects listed in esgf_allowed_projects.json
+      allowed_projects: []
+      # Default limit on the number of files allowed in a wget script
+      wgetScriptFileDefaultLimit: 1000
+      # Maximum number of files allowed in a wget script
+      wgetScriptFileMaxLimit: 100000
+      # Expand the number of fields allowed for wget API
+      dataUploadMaxNumberFields: 10240
+    # The number of replicas for the wgetApi pod.
+    replicaCount: 1
diff --git a/images/wget-api/Dockerfile b/images/wget-api/Dockerfile
new file mode 100644
index 00000000..737a0083
--- /dev/null
+++ b/images/wget-api/Dockerfile
@@ -0,0 +1,32 @@
+#####
+## Image for running wget-api
+#####
+# docker build -t --build-arg=ESGF_IMAGES_VERSION=future-architecture .
+
+ARG ESGF_REPOSITORY_BASE=esgfdeploy
+ARG ESGF_IMAGES_VERSION=latest
+
+FROM centos:7.8.2003 as get_repo
+
+WORKDIR /usr/local/esgf-wget
+USER root
+
+RUN yum install -y git && \
+    git clone https://github.com/esgf/esgf-wget.git --branch devel /usr/local/esgf-wget
+
+FROM ${ESGF_REPOSITORY_BASE}/base:${ESGF_IMAGES_VERSION}
+
+USER root
+RUN yum install -y python3 python3-pip && \
+    pip3 install gunicorn "django>=3.0,<3.1"
+
+WORKDIR /wgetApi
+
+COPY --from=get_repo /usr/local/esgf-wget/esgf_wget esgf_wget
+COPY --from=get_repo /usr/local/esgf-wget/manage.py .
+
+EXPOSE 8000
+USER $ESGF_UID
+
+ENTRYPOINT ["gunicorn", "-b", "0.0.0.0:8000", "esgf_wget.wsgi", "--worker-tmp-dir", "/dev/shm", "--workers", "2", "--threads", "2", "--worker-class", "gthread"]
+