Skip to content

Commit e33f6c6

Browse files
authored
Merge pull request #356 from arXiv/ntai/oidc-client-with-ssl-verify-option
Add ssl cert check option for oidc client. needed to run it with self-signed cert for local dev
2 parents ddfc35d + 10f6e9c commit e33f6c6

File tree

4 files changed

+92
-39
lines changed

4 files changed

+92
-39
lines changed

.github/workflows/pullreqeust_tests.yaml

+4-2
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,15 @@ jobs:
3535
# TODO The types are in bad shape and need to be fixed
3636
# run: poetry run mypy --exclude "test*" -p arxiv
3737

38+
# https://github.com/mozilla/geckodriver/releases
39+
# When there is a new driver, you'd need to update immediately
3840
- name: Install Firefox ESR and test driver
3941
run: |
4042
sudo add-apt-repository -y ppa:mozillateam/ppa
4143
sudo apt update
4244
sudo apt install -y firefox-esr
43-
wget https://github.com/mozilla/geckodriver/releases/latest/download/geckodriver-v0.35.0-linux64.tar.gz
44-
tar -xvzf geckodriver-v0.35.0-linux64.tar.gz
45+
wget https://github.com/mozilla/geckodriver/releases/download/v0.36.0/geckodriver-v0.36.0-linux64.tar.gz
46+
tar -xvzf geckodriver-v0.36.0-linux64.tar.gz
4547
sudo mv geckodriver /usr/local/bin/
4648
sudo chmod +x /usr/local/bin/geckodriver
4749

arxiv/auth/openid/README.md

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Note on tests
2+
3+
## Running test locally
4+
5+
To run the tsst flask:
6+
7+
python -m arxiv.auth.openid.tests.toy_flask
8+
9+
## To see the browser on-screen running test_keycloak
10+
11+
Turn off the headless mode in the test but please don't check it in.

arxiv/auth/openid/oidc_idp.py

+75-36
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
"""
22
OpenID connect IdP client
33
"""
4-
import json
54
import urllib.parse
65
from typing import List, Optional
76
import requests
@@ -31,6 +30,7 @@ class ArxivOidcIdpClient:
3130
_login_redirect_url: str
3231
_logout_redirect_url: str
3332
jwt_verify_options: dict
33+
_ssl_cert_verify: bool
3434

3535
def __init__(self, redirect_uri: str,
3636
server_url: str = "https://openid.arxiv.org",
@@ -41,26 +41,36 @@ def __init__(self, redirect_uri: str,
4141
login_redirect_url: str | None = None,
4242
logout_redirect_url: str | None = None,
4343
logger: logging.Logger | None = None,
44+
ssl_verify: bool = True,
4445
):
4546
"""
4647
Make Tapir user data from pass-data
4748
4849
Parameters
4950
----------
50-
redirect_uri: Callback URL - typically FOO/callback which is POSTED when the IdP
51-
authentication succeeds.
52-
server_url: IdP's URL
53-
realm: OpenID's realm - for arXiv users, it should be "arxiv"
54-
client_id: Registered client ID. OAuth2 client/callback are registered on IdP and need to
55-
match
56-
scope: List of OAuth2 scopes - Apparently, keycloak (v20?) dropped the "openid" scope.
57-
Trying to include "openid" results in no such scope error if you don't set up "openid" scope in the realm.
58-
You need to have the "openid" scope for id_token, or else logout does not work.
59-
IOW, you need to create "openid" scope for the realm if it does not exist.
60-
client_secret: Registered client secret
61-
login_redirect_url: redircet URL after log in
62-
logout_redirect_url: redircet URL after log out
63-
logger: Python logging logger instance
51+
redirect_uri : str
52+
str: Callback URL - typically FOO/callback which is POSTED when the IdP authentication succeeds.
53+
server_url : str
54+
str: IdP's URL
55+
realm : str
56+
OpenID's realm - for arXiv users, it should be "arxiv"
57+
client_id : str
58+
Registered client ID. OAuth2 client/callback are registered on IdP and need to match
59+
scope : [str]
60+
List of OAuth2 scopes - Apparently, keycloak (v20?) dropped the "openid" scope.
61+
Trying to include "openid" results in no such scope error if you don't set up "openid" scope in the realm.
62+
You need to have the "openid" scope for id_token, or else logout does not work.
63+
IOW, you need to create "openid" scope for the realm if it does not exist.
64+
client_secret : str
65+
Registered client secret
66+
login_redirect_url: str
67+
redircet URL after log in
68+
logout_redirect_url : str
69+
redircet URL after log out
70+
logger : logging.Logger
71+
Python logging logger instance
72+
ssl_verify: bool
73+
Verify SSL certificate - DO NOT TURN THIS OFF UNLESS YOU ARE WORKING ON LOCAL HOST
6474
"""
6575
self.server_url = server_url
6676
self.realm = realm
@@ -80,14 +90,17 @@ def __init__(self, redirect_uri: str,
8090
"verify_iss": True,
8191
"verify_aud": False, # audience is "account" when it comes from olde tapir but not so for Keycloak.
8292
}
93+
self._ssl_cert_verify = ssl_verify
8394
pass
8495

8596
@property
8697
def oidc(self) -> str:
98+
"""OIDC URL"""
8799
return f'{self.server_url}/realms/{self.realm}/protocol/openid-connect'
88100

89101
@property
90-
def auth_url(self) -> str:
102+
def authn_url(self) -> str:
103+
"""Authentication URL"""
91104
return self.oidc + '/auth'
92105

93106
@property
@@ -115,7 +128,7 @@ def logout_url(self, user: ArxivUserClaims, redirect_url: str | None = None) ->
115128
@property
116129
def login_url(self) -> str:
117130
scope = "&scope=" + "%20".join(self.scope) if self.scope else ""
118-
url = f'{self.auth_url}?client_id={self.client_id}&redirect_uri={self.redirect_uri}&response_type=code{scope}'
131+
url = f'{self.authn_url}?client_id={self.client_id}&redirect_uri={self.redirect_uri}&response_type=code{scope}'
119132
self._logger.debug(f'login_url: {url}')
120133
return url
121134

@@ -125,7 +138,7 @@ def server_certs(self) -> dict:
125138
# I'm having some 2nd thought about caching this. Fresh cert every time is probably needed
126139
# if not self._server_certs:
127140
# This adds one extra fetch but it avoids weird expired certs situation
128-
certs_response = requests.get(self.certs_url)
141+
certs_response = requests.get(self.certs_url, verify=self._ssl_cert_verify)
129142
self._server_certs = certs_response.json()
130143
return self._server_certs
131144

@@ -145,7 +158,13 @@ def acquire_idp_token(self, code: str) -> Optional[dict]:
145158
146159
Parameters
147160
----------
148-
code: When IdP calls back, it comes with the authentication code as a query parameter.
161+
code : str
162+
When IdP calls back, it comes with the authentication code as a query parameter.
163+
164+
Returns
165+
-------
166+
dict | None
167+
IDP token when this is a success
149168
"""
150169
auth = None
151170
if self.client_secret:
@@ -169,7 +188,8 @@ def acquire_idp_token(self, code: str) -> Optional[dict]:
169188
'redirect_uri': self.redirect_uri,
170189
'client_id': self.client_id,
171190
},
172-
auth=auth
191+
auth=auth,
192+
verify=self._ssl_cert_verify,
173193
)
174194
if token_response.status_code != 200:
175195
self._logger.warning(f'idp %s', token_response.status_code)
@@ -187,12 +207,13 @@ def validate_access_token(self, access_token: str) -> dict | None:
187207
188208
Parameters
189209
----------
190-
access_token: This is the access token in the IdP's token that you get from the code
210+
access_token : str
211+
This is the access token in the IdP's token that you get from the code
191212
192-
Return
193-
------
194-
None -> Invalid access token
195-
dict -> The content of idp token as dict
213+
Returns
214+
-------
215+
None | dict
216+
None -> Invalid access token, dict -> The content of idp token as dict
196217
"""
197218

198219
try:
@@ -220,9 +241,14 @@ def validate_access_token(self, access_token: str) -> dict | None:
220241
except jwt.ExpiredSignatureError:
221242
self._logger.error("IdP signature cert is expired.")
222243
return None
244+
223245
except jwt.InvalidTokenError:
224246
self._logger.error("jwt.InvalidTokenError: Token is invalid.", exc_info=True)
225247
return None
248+
249+
except jwt.ImmatureSignatureError:
250+
self._logger.error("jwt.ImmatureSignatureError: Token is invalid.", exc_info=True)
251+
return None
226252
# not reached
227253

228254
def to_arxiv_user_claims(self,
@@ -236,13 +262,13 @@ def to_arxiv_user_claims(self,
236262
237263
Parameters
238264
----------
239-
idp_token: This is the IdP token which contains access, refresh, id tokens, etc.
265+
idp_token : dict | None
266+
This is the IdP token which contains access, refresh, id tokens, etc.
240267
241-
kc_cliams: This is the contents of (unpacked) access token. IdP signs it with the private
268+
kc_cliams : dict | None
269+
This is the contents of (unpacked) access token. IdP signs it with the private
242270
key, and the value is verified using the published public cert.
243271
244-
NOTE: So this means there are two copies of access token. Unfortunate, but unpacking
245-
every time can be costly.
246272
"""
247273
if idp_token is None:
248274
idp_token = {}
@@ -261,12 +287,13 @@ def from_code_to_user_claims(self, code: str) -> ArxivUserClaims | None:
261287
262288
Parameters
263289
----------
264-
code: The code you get in the /callback
290+
code : str
291+
The code you get in the /callback
265292
266293
Returns
267294
-------
268-
ArxivUserClaims: User's IdP claims
269-
None: Something is wrong
295+
ArxivUserClaims | None
296+
User's IdP claims or None when something is wrong
270297
271298
Note
272299
----
@@ -293,7 +320,13 @@ def logout_user(self, user: ArxivUserClaims) -> bool:
293320
294321
Parameters
295322
----------
296-
user: ArxivUserClaims
323+
user : ArxivUserClaims
324+
user claims
325+
326+
Returns
327+
-------
328+
bool
329+
Logout success / failure
297330
"""
298331
try:
299332
header = {
@@ -314,7 +347,7 @@ def logout_user(self, user: ArxivUserClaims) -> bool:
314347
log_extra = {'header': header, 'body': data}
315348
self._logger.debug('Logout request %s', url, extra=log_extra)
316349
try:
317-
response = requests.post(url, headers=header, data=data, timeout=30)
350+
response = requests.post(url, headers=header, data=data, timeout=30, verify=self._ssl_cert_verify)
318351
if response.status_code == 200:
319352
# If Keycloak is misconfigured, this does not log out.
320353
# Turn front channel logout off in the logout settings of the client."
@@ -342,13 +375,18 @@ def logout_user(self, user: ArxivUserClaims) -> bool:
342375
return False
343376

344377

345-
def refresh_access_token(self, refresh_token: str) -> ArxivUserClaims:
378+
def refresh_access_token(self, refresh_token: str) -> Optional[ArxivUserClaims]:
346379
"""With the refresh token, get a new access token
347380
348381
Parameters
349382
----------
350383
refresh_token: str
351-
refresh token which is given by OIDC
384+
refresh token which is given by OIDC
385+
386+
Returns
387+
-------
388+
ArxivUserClaims | None
389+
New (refreshed) user claims when success. None - the refresh token is invalid/expired.
352390
"""
353391

354392
headers = {
@@ -378,6 +416,7 @@ def refresh_access_token(self, refresh_token: str) -> ArxivUserClaims:
378416
},
379417
auth=auth,
380418
headers=headers,
419+
verify=self._ssl_cert_verify
381420
)
382421
if token_response.status_code != 200:
383422
self._logger.warning(f'idp %s', token_response.status_code)

arxiv/auth/openid/tests/test_keycloak.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@
1313
@pytest.fixture(scope="module")
1414
def web_driver() -> webdriver.Chrome:
1515
options = Options()
16-
options.headless = True
1716
options.binary_location = which(WEB_BROWSER)
17+
# If you want to see the browser on your screen, comment out next 2 lines.
18+
options.headless = True
1819
options.add_argument('--headless')
1920

2021
service = Service(executable_path=which(WEB_DRIVER))

0 commit comments

Comments
 (0)