jupyterhub · lambdaTotoro · Sep 29, 2021 · Apr 10, 2021 · Apr 20, 2021 · Apr 20, 2021
diff --git a/docs/options.rst b/docs/options.rst
@@ -96,6 +96,78 @@ To enable reCAPTCHA on signup, add the following two lines to the configuration
     c.Authenticator.recaptcha_secret = "your secret"
 
 
+Allow self-serve approval
+-------------------------
+
+By default, all users who sign up on Native Authenticator need an admin's approval before
+they can actually log in to the system. You can instead allow anybody in without approval, as
+described above with `open_signup`. Alternatively, you may want something like `open_signup` but
+only for users in your own organization. This is what this option permits.
+New users are still created in non-authorized mode, but they can self-authorize by
+navigating to a (cryptographic) URL which will be e-mailed to them *only* if the
+provided email address matches the specified pattern.
+For example, to allow any users who have an mit.edu email address,
+you may do the following:
+
+.. code-block:: python
+
+    import re
+    c.Authenticator.allow_self_approval_for = re.compile(r'[^@]+@mit\.edu$')
+
+Note that this setting automatically enables `ask_email_on_signup`.
+
+To use this feature, you must also provide a secret key to cryptographically sign the URL.
+To prevent attacks, it is mandatory that this key stays secret.
+
+.. code-block:: python
+
+    c.Authenticator.secret_key = "your-key"
+
+You should customize the email sent to users with something like
+
+.. code-block:: python
+
+    c.Authenticator.self_approval_email = ("from", "subject", "email body, including https://example.com{approval_url}")
+
+Note that you need to specify the domain where JupyterHub is running (example.com in the example above) and
+the port too, if you are using a non-standard one (e.g. 8000). Also the protocol must be the correct one
+you are serving your connections from (https in the example).
+
+Moreover, you may specify the SMTP server to use for sending the email. You can do that with
+
+.. code-block:: python
+
+    c.Authenticator.self_approval_server = {'url': 'smtp.gmail.com', 'usr': 'myself', 'pwd': 'mypassword'}
+
+If you do not specify a `self_approval_server`, it will attempt to use `localhost` without authentication.
+
+If you wish to use gmail as your SMTP server as in the example above, you must also allow
+"less secure apps" for this to work, as described at
+https://support.google.com/accounts/answer/6010255 and if you have 2FA enabled you should disable it for
+JupyterHub to be able to send emails, as described at https://support.google.com/accounts/answer/185833
+See https://stackoverflow.com/questions/16512592/login-credentials-not-working-with-gmail-smtp for additional
+gmail-specific SMTP details.
+
+Finally, all of this will correctly create and enable JupyterHub users. However, the people wishing to
+log in as these users will **also** need to have accounts on the system. If the system where JupyterHub
+is running is one of the most common Linux distributions, adding the following to the config file
+will automatically create their Linux account the first time they log in to JupyterHub. If the system
+where JupyterHub is running is another OS, such as BSD or Windows, the corresponding user
+creation command must be invoked instead of useradd with the appropriate arguments.
+
+.. code-block:: python
+
+    def pre_spawn_hook(spawner):
+        username = spawner.user.name
+        try:
+            import pwd
+            pwd.getpwnam(username)
+        except KeyError:
+            import subprocess
+            subprocess.check_call(['useradd', '-ms', '/bin/bash', username])
+    c.Spawner.pre_spawn_hook = pre_spawn_hook
+
+
 Mandatory acceptance of Terms of Service before SignUp
 ------------------------------------------------------
 

diff --git a/nativeauthenticator/crypto/LICENSE b/nativeauthenticator/crypto/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) Django Software Foundation and individual contributors.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    1. Redistributions of source code must retain the above copyright notice,
+       this list of conditions and the following disclaimer.
+
+    2. Redistributions in binary form must reproduce the above copyright
+       notice, this list of conditions and the following disclaimer in the
+       documentation and/or other materials provided with the distribution.
+
+    3. Neither the name of Django nor the names of its contributors may be used
+       to endorse or promote products derived from this software without
+       specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/nativeauthenticator/crypto/crypto.py b/nativeauthenticator/crypto/crypto.py
@@ -0,0 +1,78 @@
+"""
+Django's standard crypto functions and utilities.
+"""
+import hashlib
+import hmac
+import secrets
+
+from .encoding import force_bytes
+
+
+class InvalidAlgorithm(ValueError):
+    """Raised when the requested algorithm name is not found in hashlib."""
+    pass
+
+
+def salted_hmac(key_salt, value, secret, *, algorithm='sha1'):
+    """
+    Return an HMAC object for 'value', keyed with a key derived from
+    key_salt and secret. Default algorithm is SHA1, but any algorithm name
+    that is an attribute of hashlib can be passed.
+
+    A different key_salt should be passed in for every application of HMAC.
+
+    key_salt, secret and value are converted with force_bytes() before use.
+    The return value is the object built by hmac.new(), not a digest; call
+    .digest() or .hexdigest() on it to obtain the MAC.
+
+    Raise InvalidAlgorithm if hashlib has no attribute named 'algorithm'.
+    """
+
+    key_salt = force_bytes(key_salt)
+    secret = force_bytes(secret)
+    try:
+        hasher = getattr(hashlib, algorithm)
+    except AttributeError as e:
+        raise InvalidAlgorithm(
+            '%r is not an algorithm accepted by the hashlib module.'
+            % algorithm
+        ) from e
+    # We need to generate a derived key from our base key.  We can do this by
+    # passing the key_salt and our base key through a pseudo-random function.
+    key = hasher(key_salt + secret).digest()
+    # If len(key_salt + secret) > block size of the hash algorithm, the above
+    # line is redundant and could be replaced by key = key_salt + secret, since
+    # the hmac module does the same thing for keys longer than the block size.
+    # However, we need to ensure that we *always* do this.
+    return hmac.new(key, msg=force_bytes(value), digestmod=hasher)
+
+
+# Default alphabet for get_random_string(): ASCII letters and digits.
+RANDOM_STRING_CHARS = \
+        'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
+
+
+def get_random_string(length, allowed_chars=RANDOM_STRING_CHARS):
+    """
+    Return a securely generated random string.
+
+    Each of the 'length' characters is drawn independently from
+    'allowed_chars' with secrets.choice().
+
+    The bit length of the returned value can be calculated with the formula:
+        log_2(len(allowed_chars)^length)
+
+    For example, with default `allowed_chars` (26+26+10), this gives:
+      * length: 12, bit length =~ 71 bits
+      * length: 22, bit length =~ 131 bits
+    """
+    return ''.join(secrets.choice(allowed_chars) for i in range(length))
+
+
+def constant_time_compare(val1, val2):
+    """
+    Return True if the two strings are equal, False otherwise.
+
+    Both values are converted with force_bytes() and then compared with
+    secrets.compare_digest() instead of the == operator.
+    """
+    return secrets.compare_digest(force_bytes(val1), force_bytes(val2))
+
+
+def pbkdf2(password, salt, iterations, dklen=0, digest=None):
+    """
+    Return the hash of password using pbkdf2.
+
+    password and salt are converted with force_bytes(). 'digest' is a
+    hashlib constructor (hashlib.sha256 when None); it is instantiated only
+    to read its algorithm name. A falsy dklen (the default 0) is passed on
+    as None. The result is whatever hashlib.pbkdf2_hmac() returns.
+    """
+    if digest is None:
+        digest = hashlib.sha256
+    # Map 0 (or any falsy value) to None before handing it to pbkdf2_hmac().
+    dklen = dklen or None
+    password = force_bytes(password)
+    salt = force_bytes(salt)
+    return hashlib.pbkdf2_hmac(digest().name,
+                               password,
+                               salt,
+                               iterations,
+                               dklen)
diff --git a/nativeauthenticator/crypto/encoding.py b/nativeauthenticator/crypto/encoding.py
@@ -0,0 +1,204 @@
+import codecs
+import datetime
+import locale
+from decimal import Decimal
+from urllib.parse import quote
+
+
+class DjangoUnicodeDecodeError(UnicodeDecodeError):
+    """UnicodeDecodeError that also carries the object being decoded."""
+
+    def __init__(self, obj, *args):
+        # Keep the offending object so __str__ can report it; the remaining
+        # arguments are forwarded to UnicodeDecodeError unchanged.
+        self.obj = obj
+        super().__init__(*args)
+
+    def __str__(self):
+        # Standard message followed by the repr and the type of the object.
+        return '%s. You passed in %r (%s)' % (
+                super().__str__(),
+                self.obj,
+                type(self.obj))
+
+
+# Types that force_str() and force_bytes() return unchanged when they are
+# called with strings_only=True.
+_PROTECTED_TYPES = (
+    type(None),
+    int, float, Decimal,
+    datetime.datetime,
+    datetime.date,
+    datetime.time,
+)
+
+
+def is_protected_type(obj):
+    """Determine if the object instance is of a protected type.
+
+    Objects of protected types are preserved as-is when passed to
+    force_str(strings_only=True) or force_bytes(strings_only=True).
+
+    Return True when obj is an instance of one of _PROTECTED_TYPES.
+    """
+    return isinstance(obj, _PROTECTED_TYPES)
+
+
+def force_str(s, encoding='utf-8', strings_only=False, errors='strict'):
+    """
+    Return 's' converted to str.
+
+    str instances (including subclasses) are returned unchanged. bytes are
+    decoded using 'encoding' and 'errors'; any other object is passed to
+    str().
+
+    If strings_only is True, don't convert (some) non-string-like objects:
+    those for which is_protected_type() is true are returned as-is.
+
+    Raise DjangoUnicodeDecodeError if the conversion raises
+    UnicodeDecodeError.
+    """
+    # Handle the common case first for performance reasons.
+    if issubclass(type(s), str):
+        return s
+    if strings_only and is_protected_type(s):
+        return s
+    try:
+        if isinstance(s, bytes):
+            s = str(s, encoding, errors)
+        else:
+            s = str(s)
+    except UnicodeDecodeError as e:
+        raise DjangoUnicodeDecodeError(s, *e.args)
+    return s
+
+
+def force_bytes(s, encoding='utf-8', strings_only=False, errors='strict'):
+    """
+    Return 's' converted to bytes.
+
+    bytes input is returned unchanged when encoding is 'utf-8'; for any
+    other encoding it is decoded as UTF-8 and re-encoded. A memoryview is
+    copied into bytes. Any other object is passed to str() and the result
+    is encoded with 'encoding' and 'errors'.
+
+    If strings_only is True, don't convert (some) non-string-like objects:
+    those for which is_protected_type() is true are returned as-is.
+    """
+    # Handle the common case first for performance reasons.
+    if isinstance(s, bytes):
+        if encoding == 'utf-8':
+            return s
+        else:
+            # Transcode: the incoming bytes are treated as UTF-8 here.
+            return s.decode('utf-8', errors).encode(encoding, errors)
+    if strings_only and is_protected_type(s):
+        return s
+    if isinstance(s, memoryview):
+        return bytes(s)
+    return str(s).encode(encoding, errors)
+
+
+# List of byte values that uri_to_iri() decodes from percent encoding.
+# The table maps the two hex digits that follow a '%' (as bytes, both
+# lower- and upper-case spellings) to the single byte they represent.
+# First, the unreserved characters from RFC 3986:
+# NOTE(review): only '-', '.', '_', '~' and the ASCII letters are listed
+# below; the digits (48-57) are not, so %30-%39 are left percent-encoded.
+# Confirm this is intentional.
+_ascii_ranges = [[45, 46, 95, 126], range(65, 91), range(97, 123)]
+_hextobyte = {
+    (fmt % char).encode(): bytes((char,))
+    for ascii_range in _ascii_ranges
+    for char in ascii_range
+    for fmt in ['%02x', '%02X']
+}
+# And then everything above 128, because bytes ≥ 128 are part of multibyte
+# Unicode characters.
+# _hexdig[8:] restricts the first hex digit to 8-F (either case), i.e. to
+# byte values 0x80 and up.
+_hexdig = '0123456789ABCDEFabcdef'
+_hextobyte.update({
+    (a + b).encode(): bytes.fromhex(a + b)
+    for a in _hexdig[8:] for b in _hexdig
+})
+
+
+def uri_to_iri(uri):
+    """
+    Convert a Uniform Resource Identifier(URI) into an Internationalized
+    Resource Identifier(IRI).
+
+    This is the algorithm from section 3.2 of RFC 3987, excluding step 4.
+
+    Take a URI in ASCII bytes (e.g. '/I%20%E2%99%A5%20Django/') and return
+    a string containing the encoded result (e.g. '/I%20♥%20Django/').
+
+    None is returned unchanged. Any other input is first converted with
+    force_bytes(). Only the percent-escapes listed in _hextobyte are
+    decoded; every other '%XX' sequence is copied through untouched.
+    """
+    if uri is None:
+        return uri
+    uri = force_bytes(uri)
+    # Fast selective unquote: First, split on '%' and then starting with the
+    # second block, decode the first 2 bytes if they represent a hex code to
+    # decode. The rest of the block is the part after '%AB', not containing
+    # any '%'. Add that to the output without further processing.
+    bits = uri.split(b'%')
+    if len(bits) == 1:
+        iri = uri
+    else:
+        parts = [bits[0]]
+        append = parts.append
+        hextobyte = _hextobyte
+        for item in bits[1:]:
+            # Named hex_code (not hex) so the builtin hex() is not shadowed;
+            # the slice is computed once and reused for the lookup.
+            hex_code = item[:2]
+            if hex_code in hextobyte:
+                append(hextobyte[hex_code])
+                append(item[2:])
+            else:
+                # Not a decodable escape: restore the '%' removed by split().
+                append(b'%')
+                append(item)
+        iri = b''.join(parts)
+    # Re-encode any bytes that do not form valid UTF-8 before decoding.
+    return repercent_broken_unicode(iri).decode()
+
+
+def escape_uri_path(path):
+    """
+    Escape the unsafe characters from the path portion of a Uniform Resource
+    Identifier (URI).
+
+    Return the result of urllib.parse.quote() on 'path' with the safe
+    character set described below.
+    """
+    # These are the "reserved" and "unreserved" characters specified in
+    # sections 2.2 and 2.3 of RFC 2396:
+    #   reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
+    #   unreserved  = alphanum | mark
+    #   mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+    # The list of safe characters here is constructed subtracting ";", "=",
+    # and "?" according to section 3.3 of RFC 2396.
+    # The reason for not subtracting and escaping "/" is that we are escaping
+    # the entire path, not a path segment.
+    return quote(path, safe="/:@&+$,-_.!~*'()")
+
+
+def punycode(domain):
+    """
+    Return the Punycode of the given domain if it's non-ASCII.
+
+    The domain is encoded with Python's 'idna' codec and the resulting
+    bytes are decoded as ASCII, so the return value is a str.
+    """
+    return domain.encode('idna').decode('ascii')
+
+
+def repercent_broken_unicode(path):
+    """
+    As per section 3.2 of RFC 3987, step three of converting a URI into an IRI,
+    repercent-encode any octet produced that is not part of a strictly legal
+    UTF-8 octet sequence.
+
+    'path' is a bytes-like value (it is decoded and sliced below); the
+    return value is the same data with each undecodable byte range replaced
+    by its percent-encoded form.
+    """
+    # Each pass fixes the first undecodable range reported by decode() and
+    # retries, until the whole value decodes cleanly.
+    while True:
+        try:
+            path.decode()
+        except UnicodeDecodeError as e:
+            # CVE-2019-14235: A recursion shouldn't be used since the exception
+            # handling uses massive amounts of memory
+            repercent = quote(path[e.start:e.end],
+                              safe=b"/#%[]=:;$&()+,!?*@'~")
+            path = path[:e.start] + repercent.encode() + path[e.end:]
+        else:
+            return path
+
+
+def filepath_to_uri(path):
+    """Convert a file system path to a URI portion that is suitable for
+    inclusion in a URL.
+
+    Encode certain chars that would normally be recognized as special chars
+    for URIs. Do not encode the ' character, as it is a valid character
+    within URIs. See the encodeURIComponent() JavaScript function for details.
+
+    None is returned unchanged. Any other value is converted with str() and
+    its backslashes are replaced by forward slashes before quoting.
+    """
+    if path is None:
+        return path
+    # I know about `os.sep` and `os.altsep` but I want to leave
+    # some flexibility for hardcoding separators.
+    return quote(str(path).replace("\\", "/"), safe="/~!*()'")
+
+
+def get_system_encoding():
+    """
+    Return the encoding of the current system locale. Fall back to 'ascii'
+    if the encoding is unsupported by Python or could not be determined.
+    See Django tickets #10335 and #5846.
+    """
+    try:
+        # locale.getdefaultlocale() is deprecated since Python 3.11, so the
+        # encoding is read from locale.getlocale() instead.
+        encoding = locale.getlocale()[1] or 'ascii'
+        # Raises LookupError when Python has no codec for this name.
+        codecs.lookup(encoding)
+    except Exception:
+        # Deliberately broad: any failure while querying or validating the
+        # locale must end in the ASCII fallback rather than an import error.
+        encoding = 'ascii'
+    return encoding
+
+
+# Computed once, when this module is imported.
+DEFAULT_LOCALE_ENCODING = get_system_encoding()