Skip to content

Commit 3f9490d

Browse files
anshbansalhsheth2
andauthored
feat(cli): option to init via username password, add lint for smoke-test (datahub-project#9675)
Co-authored-by: Harshal Sheth <[email protected]>
1 parent 69d0ba1 commit 3f9490d

File tree

67 files changed

+1161
-772
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+1161
-772
lines changed

.github/workflows/docker-unified.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,18 @@ jobs:
6464
steps:
6565
- name: Check out the repo
6666
uses: hsheth2/sane-checkout-action@v1
67+
- uses: actions/setup-python@v4
68+
with:
69+
python-version: "3.10"
70+
cache: "pip"
71+
- name: Set up JDK 17
72+
uses: actions/setup-java@v3
73+
with:
74+
distribution: "zulu"
75+
java-version: 17
76+
- name: Run lint on smoke test
77+
run: |
78+
./gradlew :smoke-test:lint
6779
- name: Compute Tag
6880
id: tag
6981
run: |

docs/cli.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ failure_log:
180180
### init
181181
182182
The init command is used to tell `datahub` about where your DataHub instance is located. The CLI will point to localhost DataHub by default.
183-
Running `datahub init` will allow you to customize the datahub instance you are communicating with.
183+
Running `datahub init` will allow you to customize the datahub instance you are communicating with. It has an optional `--use-password` option which allows to initialise the config using username, password. We foresee this mainly being used by admins as majority of organisations will be using SSO and there won't be any passwords to use.
184184

185185
**_Note_**: Provide your GMS instance's host when the prompt asks you for the DataHub host.
186186

metadata-ingestion/src/datahub/cli/cli_utils.py

Lines changed: 102 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,11 @@
99

1010
import click
1111
import requests
12-
import yaml
1312
from deprecated import deprecated
14-
from pydantic import BaseModel, ValidationError
1513
from requests.models import Response
1614
from requests.sessions import Session
1715

16+
from datahub.cli import config_utils
1817
from datahub.emitter.aspect import ASPECT_MAP, TIMESERIES_ASPECT_MAP
1918
from datahub.emitter.request_helper import make_curl_command
2019
from datahub.emitter.serialization_helper import post_json_transform
@@ -23,13 +22,6 @@
2322

2423
log = logging.getLogger(__name__)
2524

26-
DEFAULT_GMS_HOST = "http://localhost:8080"
27-
CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv"
28-
DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
29-
30-
DATAHUB_ROOT_FOLDER = os.path.expanduser("~/.datahub")
31-
32-
ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG"
3325
ENV_METADATA_HOST_URL = "DATAHUB_GMS_URL"
3426
ENV_METADATA_HOST = "DATAHUB_GMS_HOST"
3527
ENV_METADATA_PORT = "DATAHUB_GMS_PORT"
@@ -45,104 +37,13 @@
4537
# For the methods that aren't duplicates, that logic should be moved to the client.
4638

4739

48-
class GmsConfig(BaseModel):
49-
server: str
50-
token: Optional[str] = None
51-
52-
53-
class DatahubConfig(BaseModel):
54-
gms: GmsConfig
55-
56-
57-
def get_boolean_env_variable(key: str, default: bool = False) -> bool:
58-
value = os.environ.get(key)
59-
if value is None:
60-
return default
61-
elif value.lower() in ("true", "1"):
62-
return True
63-
else:
64-
return False
65-
66-
6740
def set_env_variables_override_config(url: str, token: Optional[str]) -> None:
6841
"""Should be used to override the config when using rest emitter"""
6942
config_override[ENV_METADATA_HOST_URL] = url
7043
if token is not None:
7144
config_override[ENV_METADATA_TOKEN] = token
7245

7346

74-
def persist_datahub_config(config: dict) -> None:
75-
with open(DATAHUB_CONFIG_PATH, "w+") as outfile:
76-
yaml.dump(config, outfile, default_flow_style=False)
77-
return None
78-
79-
80-
def write_gms_config(
81-
host: str, token: Optional[str], merge_with_previous: bool = True
82-
) -> None:
83-
config = DatahubConfig(gms=GmsConfig(server=host, token=token))
84-
if merge_with_previous:
85-
try:
86-
previous_config = get_client_config(as_dict=True)
87-
assert isinstance(previous_config, dict)
88-
except Exception as e:
89-
# ok to fail on this
90-
previous_config = {}
91-
log.debug(
92-
f"Failed to retrieve config from file {DATAHUB_CONFIG_PATH}: {e}. This isn't fatal."
93-
)
94-
config_dict = {**previous_config, **config.dict()}
95-
else:
96-
config_dict = config.dict()
97-
persist_datahub_config(config_dict)
98-
99-
100-
def should_skip_config() -> bool:
101-
return get_boolean_env_variable(ENV_SKIP_CONFIG, False)
102-
103-
104-
def ensure_datahub_config() -> None:
105-
if not os.path.isfile(DATAHUB_CONFIG_PATH):
106-
click.secho(
107-
f"No {CONDENSED_DATAHUB_CONFIG_PATH} file found, generating one for you...",
108-
bold=True,
109-
)
110-
write_gms_config(DEFAULT_GMS_HOST, None)
111-
112-
113-
def get_client_config(as_dict: bool = False) -> Union[Optional[DatahubConfig], dict]:
114-
with open(DATAHUB_CONFIG_PATH, "r") as stream:
115-
try:
116-
config_json = yaml.safe_load(stream)
117-
if as_dict:
118-
return config_json
119-
try:
120-
datahub_config = DatahubConfig.parse_obj(config_json)
121-
return datahub_config
122-
except ValidationError as e:
123-
click.echo(
124-
f"Received error, please check your {CONDENSED_DATAHUB_CONFIG_PATH}"
125-
)
126-
click.echo(e, err=True)
127-
sys.exit(1)
128-
except yaml.YAMLError as exc:
129-
click.secho(f"{DATAHUB_CONFIG_PATH} malformed, error: {exc}", bold=True)
130-
return None
131-
132-
133-
def get_details_from_config():
134-
datahub_config = get_client_config(as_dict=False)
135-
assert isinstance(datahub_config, DatahubConfig)
136-
if datahub_config is not None:
137-
gms_config = datahub_config.gms
138-
139-
gms_host = gms_config.server
140-
gms_token = gms_config.token
141-
return gms_host, gms_token
142-
else:
143-
return None, None
144-
145-
14647
def get_details_from_env() -> Tuple[Optional[str], Optional[str]]:
14748
host = os.environ.get(ENV_METADATA_HOST)
14849
port = os.environ.get(ENV_METADATA_PORT)
@@ -178,12 +79,12 @@ def get_url_and_token():
17879
if len(config_override.keys()) > 0:
17980
gms_host = config_override.get(ENV_METADATA_HOST_URL)
18081
gms_token = config_override.get(ENV_METADATA_TOKEN)
181-
elif should_skip_config():
82+
elif config_utils.should_skip_config():
18283
gms_host = gms_host_env
18384
gms_token = gms_token_env
18485
else:
185-
ensure_datahub_config()
186-
gms_host_conf, gms_token_conf = get_details_from_config()
86+
config_utils.ensure_datahub_config()
87+
gms_host_conf, gms_token_conf = config_utils.get_details_from_config()
18788
gms_host = first_non_null([gms_host_env, gms_host_conf])
18889
gms_token = first_non_null([gms_token_env, gms_token_conf])
18990
return gms_host, gms_token
@@ -253,14 +154,18 @@ def parse_run_restli_response(response: requests.Response) -> dict:
253154
exit()
254155

255156
if not isinstance(response_json, dict):
256-
click.echo(f"Received error, please check your {CONDENSED_DATAHUB_CONFIG_PATH}")
157+
click.echo(
158+
f"Received error, please check your {config_utils.CONDENSED_DATAHUB_CONFIG_PATH}"
159+
)
257160
click.echo()
258161
click.echo(response_json)
259162
exit()
260163

261164
summary = response_json.get("value")
262165
if not isinstance(summary, dict):
263-
click.echo(f"Received error, please check your {CONDENSED_DATAHUB_CONFIG_PATH}")
166+
click.echo(
167+
f"Received error, please check your {config_utils.CONDENSED_DATAHUB_CONFIG_PATH}"
168+
)
264169
click.echo()
265170
click.echo(response_json)
266171
exit()
@@ -686,3 +591,95 @@ def command(ctx: click.Context) -> None:
686591
ctx.exit(1)
687592

688593
return command
594+
595+
596+
def get_session_login_as(
597+
username: str, password: str, frontend_url: str
598+
) -> requests.Session:
599+
session = requests.Session()
600+
headers = {
601+
"Content-Type": "application/json",
602+
}
603+
system_auth = get_system_auth()
604+
if system_auth is not None:
605+
session.headers.update({"Authorization": system_auth})
606+
else:
607+
data = '{"username":"' + username + '", "password":"' + password + '"}'
608+
response = session.post(f"{frontend_url}/logIn", headers=headers, data=data)
609+
response.raise_for_status()
610+
return session
611+
612+
613+
def _ensure_valid_gms_url_acryl_cloud(url: str) -> str:
614+
if "acryl.io" not in url:
615+
return url
616+
if url.startswith("http://"):
617+
url = url.replace("http://", "https://")
618+
if url.endswith("acryl.io"):
619+
url = f"{url}/gms"
620+
return url
621+
622+
623+
def fixup_gms_url(url: str) -> str:
624+
if url is None:
625+
return ""
626+
if url.endswith("/"):
627+
url = url.rstrip("/")
628+
url = _ensure_valid_gms_url_acryl_cloud(url)
629+
return url
630+
631+
632+
def guess_frontend_url_from_gms_url(gms_url: str) -> str:
633+
gms_url = fixup_gms_url(gms_url)
634+
url = gms_url
635+
if url.endswith("/gms"):
636+
url = gms_url.rstrip("/gms")
637+
if url.endswith("8080"):
638+
url = url[:-4] + "9002"
639+
return url
640+
641+
642+
def generate_access_token(
643+
username: str,
644+
password: str,
645+
gms_url: str,
646+
token_name: Optional[str] = None,
647+
validity: str = "ONE_HOUR",
648+
) -> Tuple[str, str]:
649+
frontend_url = guess_frontend_url_from_gms_url(gms_url)
650+
session = get_session_login_as(
651+
username=username,
652+
password=password,
653+
frontend_url=frontend_url,
654+
)
655+
now = datetime.now()
656+
timestamp = now.astimezone().isoformat()
657+
if token_name is None:
658+
token_name = f"cli token {timestamp}"
659+
json = {
660+
"query": """mutation createAccessToken($input: CreateAccessTokenInput!) {
661+
createAccessToken(input: $input) {
662+
accessToken
663+
metadata {
664+
id
665+
actorUrn
666+
ownerUrn
667+
name
668+
description
669+
}
670+
}
671+
}""",
672+
"variables": {
673+
"input": {
674+
"type": "PERSONAL",
675+
"actorUrn": f"urn:li:corpuser:{username}",
676+
"duration": validity,
677+
"name": token_name,
678+
}
679+
},
680+
}
681+
response = session.post(f"{frontend_url}/api/v2/graphql", json=json)
682+
response.raise_for_status()
683+
return token_name, response.json().get("data", {}).get("createAccessToken", {}).get(
684+
"accessToken", None
685+
)
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
import logging
2+
import os
3+
import sys
4+
from typing import Optional, Union
5+
6+
import click
7+
import yaml
8+
from pydantic import BaseModel, ValidationError
9+
10+
from datahub.cli.env_utils import get_boolean_env_variable
11+
12+
__help__ = (
13+
"For helper methods to contain manipulation of the config file in local system."
14+
)
15+
log = logging.getLogger(__name__)
16+
17+
DEFAULT_GMS_HOST = "http://localhost:8080"
18+
CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv"
19+
DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH)
20+
DATAHUB_ROOT_FOLDER = os.path.expanduser("~/.datahub")
21+
ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG"
22+
23+
24+
class GmsConfig(BaseModel):
25+
server: str
26+
token: Optional[str] = None
27+
28+
29+
class DatahubConfig(BaseModel):
30+
gms: GmsConfig
31+
32+
33+
def persist_datahub_config(config: dict) -> None:
34+
with open(DATAHUB_CONFIG_PATH, "w+") as outfile:
35+
yaml.dump(config, outfile, default_flow_style=False)
36+
return None
37+
38+
39+
def write_gms_config(
40+
host: str, token: Optional[str], merge_with_previous: bool = True
41+
) -> None:
42+
config = DatahubConfig(gms=GmsConfig(server=host, token=token))
43+
if merge_with_previous:
44+
try:
45+
previous_config = get_client_config(as_dict=True)
46+
assert isinstance(previous_config, dict)
47+
except Exception as e:
48+
# ok to fail on this
49+
previous_config = {}
50+
log.debug(
51+
f"Failed to retrieve config from file {DATAHUB_CONFIG_PATH}: {e}. This isn't fatal."
52+
)
53+
config_dict = {**previous_config, **config.dict()}
54+
else:
55+
config_dict = config.dict()
56+
persist_datahub_config(config_dict)
57+
58+
59+
def get_details_from_config():
60+
datahub_config = get_client_config(as_dict=False)
61+
assert isinstance(datahub_config, DatahubConfig)
62+
if datahub_config is not None:
63+
gms_config = datahub_config.gms
64+
65+
gms_host = gms_config.server
66+
gms_token = gms_config.token
67+
return gms_host, gms_token
68+
else:
69+
return None, None
70+
71+
72+
def should_skip_config() -> bool:
73+
return get_boolean_env_variable(ENV_SKIP_CONFIG, False)
74+
75+
76+
def ensure_datahub_config() -> None:
77+
if not os.path.isfile(DATAHUB_CONFIG_PATH):
78+
click.secho(
79+
f"No {CONDENSED_DATAHUB_CONFIG_PATH} file found, generating one for you...",
80+
bold=True,
81+
)
82+
write_gms_config(DEFAULT_GMS_HOST, None)
83+
84+
85+
def get_client_config(as_dict: bool = False) -> Union[Optional[DatahubConfig], dict]:
86+
with open(DATAHUB_CONFIG_PATH, "r") as stream:
87+
try:
88+
config_json = yaml.safe_load(stream)
89+
if as_dict:
90+
return config_json
91+
try:
92+
datahub_config = DatahubConfig.parse_obj(config_json)
93+
return datahub_config
94+
except ValidationError as e:
95+
click.echo(
96+
f"Received error, please check your {CONDENSED_DATAHUB_CONFIG_PATH}"
97+
)
98+
click.echo(e, err=True)
99+
sys.exit(1)
100+
except yaml.YAMLError as exc:
101+
click.secho(f"{DATAHUB_CONFIG_PATH} malformed, error: {exc}", bold=True)
102+
return None

0 commit comments

Comments
 (0)