Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add platform alerts #57

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion cli/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions cli/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ docopt = "^0.6.2"
types-docopt = "^0.6.11.4"
pydantic-settings = "^2.2.1"
pyjwt = {extras = ["crypto"], version = "^2.8.0"}
pytimeparse2 = "^1.7.1"

[tool.poetry.group.dev.dependencies]
black = "^24.1.1"
Expand Down
173 changes: 173 additions & 0 deletions cli/tenzir_platform/subcommand_alert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
# SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors
# SPDX-License-Identifier: BSD-3-Clause

"""Usage:
tenzir-platform alert add <node> <duration> <webhook_url> [<webhook_body>]
tenzir-platform alert delete <alert_id>
tenzir-platform alert list

Options:
<node> The node to be monitored.
<duration> The amount of time to wait before triggering the alert.
<webhook_url> The URL to call when the alert triggers
<webhook_body> The body to send along with the webhook. Must be valid JSON.

Description:
tenzir-platform alert add <node> <duration> <webhook_url> [<webhook_body>]
Add a new alert to the platform.

tenzir-platform alert delete <alert_id>
Delete the specified alert.

tenzir-platform alert list
List all configured alerts.
"""

from tenzir_platform.helpers.cache import load_current_workspace
from tenzir_platform.helpers.client import AppClient
from tenzir_platform.helpers.environment import PlatformEnvironment
from pydantic import BaseModel
from docopt import docopt
from typing import Optional, List
from requests import HTTPError
from pytimeparse2 import parse as parse_duration
import json
import time
import tempfile
import os
import subprocess
import re
import random
import datetime


def _is_node_id(identifier: str):
return bool(re.match(r"^n-[a-z0-9]{8}$", identifier))


def _get_node_list(client: AppClient, workspace_id: str) -> List:
    """Fetch the nodes of a workspace from the platform.

    Posts to the `list-nodes` endpoint with the workspace id passed as
    `tenant_id`, calls `raise_for_status()` on the response, and returns the
    `nodes` entry of the decoded JSON body.
    """
    payload = {"tenant_id": workspace_id}
    response = client.post("list-nodes", json=payload)
    response.raise_for_status()
    body = response.json()
    return body["nodes"]


def _resolve_node_identifier(
    client: AppClient, workspace_id: str, identifier: str
) -> str:
    """Translate a node id or a node name into a node id.

    Args:
        client: Client used to query the workspace's node list.
        workspace_id: Workspace whose nodes are searched by name.
        identifier: Either a node id (returned unchanged) or a node name.

    Returns:
        The id of the single node matching `identifier`.

    Raises:
        Exception: If no node, or more than one node, carries the name.
    """
    # If we already have a node id, use that and skip the network round trip.
    if _is_node_id(identifier):
        return identifier

    # Otherwise go through the list of nodes and look for a matching name.
    nodes = _get_node_list(client, workspace_id)
    matching_ids = [
        node["node_id"] for node in nodes if node["name"] == identifier
    ]
    if not matching_ids:
        raise Exception(f"Unknown node {identifier}")
    if len(matching_ids) > 1:
        raise Exception(
            f"Ambiguous name {identifier} is shared by nodes {matching_ids}"
        )
    return matching_ids[0]


def add(
    client: AppClient,
    workspace_id: str,
    node: str,
    duration: str,
    webhook_url: str,
    webhook_body: str,
):
    """Register a new alert for a node and print the platform's response.

    Args:
        client: Client used to talk to the platform.
        workspace_id: Workspace the alert belongs to (sent as `tenant_id`).
        node: Node id or node name; resolved to a node id first.
        duration: Human-readable duration, converted to seconds with
            `parse_duration`.
        webhook_url: URL sent to the platform as `webhook_url`.
        webhook_body: Body sent as `webhook_body`; must be a valid JSON
            document. It is forwarded as the original string, not re-encoded.

    Invalid durations and invalid JSON bodies are reported on stdout and the
    function returns without contacting the `alert/add` endpoint.
    """
    node_id = _resolve_node_identifier(client, workspace_id, node)
    seconds = parse_duration(duration)
    # A falsy result (nothing parsed, or a zero-length duration) is rejected.
    if not seconds:
        print(f"invalid duration: {duration}")
        return
    try:
        # Only validate here; the raw string is what gets sent.
        json.loads(webhook_body)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError; TypeError covers a
        # non-string body such as None.
        print("body must be valid json")
        return
    resp = client.post(
        "alert/add",
        json={
            "tenant_id": workspace_id,
            "node_id": node_id,
            "duration": seconds,
            "webhook_url": webhook_url,
            "webhook_body": webhook_body,
        },
    )
    resp.raise_for_status()
    print(json.dumps(resp.json()))


def delete(client: AppClient, workspace_id: str, alert_id: str):
    """Remove an alert from the workspace and confirm it on stdout.

    Posts the workspace id (as `tenant_id`) and the alert id to the
    `alert/delete` endpoint, calls `raise_for_status()` on the response, and
    prints a confirmation line once that succeeds.
    """
    request_body = {"tenant_id": workspace_id, "alert_id": alert_id}
    response = client.post("alert/delete", json=request_body)
    response.raise_for_status()
    print(f"deleted alert {alert_id}")


def list(
    client: AppClient,
    workspace_id: str,
):
    """Print the alerts configured for the workspace, one per line.

    Queries the `alert/list` endpoint with the workspace id as `tenant_id`.
    When the response contains no alerts a short notice is printed instead;
    otherwise a header line is followed by one tab-separated row per alert
    with its id, node id, duration in seconds, and webhook URL.
    """
    response = client.post("alert/list", json={"tenant_id": workspace_id})
    response.raise_for_status()
    configured = response.json()["alerts"]
    if len(configured) == 0:
        print("no alerts configured")
        return
    print("Alert Node Trigger Url")
    # Fields are read in this order; the row below prints node before duration.
    fields = ("id", "duration", "node_id", "webhook_url")
    for entry in configured:
        alert_id, duration, node, url = (entry[field] for field in fields)
        print(f"{alert_id}\t{node}\t{duration}s\t{url}")


def alert_subcommand(platform: PlatformEnvironment, argv):
    """Entry point for `tenzir-platform alert ...`.

    Parses `argv` against this module's usage text, logs in to the currently
    selected workspace, and dispatches to `add`, `delete`, or `list`.

    Args:
        platform: Platform environment used to load the workspace and to
            construct the client.
        argv: Argument vector for the `alert` subcommand.

    Raises:
        SystemExit: With status 1 if the current workspace cannot be loaded
            or the workspace login fails.
    """
    args = docopt(__doc__, argv=argv)
    try:
        workspace_id, user_key = load_current_workspace(platform)
        client = AppClient(platform=platform)
        client.workspace_login(user_key)
    except Exception as e:
        print(f"error: {e}")
        print(
            "Failed to load current workspace, please run 'tenzir-platform workspace select' first"
        )
        # `exit()` is a helper injected by the `site` module and is not
        # guaranteed to exist; raising SystemExit is the equivalent that is.
        raise SystemExit(1)

    try:
        if args["add"]:
            node = args["<node>"]
            duration = args["<duration>"]
            webhook_url = args["<webhook_url>"]
            # docopt always stores the key for an optional positional and sets
            # it to None when omitted, so a `dict.get` default never applies.
            webhook_body = args["<webhook_body>"]
            if webhook_body is None:
                # Build the default with json.dumps so that it is valid JSON
                # regardless of the characters in `duration`.
                # NOTE(review): `duration` is the raw user input (e.g. "5m"),
                # so the trailing "s" may read oddly - confirm intended text.
                webhook_body = json.dumps(
                    {
                        "text": f"Node $NODE_ID disconnected for more than {duration}s"
                    }
                )
            # The body is validated inside `add`, which reports invalid JSON
            # to the user instead of failing an assertion.
            add(client, workspace_id, node, duration, webhook_url, webhook_body)
        elif args["delete"]:
            alert = args["<alert_id>"]
            delete(client, workspace_id, alert)
        elif args["list"]:
            list(client, workspace_id)
    except HTTPError as e:
        if e.response.status_code == 403:
            print(
                "Access denied. Please try re-authenticating by running 'tenzir-platform workspace select'"
            )
        else:
            print(f"Error communicating with the platform: {e}")
4 changes: 4 additions & 0 deletions cli/tenzir_platform/tenzir_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
auth Authenticate the current user
workspace Select the currently used workspace
node Interact with nodes
alert Configure alerts for disconnected nodes.
admin Administer local on-prem platform infrastructure.

See 'tenzir-platform <command> --help' for more information on a specific command.
Expand All @@ -29,6 +30,7 @@
import importlib.metadata

from tenzir_platform.subcommand_auth import auth_subcommand
from tenzir_platform.subcommand_alert import alert_subcommand
from tenzir_platform.subcommand_workspace import workspace_subcommand
from tenzir_platform.subcommand_node import node_subcommand
from tenzir_platform.subcommand_admin import admin_subcommand
Expand All @@ -47,6 +49,8 @@ def main():
argv = [arguments["<command>"]] + arguments["<args>"]
if arguments["<command>"] == "auth":
auth_subcommand(platform, argv)
elif arguments["<command>"] == "alert":
alert_subcommand(platform, argv)
elif arguments["<command>"] == "workspace":
workspace_subcommand(platform, argv)
elif arguments["<command>"] == "node":
Expand Down
2 changes: 1 addition & 1 deletion components/tenant-manager