Skip to content

Commit

Permalink
Add platform alerts (#57)
Browse files Browse the repository at this point in the history
Add cli support for platform alerts
  • Loading branch information
lava authored Nov 15, 2024
1 parent e19d0af commit 360ab66
Show file tree
Hide file tree
Showing 4 changed files with 195 additions and 1 deletion.
16 changes: 15 additions & 1 deletion cli/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions cli/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ docopt = "^0.6.2"
types-docopt = "^0.6.11.4"
pydantic-settings = "^2.2.1"
pyjwt = {extras = ["crypto"], version = "^2.8.0"}
pytimeparse2 = "^1.7.1"

[tool.poetry.group.dev.dependencies]
black = "^24.1.1"
Expand Down
175 changes: 175 additions & 0 deletions cli/tenzir_platform/subcommand_alert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
# SPDX-FileCopyrightText: (c) 2024 The Tenzir Contributors
# SPDX-License-Identifier: BSD-3-Clause

"""Usage:
tenzir-platform alert add <node> <duration> <webhook_url> [<webhook_body>]
tenzir-platform alert delete <alert_id>
tenzir-platform alert list
Options:
<node> The node to be monitored.
<duration> The amount of time to wait before triggering the alert.
<webhook_url> The URL to call when the alert triggers
<webhook_body> The body to send along with the webhook. Must be valid JSON.
Description:
tenzir-platform alert add <node> <duration> <webhook>
Add a new alert to the platform.
tenzir-platform alert delete <alert_id>
Delete the specified alert.
tenzir-platform alert list
List all configured alerts.
"""

from tenzir_platform.helpers.cache import load_current_workspace
from tenzir_platform.helpers.client import AppClient
from tenzir_platform.helpers.environment import PlatformEnvironment
from pydantic import BaseModel
from docopt import docopt
from typing import Optional, List
from requests import HTTPError
from pytimeparse2 import parse as parse_duration
import json
import time
import tempfile
import os
import subprocess
import re
import random
import datetime


def _is_node_id(identifier: str) -> bool:
    """Return whether ``identifier`` has the shape of a node id.

    A node id is the literal prefix ``n-`` followed by exactly eight
    lowercase ASCII letters or digits. The check is purely syntactic; no
    request is made to find out whether such a node exists.
    """
    # `fullmatch` instead of `match` with a `$` anchor: `$` also matches just
    # before a trailing newline, so "n-abcd1234\n" used to be accepted.
    return re.fullmatch(r"n-[a-z0-9]{8}", identifier) is not None


def _get_node_list(client: AppClient, workspace_id: str) -> List:
    """Fetch the nodes of a workspace from the platform.

    Posts to the ``list-nodes`` endpoint with the workspace id as
    ``tenant_id`` and returns the ``nodes`` entry of the decoded JSON reply.
    ``raise_for_status()`` is called on the response before decoding, so a
    failed request surfaces as an exception instead of a partial result
    (presumably ``requests.HTTPError`` -- confirm against ``AppClient``).
    """
    request_body = {"tenant_id": workspace_id}
    response = client.post("list-nodes", json=request_body)
    response.raise_for_status()
    payload = response.json()
    return payload["nodes"]


def _resolve_node_identifier(
    client: AppClient, workspace_id: str, identifier: str
) -> str:
    """Translate a user-supplied node id or node name into a node id.

    An ``identifier`` that already looks like a node id is returned as-is
    without contacting the platform. Anything else is treated as a node name
    and looked up in the workspace's node list.

    Raises:
        Exception: if no node carries that name, or if several nodes do.
    """
    # Short-circuit: ids need no lookup.
    if _is_node_id(identifier):
        return identifier

    # Collect every node whose name equals the identifier.
    candidates = []
    for entry in _get_node_list(client, workspace_id):
        if entry["name"] == identifier:
            candidates.append(entry)

    if not candidates:
        raise Exception(f"Unknown node {identifier}")
    if len(candidates) == 1:
        return candidates[0]["node_id"]

    # More than one node shares the name; report all of their ids.
    matching_ids = [entry["node_id"] for entry in candidates]
    raise Exception(f"Ambigous name {identifier} is shared by nodes {matching_ids}")


def add(
    client: AppClient,
    workspace_id: str,
    node: str,
    duration: str,
    webhook_url: str,
    webhook_body: str,
):
    """Create an alert for a node and print the platform's JSON reply.

    Args:
        client: Logged-in platform client used to send the request.
        workspace_id: Workspace the alert belongs to; sent as ``tenant_id``.
        node: Node id or node name; resolved to a node id before sending.
        duration: Human-readable duration (parsed with ``pytimeparse2``)
            that is sent to the platform as a number of seconds.
        webhook_url: URL sent to the platform as ``webhook_url``.
        webhook_body: JSON document, as a string, sent as ``webhook_body``.

    Invalid input is reported on stdout and the function returns without
    contacting the ``alert/add`` endpoint. A failed request propagates the
    exception raised by ``raise_for_status()``.
    """
    node_id = _resolve_node_identifier(client, workspace_id, node)
    seconds = parse_duration(duration)
    # `parse_duration` yields None for unparsable input; a zero duration is
    # falsy as well and is therefore also rejected here.
    if not seconds:
        print(f"invalid duration: {duration}")
        return
    try:
        # Only validate; the body is forwarded as the original string.
        json.loads(webhook_body)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError, TypeError covers a body that
        # is not a string at all. A bare `except:` would also have swallowed
        # KeyboardInterrupt and SystemExit.
        print("body must be valid json")
        return
    resp = client.post(
        "alert/add",
        json={
            "tenant_id": workspace_id,
            "node_id": node_id,
            "duration": seconds,
            "webhook_url": webhook_url,
            "webhook_body": webhook_body,
        },
    )
    resp.raise_for_status()
    print(json.dumps(resp.json()))


def delete(client: AppClient, workspace_id: str, alert_id: str):
    """Remove one alert from a workspace and print a confirmation.

    Posts the workspace id (as ``tenant_id``) and the alert id to the
    ``alert/delete`` endpoint. The confirmation line is only printed when
    ``raise_for_status()`` did not raise.
    """
    payload = {
        "tenant_id": workspace_id,
        "alert_id": alert_id,
    }
    response = client.post("alert/delete", json=payload)
    response.raise_for_status()
    print(f"deleted alert {alert_id}")


def list(
    client: AppClient,
    workspace_id: str,
):
    """Print every alert configured for a workspace.

    Queries the ``alert/list`` endpoint and writes a header followed by one
    tab-separated line per alert (id, node id, duration in seconds, webhook
    URL). When the platform reports no alerts, a short notice is printed
    instead.

    NOTE: this function shadows the builtin ``list`` inside this module; the
    name is kept because the subcommand dispatcher calls it by that name.
    """
    response = client.post("alert/list", json={"tenant_id": workspace_id})
    response.raise_for_status()
    configured = response.json()["alerts"]

    if len(configured) == 0:
        print("no alerts configured")
        return

    print("Alert Node Trigger Url")
    for entry in configured:
        # Read the fields up front, in a fixed order, before formatting.
        ident, seconds, node_id, target = (
            entry["id"],
            entry["duration"],
            entry["node_id"],
            entry["webhook_url"],
        )
        print(f"{ident}\t{node_id}\t{seconds}s\t{target}")


def alert_subcommand(platform: PlatformEnvironment, argv):
    """Entry point for ``tenzir-platform alert``.

    Parses ``argv`` against this module's usage text, logs in to the
    currently selected workspace and dispatches to ``add``, ``delete`` or
    ``list``.

    Args:
        platform: Environment describing the platform to talk to.
        argv: Argument vector for this subcommand, as handed to ``docopt``.

    Exits with status 1 when the current workspace cannot be loaded or when
    a user-supplied webhook body is not valid JSON. ``HTTPError`` raised
    while talking to the platform is reported on stdout and not re-raised.
    """
    args = docopt(__doc__, argv=argv)
    try:
        workspace_id, user_key = load_current_workspace(platform)
        client = AppClient(platform=platform)
        client.workspace_login(user_key)
    except Exception as e:
        print(f"error: {e}")
        print(
            "Failed to load current workspace, please run 'tenzir-platform workspace select' first"
        )
        # `exit()` is injected by the `site` module for interactive use and is
        # not guaranteed to exist; raising SystemExit is the portable form.
        raise SystemExit(1)

    try:
        if args["add"]:
            node = args["<node>"]
            duration = args["<duration>"]
            webhook_url = args["<webhook_url>"]
            webhook_body = args["<webhook_body>"]
            if webhook_body is None:
                # Use the parsed number of seconds in the default message:
                # appending "s" to the raw argument turned "5m" into "5ms".
                # If parsing fails the raw text is used; `add` then rejects
                # the duration anyway.
                seconds = parse_duration(duration)
                threshold = f"{seconds}s" if seconds else duration
                # json.dumps keeps the body valid JSON whatever was typed.
                webhook_body = json.dumps(
                    {"text": f"Node $NODE_ID disconnected for more than {threshold}"}
                )
            else:
                # Explicit validation instead of `assert`: asserts vanish
                # under `python -O`, and asserting on the decoded value
                # rejected valid but falsy documents such as `{}`.
                try:
                    json.loads(webhook_body)
                except ValueError:
                    print("body must be valid json")
                    raise SystemExit(1)
            add(client, workspace_id, node, duration, webhook_url, webhook_body)
        elif args["delete"]:
            alert = args["<alert_id>"]
            delete(client, workspace_id, alert)
        elif args["list"]:
            list(client, workspace_id)
    except HTTPError as e:
        if e.response.status_code == 403:
            print(
                "Access denied. Please try re-authenticating by running 'tenzir-platform workspace select'"
            )
        else:
            print(f"Error communicating with the platform: {e}")
4 changes: 4 additions & 0 deletions cli/tenzir_platform/tenzir_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
auth Authenticate the current user
workspace Select the currently used workspace
node Interact with nodes
alert Configure alerts for disconnected nodes.
admin Administer local on-prem platform infrastructure.
See 'tenzir-platform <command> --help' for more information on a specific command.
Expand All @@ -29,6 +30,7 @@
import importlib.metadata

from tenzir_platform.subcommand_auth import auth_subcommand
from tenzir_platform.subcommand_alert import alert_subcommand
from tenzir_platform.subcommand_workspace import workspace_subcommand
from tenzir_platform.subcommand_node import node_subcommand
from tenzir_platform.subcommand_admin import admin_subcommand
Expand All @@ -47,6 +49,8 @@ def main():
argv = [arguments["<command>"]] + arguments["<args>"]
if arguments["<command>"] == "auth":
auth_subcommand(platform, argv)
elif arguments["<command>"] == "alert":
alert_subcommand(platform, argv)
elif arguments["<command>"] == "workspace":
workspace_subcommand(platform, argv)
elif arguments["<command>"] == "node":
Expand Down

0 comments on commit 360ab66

Please sign in to comment.