Skip to content

Commit

Permalink
MM-60630: Automated loadtests configuration templates (#860)
Browse files Browse the repository at this point in the history
* Remove proxy's linux-tools-aws-lts-22.04 as well

* Format cluster.tf

* make assets

* Add CI base configuration

* Use the latest load-test tool release

* Use an empty string instead of "undefined"
  • Loading branch information
agarciamontoro authored Dec 3, 2024
1 parent 5cf2fc2 commit deadfc9
Show file tree
Hide file tree
Showing 8 changed files with 334 additions and 10 deletions.
12 changes: 6 additions & 6 deletions deployment/terraform/assets/bindata.go

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions deployment/terraform/assets/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,8 @@ resource "aws_rds_cluster_instance" "cluster_instances" {
}

resource "aws_db_parameter_group" "db_params_group" {
name_prefix = "${var.cluster_name}-db-pg"
family = var.db_instance_engine == "aurora-mysql" ? "aurora-mysql8.0" : "aurora-postgresql14"
name_prefix = "${var.cluster_name}-db-pg"
family = var.db_instance_engine == "aurora-mysql" ? "aurora-mysql8.0" : "aurora-postgresql14"
dynamic "parameter" {
for_each = var.db_parameters
content {
Expand Down Expand Up @@ -633,7 +633,7 @@ resource "aws_security_group" "redis" {
security_groups = [aws_security_group.app[0].id, aws_security_group.metrics[0].id]
}

count = var.redis_enabled ? 1 : 0
count = var.redis_enabled ? 1 : 0
}

resource "aws_security_group" "elastic" {
Expand Down
2 changes: 1 addition & 1 deletion deployment/terraform/assets/provisioners/proxy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ do
sudo apt-get -y update && \
sudo apt-get install -y nginx && \
sudo apt-get install -y prometheus-node-exporter && \
sudo apt-get install -y numactl linux-tools-aws linux-tools-aws-lts-22.04 && \
sudo apt-get install -y numactl linux-tools-aws && \
sudo systemctl daemon-reload && \
sudo systemctl enable nginx && \
sudo mkdir -p /etc/nginx/snippets && \
Expand Down
1 change: 1 addition & 0 deletions examples/config/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
This directory contains sets of configuration templates that we use in different scenarios. Some fields are hard-coded to the values we use in our day-to-day processes (e.g. the path to the SSH keys), and others are marked as `#TBD` because they may change from run to run (e.g. the URLs to download Mattermost from). In any case, these sets can serve as starter packs for other, different workflows. For now, we have:
- [Release testing](./release): configuration used when testing a new release of the load-test tool.
- [Performance comparison](./perfcomp): configuration used for regression testing of new Mattermost releases. The results of these runs can be found in the [`performance-reports` repository](https://github.com/mattermost/performance-reports/tree/main/performance-comparisons).
- [CI](./ci): configuration template used by Mattermost's CI pipeline to run automated load-test comparisons in pull requests.
22 changes: 22 additions & 0 deletions examples/config/ci/comparison.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"BaseBuild": {
"Label": "base",
"URL": ""
},
"NewBuild": {
"Label": "new",
"URL": ""
},
"LoadTests": [
{
"Type": "unbounded",
"DBEngine": "postgresql",
"DBDumpURL": "https://lt-public-data.s3.us-east-1.amazonaws.com/20M_710_psql.sql.gz"
}
],
"Output": {
"UploadDashboard": true,
"GenerateGraphs": true,
"GenerateReport": true
}
}
46 changes: 46 additions & 0 deletions examples/config/ci/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"ConnectionConfiguration": {
"ServerURL": "http://localhost:8065",
"WebSocketURL": "ws://localhost:8065",
"AdminEmail": "[email protected]",
"AdminPassword": "Sys@dmin-sample1"
},
"UserControllerConfiguration": {
"Type": "simulative",
"RatesDistribution": [
{
"Rate": 5.4,
"Percentage": 1
}
]
},
"InstanceConfiguration": {
"NumTeams": 2,
"NumChannels": 0,
"NumPosts": 0,
"NumReactions": 0,
"NumAdmins": 0,
"PercentReplies": 0.5,
"PercentRepliesInLongThreads": 0.05,
"PercentPublicChannels": 1,
"PercentPrivateChannels": 0,
"PercentDirectChannels": 0,
"PercentGroupChannels": 0
},
"UsersConfiguration": {
"InitialActiveUsers": 0,
"UsersFilePath": "",
"MaxActiveUsers": 2000,
"AvgSessionsPerUser": 1
},
"LogSettings": {
"EnableConsole": true,
"ConsoleLevel": "DEBUG",
"ConsoleJson": false,
"EnableFile": true,
"FileLevel": "DEBUG",
"FileJson": true,
"FileLocation": "ltagent.log",
"EnableColor": true
}
}
102 changes: 102 additions & 0 deletions examples/config/ci/coordinator.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
{
"ClusterConfig": {
"Agents": [
{
"Id": "lt0",
"ApiURL": "http://localhost:4000"
}
],
"MaxActiveUsers": 12000
},
"MonitorConfig": {
"PrometheusURL": "http://localhost:9090",
"UpdateIntervalMs": 2000,
"Queries": [
{
"Description": "Percentage of HTTP 5xx server errors",
"Legend": "Percent",
"Query": "(sum(rate(mattermost_api_time_count{status_code=~\"5..\"}[1m]))/sum(rate(mattermost_api_time_count[1m])))*100",
"Threshold": 0.025,
"MinIntervalSec": 60,
"Alert": true
},
{
"Description": "Average client request duration",
"Legend": "Avg duration (s)",
"Query": "sum(rate(loadtest_http_request_time_sum[1m]))/sum(rate(loadtest_http_request_time_count[1m]))",
"Threshold": 0.1,
"MinIntervalSec": 60,
"Alert": true
},
{
"Description": "99th percentile of client request duration",
"Legend": "P99 duration (s)",
"Query": "histogram_quantile(0.99, sum(rate(loadtest_http_request_time_bucket[1m])) by (le))",
"Threshold": 2.0,
"MinIntervalSec": 60,
"Alert": true
},
{
"Description": "Percentage of HTTP 5xx client errors",
"Legend": "Percent",
"Query": "(sum(rate(loadtest_http_errors_total{status_code=~\"5..\"}[1m]))/sum(rate(loadtest_http_request_time_count[1m])))*100",
"Threshold": 0.025,
"MinIntervalSec": 60,
"Alert": true
},
{
"Description": "Percentage of client timeouts",
"Legend": "Percent",
"Query": "(sum(rate(loadtest_http_timeouts_total[1m]))/sum(rate(loadtest_http_request_time_count[1m]))) * 100",
"Threshold": 0.025,
"MinIntervalSec": 60,
"Alert": true
},
{
"Description": "CPU utilization - Average of app nodes",
"Legend": "Percent",
"Query": "100 - 100 * (avg(irate(node_cpu_seconds_total{instance=~\"app.*\",mode=\"idle\"}[5m])))",
"Threshold": 85,
"MinIntervalSec": 60,
"Alert": true
},
{
"Description": "Memory utilization - Average of app nodes",
"Legend": "Percent",
"Query": "100 - 100 * avg(node_memory_MemAvailable_bytes{instance=~\"app.*\"} / node_memory_MemTotal_bytes{instance=~\"app.*\"})",
"Threshold": 85,
"MinIntervalSec": 60,
"Alert": true
},
{
"Description": "Percentage of TCP retransmissions in the app nodes",
"Legend": "Percent",
"Query": "(avg(rate(node_netstat_Tcp_RetransSegs{instance=~\"app.*\"}[1m])) / avg(rate(node_netstat_Tcp_OutSegs{instance=~\"app.*\"}[1m]))) * 100",
"Threshold": 0.5,
"MinIntervalSec": 60,
"Alert": true
},
{
"Description": "Percentage of TCP retransmissions in the proxy node",
"Legend": "Percent",
"Query": "(avg(rate(node_netstat_Tcp_RetransSegs{instance=~\"proxy:9100\"}[1m])) / avg(rate(node_netstat_Tcp_OutSegs{instance=~\"proxy:9100\"}[1m]))) * 100",
"Threshold": 0.5,
"MinIntervalSec": 60,
"Alert": true
}
]
},
"NumUsersInc": 8,
"NumUsersDec": 8,
"RestTimeSec": 1,
"LogSettings": {
"EnableConsole": true,
"ConsoleLevel": "INFO",
"ConsoleJson": false,
"EnableFile": true,
"FileLevel": "INFO",
"FileJson": true,
"FileLocation": "ltcoordinator.log",
"EnableColor": false
}
}
153 changes: 153 additions & 0 deletions examples/config/ci/deployer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
{
"AWSProfile": "",
"AWSRegion": "us-east-1",
"AWSAvailabilityZone": "",
"AWSAMI": "ami-003d3d03cfe1b0468",
"ClusterName": "",
"ClusterVpcID": "",
"AppInstanceCount": 2,
"AppInstanceType": "c7i.large",
"MetricsInstanceType": "t3.xlarge",
"AgentInstanceCount": 6,
"AgentInstanceType": "c7i.xlarge",
"ElasticSearchSettings": {
"InstanceCount": 0,
"InstanceType": "r6g.large.search",
"Version": "Elasticsearch_7.10",
"CreateRole": false,
"SnapshotRepository": "",
"SnapshotName": "",
"RestoreTimeoutMinutes": 45,
"ClusterTimeoutMinutes": 45
},
"RedisSettings": {
"Enabled": false,
"NodeType": "cache.m7g.2xlarge",
"ParameterGroupName": "default.redis7",
"EngineVersion": "7.1"
},
"JobServerSettings":{
"InstanceCount": 0,
"InstanceType": "c7i.xlarge"
},
"EnableAgentFullLogs": true,
"ProxyInstanceCount": 1,
"ProxyInstanceType": "c7i.xlarge",
"SSHPublicKey": "/home/runner/.ssh/mattermost-load-test.pub",
"TerraformStateDir" : "/home/runner/ltstatus",
"S3BucketDumpURI" : "",
"DBDumpURI": "",
"SiteURL": "",
"EnableNetPeekMetrics": false,
"TerraformDBSettings": {
"InstanceCount": 2,
"InstanceEngine": "aurora-postgresql",
"InstanceType": "db.r7g.xlarge",
"UserName": "mmuser",
"Password": "mostest80098bigpass_",
"EnablePerformanceInsights": true,
"DBParameters": [],
"ClusterIdentifier": ""
},
"ExternalDBSettings": {
"DriverName": "cockroach",
"DataSource": "",
"DataSourceReplicas": [],
"DataSourceSearchReplicas": []
},
"ExternalBucketSettings": {
"AmazonS3AccessKeyId": "",
"AmazonS3SecretAccessKey": "",
"AmazonS3Bucket": "",
"AmazonS3PathPrefix": "",
"AmazonS3Region": "us-east-1",
"AmazonS3Endpoint": "s3.amazonaws.com",
"AmazonS3SSL": true,
"AmazonS3SignV2": false,
"AmazonS3SSE": false
},
"ExternalAuthProviderSettings": {
"Enabled": false,
"KeycloakAdminUser": "mmadmin",
"KeycloakAdminPassword": "mmpass",
"KeycloakRealmFilePath": "",
"KeycloakDBDumpURI": "",
"GenerateUsersCount": 0
},
"MattermostDownloadURL": "https://latest.mattermost.com/mattermost-enterprise-linux",
"MattermostLicenseFile": "/home/runner/mattermost-load-test",
"MattermostConfigPatchFile": "",
"AdminEmail": "[email protected]",
"AdminUsername": "sysadmin",
"AdminPassword": "Sys@dmin-sample1",
"LoadTestDownloadURL": "https://github.com/mattermost/mattermost-load-test-ng/releases/download/v1.23.0/mattermost-load-test-ng-v1.23.0-linux-amd64.tar.gz",
"LogSettings": {
"EnableConsole": true,
"ConsoleLevel": "INFO",
"ConsoleJson": false,
"EnableFile": true,
"FileLevel": "INFO",
"FileJson": true,
"FileLocation": "deployer.log",
"EnableColor": true
},
"Report": {
"Label": "{instance=~\"app.*\"}",
"GraphQueries": [
{
"Name": "CPU Utilization",
"Query": "avg(rate(mattermost_process_cpu_seconds_total{instance=~\"app.*\"}[1m])* 100)"
},
{
"Name": "Heap In Use",
"Query": "avg(go_memstats_heap_inuse_bytes{instance=~\"app.*:8067\"})"
},
{
"Name": "Stack In Use",
"Query": "avg(go_memstats_stack_inuse_bytes{instance=~\"app.*:8067\"})"
},
{
"Name": "Goroutines In Use",
"Query": "sum(go_goroutines{instance=~\"app.*:8067\"})"
},
{
"Name": "RPS",
"Query": "sum(rate(mattermost_http_requests_total{instance=~\"app.*:8067\"}[1m]))"
},
{
"Name": "Avg Store times",
"Query": "sum(rate(mattermost_db_store_time_sum{instance=~\"app.*:8067\"}[1m])) / sum(rate(mattermost_db_store_time_count{instance=~\"app.*:8067\"}[1m]))"
},
{
"Name": "P99 Store times",
"Query": "histogram_quantile(0.99, sum(rate(mattermost_db_store_time_bucket[1m])) by (le))"
},
{
"Name": "Avg API times",
"Query": "sum(rate(mattermost_api_time_sum[1m])) / sum(rate(mattermost_api_time_count[1m]))"
},
{
"Name": "P99 API times",
"Query": "histogram_quantile(0.99, sum(rate(mattermost_api_time_bucket[1m])) by (le))"
}
]
},
"StorageSizes": {
"Agent": 10,
"Proxy": 10,
"App": 15,
"Metrics": 50,
"Job": 50,
"ElasticSearch": 20
},
"PyroscopeSettings": {
"EnableAppProfiling": true,
"EnableAgentProfiling": true,
"BlockProfileRate": 0
},
"CustomTags": {
"Origin": "ci-automated-loadtests",
"PR": "",
"SHA": ""
}
}

0 comments on commit deadfc9

Please sign in to comment.