From f86f589eb2fbbf6a5198fb97f2dee0d9c99a826a Mon Sep 17 00:00:00 2001 From: JoJo Date: Wed, 6 Sep 2023 12:08:02 -0400 Subject: [PATCH] Set lower connection timeout on connection pool, to avoid long running checks (#15768) * Set lower connection timeout on connection pool, to avoid long running checks * except PoolTimeout on autodiscovery & emit warning * changelog entry --- postgres/CHANGELOG.md | 4 +++ postgres/assets/configuration/spec.yaml | 7 +++++ postgres/datadog_checks/postgres/config.py | 3 ++ .../postgres/config_models/defaults.py | 4 +++ .../postgres/config_models/instance.py | 1 + .../datadog_checks/postgres/connections.py | 2 +- postgres/datadog_checks/postgres/postgres.py | 30 +++++++++++++++---- postgres/tests/test_connections.py | 9 +++++- 8 files changed, 52 insertions(+), 8 deletions(-) diff --git a/postgres/CHANGELOG.md b/postgres/CHANGELOG.md index 7ab632aa450e1..15e97f9a5ce5c 100644 --- a/postgres/CHANGELOG.md +++ b/postgres/CHANGELOG.md @@ -2,6 +2,10 @@ ## Unreleased +***Fixed***: + +* Set lower connection timeout on connection pool, to avoid long running checks ([#15768](https://github.com/DataDog/integrations-core/pull/15768)) + ## 14.2.2 / 2023-09-05 ***Fixed***: diff --git a/postgres/assets/configuration/spec.yaml b/postgres/assets/configuration/spec.yaml index d76a22a355168..798ec561b4168 100644 --- a/postgres/assets/configuration/spec.yaml +++ b/postgres/assets/configuration/spec.yaml @@ -125,6 +125,13 @@ files: value: type: integer example: 5000 + - name: connection_timeout + description: | + Sets the timeout (in ms) that the agent will wait to receive a connection from the database. + hidden: true + value: + type: integer + example: 5000 - name: idle_connection_timeout description: | Sets the timeout (in ms) a connection used in the connection pool will be idle until it is closed by the pooler. diff --git a/postgres/datadog_checks/postgres/config.py b/postgres/datadog_checks/postgres/config.py index 577093659f2d0..22aa8da18dd7f 100644 --- a/postgres/datadog_checks/postgres/config.py +++ b/postgres/datadog_checks/postgres/config.py @@ -58,6 +58,9 @@ def __init__(self, instance): '"dbname" parameter must be set OR autodiscovery must be enabled when using the "relations" parameter.' ) self.max_connections = instance.get('max_connections', 30) + connection_timeout_ms = instance.get('connection_timeout', 5000) + # Convert milliseconds to seconds and ensure a minimum of 2 seconds, which is enforced by psycopg + self.connection_timeout = max(2, connection_timeout_ms / 1000) self.tags = self._build_tags(instance.get('tags', [])) ssl = instance.get('ssl', "disable") diff --git a/postgres/datadog_checks/postgres/config_models/defaults.py b/postgres/datadog_checks/postgres/config_models/defaults.py index abffb7ef8feee..1a455ebace1e8 100644 --- a/postgres/datadog_checks/postgres/config_models/defaults.py +++ b/postgres/datadog_checks/postgres/config_models/defaults.py @@ -44,6 +44,10 @@ def instance_collect_wal_metrics(): return False +def instance_connection_timeout(): + return 5000 + + def instance_data_directory(): return '/usr/local/pgsql/data' diff --git a/postgres/datadog_checks/postgres/config_models/instance.py b/postgres/datadog_checks/postgres/config_models/instance.py index 8adc4f098be7d..a9693c78d87ca 100644 --- a/postgres/datadog_checks/postgres/config_models/instance.py +++ b/postgres/datadog_checks/postgres/config_models/instance.py @@ -177,6 +177,7 @@ class InstanceConfig(BaseModel): collect_schemas: Optional[CollectSchemas] = None collect_settings: Optional[CollectSettings] = None collect_wal_metrics: Optional[bool] = None + connection_timeout: Optional[int] = None custom_queries: Optional[tuple[MappingProxyType[str, Any], ...]] = None data_directory: Optional[str] = None database_autodiscovery: Optional[DatabaseAutodiscovery] = None diff --git a/postgres/datadog_checks/postgres/connections.py b/postgres/datadog_checks/postgres/connections.py index 6f952fc7cec3e..f725d30d88669 100644 --- a/postgres/datadog_checks/postgres/connections.py +++ b/postgres/datadog_checks/postgres/connections.py @@ -144,7 +144,7 @@ def get_connection(self, dbname: str, ttl_ms: int, timeout: int = None, persiste """ with self._mu: pool = self._get_connection_pool(dbname=dbname, ttl_ms=ttl_ms, timeout=timeout, persistent=persistent) - db = pool.getconn(timeout=timeout) + db = pool.getconn() try: yield db finally: diff --git a/postgres/datadog_checks/postgres/postgres.py b/postgres/datadog_checks/postgres/postgres.py index 57dbd8f3b6df9..40c4b6d632aa0 100644 --- a/postgres/datadog_checks/postgres/postgres.py +++ b/postgres/datadog_checks/postgres/postgres.py @@ -9,7 +9,7 @@ import psycopg from cachetools import TTLCache from psycopg.rows import dict_row -from psycopg_pool import ConnectionPool +from psycopg_pool import ConnectionPool, PoolTimeout from six import iteritems from datadog_checks.base import AgentCheck @@ -542,10 +542,20 @@ def _collect_relations_autodiscovery(self, instance_tags, relations_scopes): start_time = time() databases = self.autodiscovery.get_items() for db in databases: - with self.db_pool.get_connection(db, self._config.idle_connection_timeout) as conn: - with conn.cursor() as cursor: - for scope in relations_scopes: - self._query_scope(cursor, scope, instance_tags, False, db) + try: + with self.db_pool.get_connection(db, self._config.idle_connection_timeout) as conn: + with conn.cursor() as cursor: + for scope in relations_scopes: + self._query_scope(cursor, scope, instance_tags, False, db) + except PoolTimeout: + self.log.warning( + "Unable to establish connection to %s in %d. " + "If you wish to exclude this database from autodiscovery, " + "add it to the `exclude` list", + db, + self._config.connection_timeout, + ) + elapsed_ms = (time() - start_time) * 1000 self.histogram( "dd.postgres._collect_relations_autodiscovery.time", @@ -651,6 +661,7 @@ def _new_connection(self, dbname: str, min_pool_size: int = 1, max_pool_size: in kwargs=args, open=True, name=dbname, + timeout=self._config.connection_timeout, ) else: password = self._config.password @@ -688,7 +699,14 @@ def _new_connection(self, dbname: str, min_pool_size: int = 1, max_pool_size: in if self._config.ssl_password: conn_args['sslpassword'] = self._config.ssl_password args.update(conn_args) - pool = ConnectionPool(min_size=min_pool_size, max_size=max_pool_size, kwargs=args, open=True, name=dbname) + pool = ConnectionPool( + min_size=min_pool_size, + max_size=max_pool_size, + kwargs=args, + open=True, + name=dbname, + timeout=self._config.connection_timeout, + ) return pool def _connect(self): diff --git a/postgres/tests/test_connections.py b/postgres/tests/test_connections.py index a62d861d2eb79..a24ebb9869819 100644 --- a/postgres/tests/test_connections.py +++ b/postgres/tests/test_connections.py @@ -318,7 +318,14 @@ def local_pool(dbname, min_pool_size, max_pool_size): 'password': PASSWORD_ADMIN, 'dbname': dbname, } - return ConnectionPool(min_size=min_pool_size, max_size=max_pool_size, kwargs=args, open=True, name=dbname) + return ConnectionPool( + min_size=min_pool_size, + max_size=max_pool_size, + kwargs=args, + open=True, + name=dbname, + timeout=2, + ) def get_activity(db_pool, unique_id):