Skip to content

Commit

Permalink
Set lower connection timeout on connection pool, to avoid long runnin…
Browse files Browse the repository at this point in the history
…g checks (#15768)

* Set lower connection timeout on connection pool, to avoid long running checks

* except PoolTimeout on autodiscovery & emit warning

* changelog entry
  • Loading branch information
jmeunier28 authored Sep 6, 2023
1 parent 8ca9fa2 commit f86f589
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 8 deletions.
4 changes: 4 additions & 0 deletions postgres/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## Unreleased

***Fixed***:

* Set lower connection timeout on connection pool, to avoid long running checks ([#15768](https://github.com/DataDog/integrations-core/pull/15768))

## 14.2.2 / 2023-09-05

***Fixed***:
Expand Down
7 changes: 7 additions & 0 deletions postgres/assets/configuration/spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,13 @@ files:
value:
type: integer
example: 5000
- name: connection_timeout
description: |
Sets the timeout (in ms) that the agent will wait to receive a connection from the database.
hidden: true
value:
type: integer
example: 5000
- name: idle_connection_timeout
description: |
Sets the timeout (in ms) a connection used in the connection pool will be idle until it is closed by the pooler.
Expand Down
3 changes: 3 additions & 0 deletions postgres/datadog_checks/postgres/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ def __init__(self, instance):
'"dbname" parameter must be set OR autodiscovery must be enabled when using the "relations" parameter.'
)
self.max_connections = instance.get('max_connections', 30)
connection_timeout_ms = instance.get('connection_timeout', 5000)
# Convert milliseconds to seconds and ensure a minimum of 2 seconds, which is enforced by psycopg
self.connection_timeout = max(2, connection_timeout_ms / 1000)
self.tags = self._build_tags(instance.get('tags', []))

ssl = instance.get('ssl', "disable")
Expand Down
4 changes: 4 additions & 0 deletions postgres/datadog_checks/postgres/config_models/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ def instance_collect_wal_metrics():
return False


def instance_connection_timeout():
return 5000


def instance_data_directory():
return '/usr/local/pgsql/data'

Expand Down
1 change: 1 addition & 0 deletions postgres/datadog_checks/postgres/config_models/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ class InstanceConfig(BaseModel):
collect_schemas: Optional[CollectSchemas] = None
collect_settings: Optional[CollectSettings] = None
collect_wal_metrics: Optional[bool] = None
connection_timeout: Optional[int] = None
custom_queries: Optional[tuple[MappingProxyType[str, Any], ...]] = None
data_directory: Optional[str] = None
database_autodiscovery: Optional[DatabaseAutodiscovery] = None
Expand Down
2 changes: 1 addition & 1 deletion postgres/datadog_checks/postgres/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def get_connection(self, dbname: str, ttl_ms: int, timeout: int = None, persiste
"""
with self._mu:
pool = self._get_connection_pool(dbname=dbname, ttl_ms=ttl_ms, timeout=timeout, persistent=persistent)
db = pool.getconn(timeout=timeout)
db = pool.getconn()
try:
yield db
finally:
Expand Down
30 changes: 24 additions & 6 deletions postgres/datadog_checks/postgres/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import psycopg
from cachetools import TTLCache
from psycopg.rows import dict_row
from psycopg_pool import ConnectionPool
from psycopg_pool import ConnectionPool, PoolTimeout
from six import iteritems

from datadog_checks.base import AgentCheck
Expand Down Expand Up @@ -542,10 +542,20 @@ def _collect_relations_autodiscovery(self, instance_tags, relations_scopes):
start_time = time()
databases = self.autodiscovery.get_items()
for db in databases:
with self.db_pool.get_connection(db, self._config.idle_connection_timeout) as conn:
with conn.cursor() as cursor:
for scope in relations_scopes:
self._query_scope(cursor, scope, instance_tags, False, db)
try:
with self.db_pool.get_connection(db, self._config.idle_connection_timeout) as conn:
with conn.cursor() as cursor:
for scope in relations_scopes:
self._query_scope(cursor, scope, instance_tags, False, db)
except PoolTimeout:
self.log.warning(
"Unable to establish connection to %s in %d. "
"If you wish to exclude this database from autodiscovery, "
"add it to the `exclude` list",
db,
self._config.connection_timeout,
)

elapsed_ms = (time() - start_time) * 1000
self.histogram(
"dd.postgres._collect_relations_autodiscovery.time",
Expand Down Expand Up @@ -651,6 +661,7 @@ def _new_connection(self, dbname: str, min_pool_size: int = 1, max_pool_size: in
kwargs=args,
open=True,
name=dbname,
timeout=self._config.connection_timeout,
)
else:
password = self._config.password
Expand Down Expand Up @@ -688,7 +699,14 @@ def _new_connection(self, dbname: str, min_pool_size: int = 1, max_pool_size: in
if self._config.ssl_password:
conn_args['sslpassword'] = self._config.ssl_password
args.update(conn_args)
pool = ConnectionPool(min_size=min_pool_size, max_size=max_pool_size, kwargs=args, open=True, name=dbname)
pool = ConnectionPool(
min_size=min_pool_size,
max_size=max_pool_size,
kwargs=args,
open=True,
name=dbname,
timeout=self._config.connection_timeout,
)
return pool

def _connect(self):
Expand Down
9 changes: 8 additions & 1 deletion postgres/tests/test_connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,14 @@ def local_pool(dbname, min_pool_size, max_pool_size):
'password': PASSWORD_ADMIN,
'dbname': dbname,
}
return ConnectionPool(min_size=min_pool_size, max_size=max_pool_size, kwargs=args, open=True, name=dbname)
return ConnectionPool(
min_size=min_pool_size,
max_size=max_pool_size,
kwargs=args,
open=True,
name=dbname,
timeout=2,
)


def get_activity(db_pool, unique_id):
Expand Down

0 comments on commit f86f589

Please sign in to comment.