Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 5954 - Disable Transparent Huge Pages #5955

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions dirsrvtests/tests/suites/healthcheck/health_tunables_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# --- BEGIN COPYRIGHT BLOCK ---
# Copyright (C) 2023 Red Hat, Inc.
# All rights reserved.
#
# License: GPL (version 3 or any later version).
# See LICENSE for details.
# --- END COPYRIGHT BLOCK ---
#

import subprocess
import pytest
import re
import os
from lib389.utils import *
from lib389.cli_base import FakeArgs
from lib389.topologies import topology_st
from lib389.cli_ctl.health import health_check_run
from lib389.paths import Paths

# Text searched for in the plain-text healthcheck log when no issue is expected.
CMD_OUTPUT = 'No issues found.'
# Text searched for in the JSON healthcheck log when no issue is expected.
JSON_OUTPUT = '[]'
# Lint error code searched for when Transparent Huge Pages should be reported.
RET_CODE = 'DSTHPLE0001'

# Module-level logger.
# NOTE(review): `logging` is not imported explicitly here; presumably it is
# provided by the star import from lib389.utils -- confirm.
log = logging.getLogger(__name__)
# Path information object; its `with_systemd` attribute is consulted by a
# skipif marker further down in this module.
p = Paths()


def run_healthcheck_and_flush_log(topology, instance, searched_code=None, json=False, searched_code2=None,
                                  list_checks=False, list_errors=False, check=None, searched_list=None):
    """Run the healthcheck for *instance* and assert on the captured log.

    :param topology: topology object exposing a ``logcap`` log capture
    :param instance: instance the healthcheck is run against
    :param searched_code: text that must be present in the captured log
                          (only used when *searched_list* is None)
    :param json: value for the healthcheck ``json`` option
    :param searched_code2: optional second text that must also be present
    :param list_checks: value for the healthcheck ``list_checks`` option
    :param list_errors: value for the healthcheck ``list_errors`` option
    :param check: value for the healthcheck ``check`` option
    :param searched_list: optional iterable of texts that must all be present;
                          when given it takes the place of *searched_code*
    :raises AssertionError: if an expected text is missing from the log
    """
    args = FakeArgs()
    options = {
        'instance': instance.serverid,
        'verbose': instance.verbose,
        'list_errors': list_errors,
        'list_checks': list_checks,
        'check': check,
        'dry_run': False,
        'json': json,
    }
    for option, value in options.items():
        setattr(args, option, value)

    log.info('Use healthcheck with --json == {} option'.format(json))
    health_check_run(instance, topology.logcap.log, args)

    if searched_list is None:
        assert topology.logcap.contains(searched_code)
        log.info('Healthcheck returned searched code: %s' % searched_code)
    else:
        for item in searched_list:
            assert topology.logcap.contains(item)
            log.info('Healthcheck returned searched item: %s' % item)

    if searched_code2 is not None:
        assert topology.logcap.contains(searched_code2)
        log.info('Healthcheck returned searched code: %s' % searched_code2)

    # Start the next healthcheck run with an empty capture buffer.
    log.info('Clear the log')
    topology.logcap.flush()


def _set_thp_system_mode(mode):
    """Write *mode* to the system-wide THP ``enabled`` sysfs file.

    :param mode: string written verbatim to the sysfs file
    """
    with open('/sys/kernel/mm/transparent_hugepage/enabled', 'w') as thp_file:
        log.info(f"Setting THP mode to {mode}")
        thp_file.write(mode)


def _set_thp_instance_mode(inst, disable: bool):
    """Write a systemd drop-in setting THP_DISABLE for the instance, then restart it.

    :param inst: instance whose ``dirsrv@<serverid>`` unit gets the drop-in
    :param disable: True writes ``THP_DISABLE=1``, False writes ``THP_DISABLE=0``
    :raises subprocess.CalledProcessError: if ``systemctl edit`` exits non-zero
    """
    # Extend the current environment rather than replacing it: passing only
    # SYSTEMD_EDITOR would drop PATH (and everything else) for systemctl and
    # for the `tee` process it spawns.
    systemctl_env = {**os.environ, 'SYSTEMD_EDITOR': 'tee'}
    service_config = f"[Service]\nEnvironment=THP_DISABLE={int(disable)}"
    subprocess.run(['systemctl', 'edit', '--drop-in=thp.conf', f'dirsrv@{inst.serverid}'],
                   input=service_config.encode(), check=True, env=systemctl_env)
    # `systemctl` tries to be helpful and passes +4 as a parameter for the EDITOR.
    # But when `tee` is used as an EDITOR, it creates another file with our overrides contents.
    # So let's remove it
    try:
        os.remove("+4")
    except FileNotFoundError:
        # Nothing to clean up: no stray file was created.
        pass

    inst.restart()


def _get_thp_system_mode():
    """Return the bracketed value from the system-wide THP ``enabled`` sysfs file.

    :returns: the text found between the first pair of square brackets
    """
    with open('/sys/kernel/mm/transparent_hugepage/enabled', 'r') as thp_file:
        contents = thp_file.read().strip()
    # Group 1 is the text between the brackets.
    selected = re.search(r'\[([^\]]+)\]', contents)
    mode = selected[1]
    log.info(f"Current THP mode is {mode}")
    return mode


@pytest.fixture(scope="function")
def thp_reset(request):
    """Record the system-wide THP mode and restore it when the test finishes."""
    saved_mode = _get_thp_system_mode()
    # Registered only after the current mode was read successfully.
    request.addfinalizer(lambda: _set_thp_system_mode(saved_mode))


@pytest.mark.skipif(get_user_is_root() is False,
                    reason="This test requires root permissions to change kernel tunables.")
@pytest.mark.skipif(p.with_systemd is False, reason='Needs systemd to run')
@pytest.mark.skipif(ds_is_older("2.5.0"), reason="Not implemented")
@pytest.mark.parametrize("system_thp_mode,instance_thp_mode,expected_output",
                         [("always", False, (RET_CODE, RET_CODE)),
                          ("always", True, (CMD_OUTPUT, JSON_OUTPUT)),
                          ("never", False, (CMD_OUTPUT, JSON_OUTPUT)),
                          ("never", True, (CMD_OUTPUT, JSON_OUTPUT))],
                         ids=["System and Instance THP ON",
                              "System THP ON, Instance THP OFF",
                              "System THP OFF, Instance THP ON",
                              "System THP OFF, Instance THP OFF"])
def test_healthcheck_transparent_huge_pages(topology_st, system_thp_mode, instance_thp_mode, expected_output, thp_reset):
    """Check that HealthCheck returns DSTHPLE0001 only when THP is on for both system and instance

    :id: 1f195e10-6403-4c92-8ac9-724b669e8cf2
    :setup: Standalone instance
    :parametrized: yes
    :steps:
        1. Enable THP system wide and for the instance
        2. Use HealthCheck without --json option
        3. Use HealthCheck with --json option
        4. Enable THP system wide, disable THP for the instance
        5. Use HealthCheck without --json option
        6. Use HealthCheck with --json option
        7. Disable THP system wide, enable THP for the instance
        8. Use HealthCheck without --json option
        9. Use HealthCheck with --json option
        10. Disable THP system wide, disable THP for the instance
        11. Use HealthCheck without --json option
        12. Use HealthCheck with --json option
    :expectedresults:
        1. Success
        2. HealthCheck should return code DSTHPLE0001
        3. HealthCheck should return code DSTHPLE0001
        4. Success
        5. HealthCheck reports no issue found
        6. HealthCheck reports no issue found
        7. Success
        8. HealthCheck reports no issue found
        9. HealthCheck reports no issue found
        10. Success
        11. HealthCheck reports no issue found
        12. HealthCheck reports no issue found
    """
    inst = topology_st.standalone
    inst.config.set("nsslapd-accesslog-logbuffering", "on")

    _set_thp_system_mode(system_thp_mode)
    # instance_thp_mode is forwarded as the `disable` flag of the helper.
    _set_thp_instance_mode(inst, instance_thp_mode)

    # expected_output holds (plain-text expectation, JSON expectation).
    for expected, use_json in zip(expected_output, (False, True)):
        run_healthcheck_and_flush_log(topology_st, inst, expected, json=use_json)
5 changes: 4 additions & 1 deletion dirsrvtests/tests/suites/healthcheck/healthcheck_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ def test_healthcheck_list_checks(topology_st):
'replication:conflicts',
'dseldif:nsstate',
'tls:certificate_expiration',
'logs:notes']
'logs:notes',
'tunables:thp',
]

standalone = topology_st.standalone

Expand Down Expand Up @@ -205,6 +207,7 @@ def test_healthcheck_list_errors(topology_st):
'DSSKEWLE0001 :: Medium time skew',
'DSSKEWLE0002 :: Major time skew',
'DSSKEWLE0003 :: Extensive time skew',
'DSTHPLE0001 :: Transparent Huge Pages',
'DSVIRTLE0001 :: Virtual attribute indexed']

standalone = topology_st.standalone
Expand Down
12 changes: 12 additions & 0 deletions ldap/servers/slapd/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ union semun
#include <malloc.h>
#endif

#ifdef LINUX
#include <sys/prctl.h>
#endif

/* Forward Declarations */

struct main_config
Expand Down Expand Up @@ -523,6 +527,14 @@ main(int argc, char **argv)
{
int return_value = 0;
struct main_config mcfg = {0};
#ifdef LINUX
#if defined(PR_SET_THP_DISABLE)
char *thp_disable = getenv("THP_DISABLE");
if (thp_disable != NULL && strcmp(thp_disable, "1") == 0) {
prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0);
}
#endif
#endif

/* Set a number of defaults */
mcfg.slapd_exemode = SLAPD_EXEMODE_UNKNOWN;
Expand Down
2 changes: 2 additions & 0 deletions src/lib389/lib389/cli_ctl/health.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from lib389.nss_ssl import NssSsl
from lib389.dseldif import FSChecks, DSEldif
from lib389.dirsrv_log import DirsrvAccessLog
from lib389.tunables import Tunables
from lib389 import lint
from lib389 import plugins
from lib389._constants import DSRC_HOME
Expand All @@ -39,6 +40,7 @@
DSEldif,
NssSsl,
DirsrvAccessLog,
Tunables,
]


Expand Down
26 changes: 26 additions & 0 deletions src/lib389/lib389/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -511,3 +511,29 @@
'fix': """Stop using this these unknown attributes in the filter, or add the schema
to the server and make sure it's properly indexed."""
}

# Transparent Huge Pages
# Lint report template for Transparent Huge Pages (THP) being enabled.
DSTHPLE0001 = {
# Error code of this report.
'dsle': 'DSTHPLE0001',
'severity': 'Medium',
# Short title of the report.
'description': 'Transparent Huge Pages',
'items': ['Possible Performance Impact'],
# Explanation of why enabled THP is a concern.
'detail': """Transparent Huge Pages are enabled. This can lead to an unexpected memory
consumption, especially when using large caches.\n""",
# Remediation text: boot-time, runtime and per-instance ways to disable THP.
'fix': """Disable Transparent Huge Pages.
System-wide at boot:
Add "transparent_hugepage=never" to the list of kernel boot parameters.

System-wide at runtime:
# echo "never" > /sys/kernel/mm/transparent_hugepage/enabled
# echo "never" > /sys/kernel/mm/transparent_hugepage/defrag

Per instance (for the versions of 389 Directory Server that support it):
Edit dirsrv unit file:
# systemctl edit dirsrv@instance_name

And uncomment the following lines:
[Service]
Environment=THP_DISABLE=1
"""
}
65 changes: 65 additions & 0 deletions src/lib389/lib389/tunables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# --- BEGIN COPYRIGHT BLOCK ---
# Copyright (C) 2023 Red Hat, Inc.
# All rights reserved.
#
# License: GPL (version 3 or any later version).
# See LICENSE for details.
# --- END COPYRIGHT BLOCK ---
#

import os
import re
import copy
from lib389._mapped_object_lint import DSLint
from lib389 import pid_from_file
from lib389.lint import DSTHPLE0001

class Tunables(DSLint):
    """A class for working with system tunables

    :param instance: An instance
    :type instance: lib389.DirSrv
    """

    def __init__(self, instance):
        self._instance = instance
        # Stored as a string because it is only used to build a /proc path.
        self.pid = str(pid_from_file(instance.ds_paths.pid_file))

    @classmethod
    def lint_uid(cls):
        """Return the identifier under which this class's lint checks are grouped."""
        return 'tunables'

    def _lint_thp(self):
        """Check if THP is enabled

        Yields a copy of the DSTHPLE0001 report when Transparent Huge Pages
        are active both system-wide and for the server process.
        """
        def systemwide_thp_enabled() -> bool:
            """Return True when the sysfs THP mode is [always] or [madvise]."""
            thp_path = '/sys/kernel/mm/transparent_hugepage'
            thp_enabled_path = os.path.join(thp_path, "enabled")
            thp_status_pattern = r"(.*\[always\].*)|(.*\[madvise\].*)"
            try:
                with open(thp_enabled_path, 'r') as f:
                    thp_status = f.read().strip()
            except FileNotFoundError:
                # No sysfs entry: THP cannot be enabled system-wide.
                return False
            return re.match(thp_status_pattern, thp_status) is not None

        def instance_thp_enabled() -> bool:
            """Return True when /proc/<pid>/status reports a non-zero THP_enabled."""
            pid_status_path = f"/proc/{self.pid}/status"
            try:
                with open(pid_status_path, 'r') as pid_status:
                    for line in pid_status:
                        if 'THP_enabled' in line:
                            return bool(int(line.split()[1]))
            except (FileNotFoundError, ProcessLookupError):
                # The process is not (or no longer) there, so there is nothing
                # to report instead of aborting the whole lint run.
                return False
            # Status file has no THP_enabled line: treat it as not enabled.
            return False

        if instance_thp_enabled() and systemwide_thp_enabled():
            report = copy.deepcopy(DSTHPLE0001)
            # Use the same name the check is listed under ('tunables:thp'),
            # so the reported check can be selected by that name.
            report['check'] = 'tunables:thp'
            yield report

3 changes: 3 additions & 0 deletions wrappers/systemd.template.service.custom.conf.in
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ TimeoutStopSec=600
# Preload jemalloc
Environment=LD_PRELOAD=@libdir@/@package_name@/lib/libjemalloc.so.2

# Disable Transparent Huge Pages
Environment=THP_DISABLE=1

##################################################
# Heap profiling with jemalloc #
##################################################
Expand Down