diff --git a/microsoft/testsuites/dpdk/dpdksuite.py b/microsoft/testsuites/dpdk/dpdksuite.py index 4a00a8cb2a..6fc2a6932f 100644 --- a/microsoft/testsuites/dpdk/dpdksuite.py +++ b/microsoft/testsuites/dpdk/dpdksuite.py @@ -24,19 +24,7 @@ from lisa.features import Gpu, Infiniband, IsolatedResource, Sriov from lisa.operating_system import BSD, CBLMariner, Ubuntu, Windows from lisa.testsuite import TestResult, simple_requirement -from lisa.tools import ( - Echo, - Git, - Hugepages, - Ip, - Kill, - Lscpu, - Lsmod, - Make, - Modprobe, - Ping, - Timeout, -) +from lisa.tools import Echo, Git, Hugepages, Ip, Kill, Lscpu, Lsmod, Make, Modprobe from lisa.tools.hugepages import HugePageSize from lisa.tools.lscpu import CpuArchitecture from lisa.util.constants import SIGINT @@ -56,6 +44,7 @@ generate_send_receive_run_info, init_nodes_concurrent, initialize_node_resources, + run_dpdk_symmetric_mp, run_testpmd_concurrent, verify_dpdk_build, verify_dpdk_l3fwd_ntttcp_tcp, @@ -139,98 +128,7 @@ def verify_dpdk_symmetric_mp( variables: Dict[str, Any], result: TestResult, ) -> None: - # setup and unwrap the resources for this test - nics = [node.nics.get_nic("eth1"), node.nics.get_nic("eth2")] - test_nic = node.nics.get_secondary_nic() - extra_nics = [nic for nic in nics if nic.name != test_nic.name] - ping = node.tools[Ping] - ping.install() - try: - test_kit = initialize_node_resources( - node, - log, - variables, - "netvsc", - HugePageSize.HUGE_2MB, - extra_nics=extra_nics, - ) - except (NotEnoughMemoryException, UnsupportedOperationException) as err: - raise SkippedException(err) - testpmd = test_kit.testpmd - if isinstance(testpmd.installer, PackageManagerInstall): - # The Testpmd tool doesn't get re-initialized - # even if you invoke it with new arguments. - raise SkippedException( - "DPDK symmetric_mp test is not implemented for " - " package manager installation." - ) - symmetric_mp_path = testpmd.get_example_app_path("dpdk-symmetric_mp") - node.log.debug("\n".join([str(nic) for nic in nics])) - if node.nics.is_mana_device_present(): - nic_args = ( - f'--vdev="{test_nic.pci_slot},' - f"mac={test_nic.mac_addr}," - f'mac={extra_nics[0].mac_addr}"' - ) - else: - nic_args = f"-b {node.nics.get_primary_nic().pci_slot}" - - output = node.execute( - f"{str(testpmd.get_example_app_path('dpdk-devname'))} {nic_args}", - sudo=True, - shell=True, - ).stdout - port_mask = 0x0 - port_regex = re.compile( - r"dpdk-devname found port=(?P[0-9]+) driver=net_netvsc .*\n" - ) - matches = port_regex.findall(output) - if not matches: - fail("could not find port ids") - for match in matches: - port_mask ^= 1 << (int(match)) - - node.log.debug(f"Port mask: {hex(port_mask)}") - # primary_nic = node.nics.get_primary_nic().pci_slot - symmetric_mp_args = ( - f"{nic_args} --proc-type auto " - "--log-level netvsc,debug --log-level mana,debug --log-level eal,debug " - f"-- -p {hex(port_mask)[2:]} --num-procs 2" - ) - primary = node.tools[Timeout].start_with_timeout( - command=f"{str(symmetric_mp_path)} -l 1 {symmetric_mp_args} --proc-id 0", - timeout=150, - signal=SIGINT, - kill_timeout=30, - ) - primary.wait_output("APP: Finished Process Init") - secondary = node.tools[Timeout].start_with_timeout( - command=f"{str(symmetric_mp_path)} -l 2 {symmetric_mp_args} --proc-id 1", - timeout=120, - signal=SIGINT, - kill_timeout=35, - ) - _ = ping.ping_async( - target=test_nic.ip_addr, - nic_name=node.nics.get_primary_nic().name, - count=100, - ) - secondary_exit = secondary.wait_result() - assert_that(secondary_exit.exit_code).described_as( - f"Secondary process failure: {secondary_exit.stdout}" - ).is_zero() - process_result = primary.wait_result( - expected_exit_code=0, - expected_exit_code_failure_message="Primary process failed to exit with 0", - ) - node.log.debug( - f"Primary process:\n\n{process_result.stdout}\n" - f"\nprimary stderr:\n{process_result.stderr}" - ) - node.log.debug( - f"Secondary system:\n\n{secondary_exit.stdout}\n" - f"secondary stderr:\n{secondary_exit.stderr}" - ) + run_dpdk_symmetric_mp(node, log, variables) @TestCaseMetadata( description=""" diff --git a/microsoft/testsuites/dpdk/dpdkutil.py b/microsoft/testsuites/dpdk/dpdkutil.py index 3345f7d237..81fbb0e7f6 100644 --- a/microsoft/testsuites/dpdk/dpdkutil.py +++ b/microsoft/testsuites/dpdk/dpdkutil.py @@ -1,4 +1,5 @@ import itertools +import re import time from collections import deque from decimal import Decimal @@ -6,7 +7,7 @@ from functools import partial from typing import Any, Dict, List, Optional, Tuple, Union -from assertpy import assert_that +from assertpy import assert_that, fail from semver import VersionInfo from lisa import ( @@ -54,6 +55,7 @@ Downloader, GitDownloader, Installer, + PackageManagerInstall, TarDownloader, check_dpdk_support, is_url_for_git_repo, @@ -294,7 +296,7 @@ def initialize_node_resources( pmd: str, hugepage_size: HugePageSize, sample_apps: Union[List[str], None] = None, - extra_nics: Union[List[NicInfo], None] = None, + test_nics: Union[List[NicInfo], None] = None, ) -> DpdkTestResources: _set_forced_source_by_distro(node, variables) check_pmd_support(node, pmd) @@ -354,8 +356,9 @@ def initialize_node_resources( # init and enable hugepages (required by dpdk) hugepages = node.tools[Hugepages] + numa_nodes = node.tools[Lscpu].get_numa_node_count() try: - hugepages.init_hugepages(hugepage_size, minimum_gb=4) + hugepages.init_hugepages(hugepage_size, minimum_gb=8 * numa_nodes) except NotEnoughMemoryException as err: raise SkippedException(err) @@ -374,9 +377,10 @@ def initialize_node_resources( ).is_equal_to("hv_netvsc") # netvsc pmd requires uio_hv_generic to be loaded before use - test_nics = [test_nic] - if extra_nics is not None: - test_nics += extra_nics + + # Allow user to pass in an explicit list of nics to use for the test. + if test_nics is None: + test_nics = [node.nics.get_secondary_nic()] do_pmd_driver_setup(node=node, test_nics=test_nics, testpmd=testpmd, pmd=pmd) @@ -480,7 +484,9 @@ def init_nodes_concurrent( pmd: str, hugepage_size: HugePageSize, sample_apps: Union[List[str], None] = None, + test_nic_count: int = 1, ) -> List[DpdkTestResources]: + assert test_nic_count > 0, "Test Bug: test_nic_count must be > 0" # quick check when initializing, have each node ping the other nodes. # When binding DPDK directly to the VF this helps ensure l2/l3 routes # are established before handing all control over to testpmd. @@ -498,6 +504,11 @@ def init_nodes_concurrent( pmd, hugepage_size=hugepage_size, sample_apps=sample_apps, + test_nics=[ + nic + for nic in node.nics.nics.values() + if nic is not node.nics.get_primary_nic() + ][:test_nic_count], ) for node in environment.nodes.list() ], @@ -907,7 +918,7 @@ def verify_dpdk_l3fwd_ntttcp_tcp( pmd, hugepage_size, sample_apps=["l3fwd"], - extra_nics=[subnet_b_nics[forwarder]], + test_nics=[subnet_b_nics[forwarder]], ) except (NotEnoughMemoryException, UnsupportedOperationException) as err: raise SkippedException(err) @@ -1199,3 +1210,227 @@ def annotate_dpdk_test_result( log.debug(f"Adding nic version: {nic_hw}") except AssertionError as err: test_kit.node.log.debug(f"Could not fetch NIC short name: {str(err)}") + + +devname_port_regex = re.compile( + r"dpdk-devname found port=(?P[0-9]+) driver=net_netvsc .*\n" +) + + +class DpdkDevnameInfo: + def __init__(self, testpmd: DpdkTestpmd) -> None: + self._node = testpmd.node + self._testpmd = testpmd + + def get_port_info(self, nics: List[NicInfo]) -> str: + if self._node.nics.is_mana_device_present(): + nic_args = ( + ",".join( + [f'--vdev="{nics[0].pci_slot}'] + + [f"mac={nic.mac_addr}" for nic in nics], + ) + + '"' + ) + else: + nic_args = f"-b {self._node.nics.get_primary_nic().pci_slot}" + self.nic_args = nic_args + output = self._node.execute( + f"{str(self._testpmd.get_example_app_path('dpdk-devname'))} {nic_args}", + sudo=True, + shell=True, + ).stdout + + matches = devname_port_regex.findall(output) + if not matches: + fail(f"dpdk-devname could not find port ids in: {output}") + + # and create the port mask in hex from the port ids + port_mask = 0 + self.port_ids = [] + for match in matches: + port_id = int(match) + self.port_ids += [port_id] + port_mask ^= 1 << (port_id) + self.port_mask = hex(port_mask)[2:] + return self.port_mask + + +def run_dpdk_symmetric_mp( + node: Node, log: Logger, variables: Dict[str, Any], trigger_rescind: bool = False +) -> None: + # setup and unwrap the resources for this test + # get a list of the upper non-primary nics and select two of them + test_nics = [ + nic + for nic in node.nics.nics.values() + if nic != node.nics.get_primary_nic() and nic.lower + ][:2] + + ping = node.tools[Ping] + ping.install() + # initialize for netvsc, we rely on the debug messages in this test + # to identify the hotplug events. + try: + test_kit = initialize_node_resources( + node, + log, + variables, + "netvsc", + HugePageSize.HUGE_2MB, + test_nics=test_nics, + ) + except (NotEnoughMemoryException, UnsupportedOperationException) as err: + raise SkippedException(err) + testpmd = test_kit.testpmd + if isinstance(testpmd.installer, PackageManagerInstall): + # The Testpmd tool doesn't get re-initialized + # even if you invoke it with new arguments. + raise SkippedException( + "DPDK symmetric_mp test is not implemented for " + " package manager installation." + ) + symmetric_mp_path = testpmd.get_example_app_path("dpdk-symmetric_mp") + + # setup the DPDK EAL arguments for netvsc + devname_info = DpdkDevnameInfo(testpmd=testpmd) + port_mask = devname_info.get_port_info(test_nics) + nic_args = devname_info.nic_args + + # get the DPDK port IDs for the netvsc devices. + # Don't assume which port is which since there are more than one. + + node.log.debug(f"Port mask: {port_mask}") + + # Port 2: RX - 0, TX - 100, Drop - 0 + # wait for the process to exit + result_regex = re.compile( + r"Port (?P[0-9]+): " + r"RX - (?P[0-9]+), " + r"TX - (?P[0-9]+), " + r"Drop - (?P[0-9]+).*" + ) + + # create the shared section of the symmetric_mp commandline invocation + symmetric_mp_args = ( + f"{nic_args} -n 2 " + "--log-level netvsc,debug --log-level mana,debug " + "--log-level eal,debug " + "--log-level vmbus,debug " + f"-- -p {port_mask} --num-procs 2" + ) + # start the first process (id 0) on core 1 + primary = node.tools[Timeout].start_with_timeout( + command=( + f"{str(symmetric_mp_path)} -l 1 --proc-type auto " + f"{symmetric_mp_args} --proc-id 0" + ), + timeout=630, + signal=SIGINT, + kill_timeout=30, + ) + + # wait for it to start + primary.wait_output("APP: Finished Process Init", timeout=20) + + # create the second process (id 1) on core 2 + secondary = node.tools[Timeout].start_with_timeout( + command=( + f"{str(symmetric_mp_path)} -l 2 --proc-type secondary " + f"{symmetric_mp_args} --proc-id 1" + ), + timeout=600, + signal=SIGINT, + kill_timeout=35, + ) + secondary.wait_output("APP: Finished Process Init", timeout=20) + + if trigger_rescind: + # turn SRIOV off + node.features[NetworkInterface].switch_sriov( + enable=False, wait=False, reset_connections=False + ) + + # wait for the RTE_DEV_EVENT_REMOVE message + primary.wait_output( + "HN_DRIVER: netvsc_hotadd_callback(): " + "Device notification type=1" # RTE_DEV_EVENT_REMOVE + ) # relying on compiler defaults here, not great. + + if trigger_rescind: + # turn SRIOV on + node.features[NetworkInterface].switch_sriov( + enable=True, wait=False, reset_connections=False + ) + + # wait for the RTE_DEV_EVENT_ADD message + primary.wait_output( + ( + "HN_DRIVER: netvsc_hotadd_callback(): " + "Device notification type=0" # RTE_DEV_EVENT_ADD + ), + delta_only=True, + ) # relying on compiler defaults here, not great. + primary.wait_output( + ( + "HN_DRIVER: netvsc_hotplug_retry(): Found matching MAC address, " + f"adding device {test_nics[0].pci_device_name} " + f"network name {test_nics[0].lower} " + f"args mac={test_nics[0].mac_addr},mac={test_nics[1].mac_addr}" + ), + delta_only=True, + ) # relying on compiler defaults here, not great. + + ping.ping_async( + target=test_nics[0].ip_addr, + nic_name=node.nics.get_primary_nic().name, + count=100, + ) + ping.ping( + target=test_nics[1].ip_addr, + nic_name=node.nics.get_primary_nic().name, + count=100, + ignore_error=True, + ) + + # # check the exit codes + secondary_result = secondary.wait_result( + expected_exit_code=0, + expected_exit_code_failure_message=( + "Secondary symmetric_mp process returned error code." + ), + ) + assert_that(secondary_result.exit_code).described_as( + f"Secondary process failure: {secondary_result.stdout}" + ).is_zero() + primary_result = primary.wait_result( + expected_exit_code=0, + expected_exit_code_failure_message="Primary process failed to exit with 0", + ) + node.log.debug( + f"Primary process:\n\n{primary_result.stdout}\n" + f"\nprimary stderr:\n{primary_result.stderr}" + ) + node.log.debug( + f"Secondary system:\n\n{secondary_result.stdout}\n" + f"secondary stderr:\n{secondary_result.stderr}" + ) + all_matches = [ + result_regex.finditer(secondary_result.stdout), + result_regex.finditer(primary_result.stdout), + # result_regex.search(secondary_result.stdout), + # result_regex.search(primary_result.stdout), + ] + match_count = 0 + for matches in all_matches: + match_count += 1 + if matches: + node.log.debug(str(matches)) + for match in matches: + port = match.group("port_id") + rx_count = match.group("rx_count") + tx_count = match.group("tx_count") + drop_count = match.group("drop_count") + node.log.debug( + f"(match {match_count}) {port} " + f"r:{rx_count} t:{tx_count} d:{drop_count}" + )