Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support placement type for disks #3555

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lisa/sut_orchestrator/azure/arm_template.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ func getEphemeralOSImage(node object) object => {
name: '${node.name}-osDisk'
diffDiskSettings: {
option: 'local'
placement: 'CacheDisk'
placement: node.ephemeral_disk_placement_type
squirrelsc marked this conversation as resolved.
Show resolved Hide resolved
}
caching: 'ReadOnly'
createOption: 'FromImage'
Expand Down Expand Up @@ -333,7 +333,7 @@ resource nodes_data_disks 'Microsoft.Compute/disks@2022-03-02' = [
}
]

resource nodes_vms 'Microsoft.Compute/virtualMachines@2022-08-01' = [for i in range(0, node_count): {
resource nodes_vms 'Microsoft.Compute/virtualMachines@2024-03-01' = [for i in range(0, node_count): {
name: nodes[i].name
location: nodes[i].location
tags: combined_vm_tags
Expand Down
12 changes: 6 additions & 6 deletions lisa/sut_orchestrator/azure/autogen_arm_template.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
"metadata": {
"_generator": {
"name": "bicep",
"version": "0.30.23.60470",
"templateHash": "17909783643222378721"
"version": "0.32.4.45862",
"templateHash": "16398577375970436728"
}
},
"functions": [
Expand Down Expand Up @@ -113,7 +113,7 @@
"name": "[format('{0}-osDisk', parameters('node').name)]",
"diffDiskSettings": {
"option": "local",
"placement": "CacheDisk"
"placement": "[parameters('node').ephemeral_disk_placement_type]"
},
"caching": "ReadOnly",
"createOption": "FromImage",
Expand Down Expand Up @@ -685,7 +685,7 @@
"count": "[length(range(0, variables('node_count')))]"
},
"type": "Microsoft.Compute/virtualMachines",
"apiVersion": "2022-08-01",
"apiVersion": "2024-03-01",
"name": "[parameters('nodes')[range(0, variables('node_count'))[copyIndex()]].name]",
"location": "[parameters('nodes')[range(0, variables('node_count'))[copyIndex()]].location]",
"tags": "[variables('combined_vm_tags')]",
Expand Down Expand Up @@ -787,8 +787,8 @@
"metadata": {
"_generator": {
"name": "bicep",
"version": "0.30.23.60470",
"templateHash": "12249187708601787514"
"version": "0.32.4.45862",
"templateHash": "7856159159103188049"
}
},
"functions": [
Expand Down
21 changes: 21 additions & 0 deletions lisa/sut_orchestrator/azure/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import sys
from dataclasses import InitVar, dataclass, field
from datetime import datetime, timedelta, timezone
from enum import Enum
from functools import lru_cache, partial
from pathlib import Path, PurePath
from threading import Lock
Expand Down Expand Up @@ -1070,6 +1071,7 @@ class AzureNodeArmParameter(AzureNodeSchema):
os_disk_type: str = ""
data_disk_type: str = ""
disk_controller_type: str = ""
ephemeral_disk_placement_type: str = ""
security_profile: Dict[str, Any] = field(default_factory=dict)

@classmethod
Expand Down Expand Up @@ -1106,6 +1108,25 @@ def get_create_option() -> List[str]:
]


# Ephemeral OS disk placement types.
# Reference:
# https://learn.microsoft.com/en-us/azure/virtual-machines/ephemeral-os-disks-faq
class DiskPlacementType(str, Enum):
    """Placement options for an ephemeral OS disk.

    Members are also ``str`` instances, so a member compares equal to its
    raw string value and ``DiskPlacementType("CacheDisk")`` resolves a raw
    string back to the matching member.
    """

    # Empty value: no placement selected.
    NONE = ""
    RESOURCE = "ResourceDisk"
    CACHE = "CacheDisk"
    NVME = "NvmeDisk"


def get_disk_placement_priority() -> List[DiskPlacementType]:
    """Return the ephemeral OS disk placement types as a priority list.

    A fresh list is built on every call, so callers may mutate the result
    without affecting later calls.

    Returns:
        The placement types in the order NVME, CACHE, RESOURCE, NONE.
    """
    # NOTE(review): presumably earlier entries are preferred when a
    # placement is picked from several candidates; confirm against the
    # search_space "*_setspace_by_priority" helpers that consume this list.
    return [
        DiskPlacementType.NVME,
        DiskPlacementType.CACHE,
        DiskPlacementType.RESOURCE,
        DiskPlacementType.NONE,
    ]


@dataclass_json()
@dataclass
class DataDiskSchema:
Expand Down
79 changes: 77 additions & 2 deletions lisa/sut_orchestrator/azure/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
AzureCapability,
AzureImageSchema,
AzureNodeSchema,
DiskPlacementType,
check_or_create_storage_account,
create_update_private_dns_zone_groups,
create_update_private_endpoints,
Expand All @@ -113,6 +114,7 @@
delete_virtual_network_links,
find_by_name,
get_compute_client,
get_disk_placement_priority,
get_network_client,
get_node_context,
get_or_create_file_share,
Expand Down Expand Up @@ -1308,6 +1310,31 @@ def get_all_nics_ip_info(self) -> List[IpInfo]:
@dataclass()
class AzureDiskOptionSettings(schema.DiskOptionSettings):
has_resource_disk: Optional[bool] = None
ephemeral_disk_placement_type: Optional[
Union[search_space.SetSpace[DiskPlacementType], DiskPlacementType]
] = field( # type:ignore
default_factory=partial(
search_space.SetSpace,
items=[
DiskPlacementType.NONE,
DiskPlacementType.RESOURCE,
DiskPlacementType.CACHE,
DiskPlacementType.NVME,
],
),
metadata=field_metadata(
decoder=partial(
search_space.decode_nullable_set_space,
base_type=DiskPlacementType,
default_values=[
DiskPlacementType.NONE,
DiskPlacementType.RESOURCE,
DiskPlacementType.CACHE,
DiskPlacementType.NVME,
],
)
),
)

def __hash__(self) -> int:
return super().__hash__()
Expand All @@ -1316,7 +1343,11 @@ def __str__(self) -> str:
return self.__repr__()

def __repr__(self) -> str:
return f"has_resource_disk: {self.has_resource_disk},{super().__repr__()}"
return (
f"has_resource_disk: {self.has_resource_disk},"
f"ephemeral_disk_placement_type: {self.ephemeral_disk_placement_type},"
f"{super().__repr__()}"
)

def __eq__(self, o: object) -> bool:
if not super().__eq__(o):
Expand Down Expand Up @@ -1374,6 +1405,13 @@ def check(self, capability: Any) -> search_space.ResultReason:
),
"has_resource_disk",
)
result.merge(
search_space.check_setspace(
self.ephemeral_disk_placement_type,
capability.ephemeral_disk_placement_type,
),
"ephemeral_disk_placement_type",
)
result.merge(
search_space.check_countspace(
self.max_data_disk_count, capability.max_data_disk_count
Expand All @@ -1389,7 +1427,7 @@ def check(self, capability: Any) -> search_space.ResultReason:

return result

def _call_requirement_method(
def _call_requirement_method( # noqa: C901
self, method: RequirementMethod, capability: Any
) -> Any:
assert isinstance(
Expand Down Expand Up @@ -1476,6 +1514,38 @@ def _call_requirement_method(
schema.disk_controller_type_priority,
)

# refer
# https://learn.microsoft.com/en-us/powershell/module/az.compute/set-azvmssstorageprofile?view=azps-13.0.0 # noqa: E501
# https://github.com/MicrosoftDocs/azure-compute-docs/blob/main/articles/virtual-machines/ephemeral-os-disks-faq.md # noqa: E501
# Currently, the supported ephemeral disk placement types are Resource (or Temp), Cache, and NVMe. # noqa: E501
# The Ephemeral Disk Placement type is set in the "DiffDiskPlacement" property of the VM's storage profile. # noqa: E501
if value.os_disk_type == schema.DiskType.Ephemeral:
cap_ephemeral_disk_placement_type = capability.ephemeral_disk_placement_type
if isinstance(cap_ephemeral_disk_placement_type, search_space.SetSpace):
assert len(cap_ephemeral_disk_placement_type) > 0, (
"capability should have at least one ephemeral disk placement type,"
" but it's empty"
)
elif isinstance(cap_ephemeral_disk_placement_type, DiskPlacementType):
cap_ephemeral_disk_placement_type = search_space.SetSpace[
DiskPlacementType
](is_allow_set=True, items=[cap_ephemeral_disk_placement_type])
else:
raise LisaException(
"unknown ephemeral disk placement type "
f"on capability, type: {cap_ephemeral_disk_placement_type}"
)

value.ephemeral_disk_placement_type = getattr(
search_space, f"{method.value}_setspace_by_priority"
)(
self.ephemeral_disk_placement_type,
capability.ephemeral_disk_placement_type,
get_disk_placement_priority(),
)
else:
value.ephemeral_disk_placement_type = DiskPlacementType.NONE

# below values affect data disk only.
if self.data_disk_count is not None or capability.data_disk_count is not None:
value.data_disk_count = getattr(search_space, f"{method.value}_countspace")(
Expand Down Expand Up @@ -2057,6 +2127,11 @@ def _get_raw_data_disks_bsd(self) -> List[str]:

return data_disks

def get_ephemeral_disk_placement_type(self) -> Any:
    """Return the ephemeral OS disk placement reported for this node's VM.

    The VM is looked up through ``get_vm`` and the value is read from
    ``storage_profile.os_disk.diff_disk_settings.placement``.

    Returns:
        The placement value from the VM's diff disk settings, or ``None``
        when the OS disk has no diff disk settings.
    """
    azure_platform: AzurePlatform = self._platform  # type: ignore
    vm = get_vm(azure_platform, self._node)
    diff_disk_settings = vm.storage_profile.os_disk.diff_disk_settings
    # diff_disk_settings is optional in the compute model; without this
    # guard a VM that lacks it raises AttributeError here.
    if diff_disk_settings is None:
        return None
    return diff_disk_settings.placement


def get_azure_disk_type(disk_type: schema.DiskType) -> str:
assert isinstance(disk_type, schema.DiskType), (
Expand Down
72 changes: 66 additions & 6 deletions lisa/sut_orchestrator/azure/platform_.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
CommunityGalleryImageSchema,
DataDiskCreateOption,
DataDiskSchema,
DiskPlacementType,
SharedImageGallerySchema,
check_or_create_resource_group,
check_or_create_storage_account,
Expand Down Expand Up @@ -1455,6 +1456,7 @@ def _create_node_arm_parameters(
arm_parameters.os_disk_type = features.get_azure_disk_type(
capability.disk.os_disk_type
)

assert isinstance(capability.disk.data_disk_type, schema.DiskType)
arm_parameters.data_disk_type = features.get_azure_disk_type(
capability.disk.data_disk_type
Expand All @@ -1471,6 +1473,17 @@ def _create_node_arm_parameters(
)
arm_parameters.disk_controller_type = capability.disk.disk_controller_type.value

cap_disk: features.AzureDiskOptionSettings = cast(
features.AzureDiskOptionSettings,
capability.disk,
)
squirrelsc marked this conversation as resolved.
Show resolved Hide resolved
assert isinstance(
cap_disk.ephemeral_disk_placement_type, DiskPlacementType
), f"actual: {type(cap_disk.ephemeral_disk_placement_type)}"
arm_parameters.ephemeral_disk_placement_type = (
cap_disk.ephemeral_disk_placement_type.value
)

assert capability.network_interface
assert isinstance(
capability.network_interface.nic_count, int
Expand Down Expand Up @@ -1832,14 +1845,45 @@ def _resource_sku_to_capability( # noqa: C901
else:
node_space.disk.disk_controller_type.add(schema.DiskControllerType.SCSI)

# If EphemeralOSDisk is supported, then check for the placement type
if azure_raw_capabilities.get("EphemeralOSDiskSupported", None) == "True":
# Check if CachedDiskBytes is greater than 30GB
# We use diff disk as cache disk for ephemeral OS disk
node_space.disk.os_disk_type.add(schema.DiskType.Ephemeral)
# Add the EphemeralDiskPlacementType
node_space.disk.ephemeral_disk_placement_type = search_space.SetSpace(
True, [DiskPlacementType.NONE]
)
ephemeral_disk_placement_types = azure_raw_capabilities.get(
"SupportedEphemeralOSDiskPlacements", None
)
if ephemeral_disk_placement_types:
for allowed_type in ephemeral_disk_placement_types.split(","):
try:
node_space.disk.ephemeral_disk_placement_type.add(
DiskPlacementType(allowed_type)
)
except ValueError:
self._log.error(
f"'{allowed_type}' is not a known Ephemeral Disk Placement"
f" Type "
f"({[x for x in DiskPlacementType]})"
)

# EphemeralDiskPlacementType can be - ResourceDisk, CacheDisk or NvmeDisk.
# Depending on that, "CachedDiskBytes" may or may not be found in
# capabilities.
# refer
# https://learn.microsoft.com/en-us/azure/virtual-machines/ephemeral-os-disks-faq
resource_disk_bytes = azure_raw_capabilities.get("MaxResourceVolumeMB", 0)
cached_disk_bytes = azure_raw_capabilities.get("CachedDiskBytes", 0)
cached_disk_bytes_gb = int(int(cached_disk_bytes) / 1024 / 1024 / 1024)
if cached_disk_bytes_gb >= 30:
node_space.disk.os_disk_type.add(schema.DiskType.Ephemeral)
node_space.disk.os_disk_size = cached_disk_bytes_gb
nvme_disk_bytes = azure_raw_capabilities.get("NvmeDiskSizeInMiB", 0)
if nvme_disk_bytes:
squirrelsc marked this conversation as resolved.
Show resolved Hide resolved
squirrelsc marked this conversation as resolved.
Show resolved Hide resolved
node_space.disk.os_disk_size = int(int(nvme_disk_bytes) / 1024)
elif cached_disk_bytes:
node_space.disk.os_disk_size = int(
int(cached_disk_bytes) / 1024 / 1024 / 1024
)
else:
node_space.disk.os_disk_size = int(int(resource_disk_bytes) / 1024)

# set AN
if azure_raw_capabilities.get("AcceleratedNetworkingEnabled", None) == "True":
Expand Down Expand Up @@ -2085,6 +2129,10 @@ def _generate_max_capability(self, vm_size: str, location: str) -> AzureCapabili
](is_allow_set=True, items=[])
node_space.disk.disk_controller_type.add(schema.DiskControllerType.SCSI)
node_space.disk.disk_controller_type.add(schema.DiskControllerType.NVME)
node_space.disk.ephemeral_disk_placement_type = search_space.SetSpace[
DiskPlacementType
](is_allow_set=True, items=[])
node_space.disk.ephemeral_disk_placement_type.add(DiskPlacementType.NVME)
node_space.network_interface = schema.NetworkInterfaceOptionSettings()
node_space.network_interface.data_path = search_space.SetSpace[
schema.NetworkDataPath
Expand Down Expand Up @@ -2793,6 +2841,18 @@ def _set_disk_features(
isinstance(node_space.disk.os_disk_type, search_space.SetSpace)
and node_space.disk.os_disk_type.isunique(schema.DiskType.Ephemeral)
):
node_disk: features.AzureDiskOptionSettings = cast(
features.AzureDiskOptionSettings,
node_space.disk,
)
if isinstance(node_disk.ephemeral_disk_placement_type, DiskPlacementType):
node_disk.ephemeral_disk_placement_type = search_space.SetSpace[
DiskPlacementType
](
is_allow_set=True,
items=[node_disk.ephemeral_disk_placement_type],
)

node_space.disk.os_disk_size = search_space.IntRange(
min=self._get_os_disk_size(azure_runbook)
)
Expand Down
Loading