Skip to content
Open
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
ce52b32
Fixed the change by making the retries attribute private (and also th…
anthony-yip Mar 21, 2025
28db9e5
Made all of tangoJob's attributes read only
anthony-yip Mar 23, 2025
9d57597
fixed the local/remote desyncing issues, left some print statements i…
anthony-yip Aug 24, 2025
3bd80cf
Better encapsulation of TangoJob._remoteLocation
anthony-yip Aug 24, 2025
f0c0d32
Fixing uses of makeDead that was changed 2 commits ago
anthony-yip Aug 24, 2025
5b5cbc0
Merge branch 'copy-in' into anthonyyip/infinite_retries_bugfix
anthony-yip Aug 25, 2025
ba83509
comments, logging and todos
anthony-yip Aug 30, 2025
600803c
Code to enable spot instances
anthony-yip Sep 16, 2025
d450112
type annotations
anthony-yip Sep 16, 2025
029c1a9
finished mypy stuff up to not using --check-untyped-defs
anthony-yip Sep 16, 2025
85bdf7c
doesn't quite work, but wanted a checkpoint before muddling with the …
anthony-yip Sep 22, 2025
7e6751e
more typing changes, streamlining the used of the TangoDictionary in …
anthony-yip Sep 23, 2025
87b2d4b
tested on a single cli job
anthony-yip Sep 23, 2025
df044e3
Merge branch 'copy-in' into anthonyyip/infinite_retries_bugfix
anthony-yip Sep 23, 2025
4c8a521
Merge branch 'anthonyyip/infinite_retries_bugfix' into anthonyyip/spo…
anthony-yip Sep 23, 2025
03e1f31
refactored afterJobExecution and detachVM
anthony-yip Sep 28, 2025
bc8c8a7
error messages
anthony-yip Sep 28, 2025
20b833b
code cleanup: worker always gets initialized with a preallocated VM (…
anthony-yip Sep 28, 2025
d2e069c
Merge branch 'copy-in' into anthonyyip/spot_instances
anthony-yip Oct 27, 2025
138e332
fixed logging type safety
anthony-yip Oct 27, 2025
d1a8b27
always assert that detachVM is called (taken care of with .keep_for_d…
anthony-yip Oct 27, 2025
de38329
more todos
anthony-yip Oct 27, 2025
299e999
stop Before bug fix
anthony-yip Oct 28, 2025
2325e54
empty line
anthony-yip Nov 11, 2025
5b34e24
replaced _clean with make_empty for both TangoQueue and TangoDictionary
anthony-yip Nov 18, 2025
ee45c4d
removed dead code
anthony-yip Nov 18, 2025
e9cdf51
Merge branch 'anthonyyip/spot_instances' into anthonyyip/ami_updates
anthony-yip Dec 2, 2025
204c1ca
Merge branch 'ec2-new-implementation' into anthonyyip/ami_updates
anthony-yip Dec 2, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 46 additions & 29 deletions vmms/ec2SSH.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,40 @@ def acquire_vm_semaphore():
def release_vm_semaphore():
    """Release the VM semaphore stored on the Ec2SSH class."""
    vm_semaphore = Ec2SSH._vm_semaphore
    vm_semaphore.release()

def __get_valid_images(self) -> dict:
    """Scan EC2 for self-owned images and index them by their Name tag.

    Queries ``self.boto3resource.images`` filtered to ``Owners=["self"]``.
    An image is kept when it carries a tag whose key is ``"Name"`` and whose
    value is non-empty. When several images share the same name, the first
    one encountered is kept and the others are logged and skipped.

    Returns:
        A dict mapping each Name tag value to the image object that the
        collection yielded for it.
    """
    # Materialize the collection once. boto3 collections are lazy, so every
    # separate iteration would issue a new request to EC2 and the second
    # pass could observe a different set of images than the first.
    images = list(self.boto3resource.images.filter(Owners=["self"]))

    img_name_to_img_obj = {}
    for image in images:
        # image.tags is falsy (None/empty) for untagged images.
        for tag in image.tags or ():
            if tag["Key"] != "Name" or not tag["Value"]:
                continue
            name = tag["Value"]
            if name in img_name_to_img_obj:
                self.log.info(
                    "Ignore %s for duplicate name tag %s" % (image.id, name)
                )
            else:
                img_name_to_img_obj[name] = image
                self.log.info(
                    "Found image: %s with name tag %s" % (image.id, name)
                )

    # Everything that did not end up in the mapping was ignored; keep the
    # collection order so the log line is deterministic.
    tagged_amis = {img.id for img in img_name_to_img_obj.values()}
    ignored_amis = [img.id for img in images if img.id not in tagged_amis]

    if ignored_amis:
        self.log.info(
            "Ignored images %s for lack of or ill-formed name tag"
            % str(ignored_amis)
        )

    return img_name_to_img_obj


# TODO: the arguments accessKeyId and accessKey don't do anything
def __init__(self, accessKeyId=None, accessKey=None):
Expand Down Expand Up @@ -203,44 +237,17 @@ def __init__(self, accessKeyId=None, accessKey=None):
# self.createKeyPair()
# create boto3resource

self.img2ami = {} # this is a bad name, should really be img_name to img
self.images = []
try:
# This is a service resource
self.boto3resource: EC2ServiceResource = boto3.resource("ec2", config.Config.EC2_REGION) # TODO: rename this ot self.ec2resource
self.boto3client = boto3.client("ec2", config.Config.EC2_REGION)

# Get images from ec2
images = self.boto3resource.images.filter(Owners=["self"])
except Exception as e:
self.log.error("EC2SSH failed initialization: %s" % (e))
raise
self.img2ami = self.__get_valid_images()

for image in images:
if image.tags:
for tag in image.tags:
if tag["Key"] == "Name" and tag["Value"]:
if tag["Value"] in self.img2ami:
self.log.info(
"Ignore %s for duplicate name tag %s"
% (image.id, tag["Value"])
)
else:
self.img2ami[tag["Value"]] = image
self.log.info(
"Found image: %s with name tag %s"
% (image.id, tag["Value"])
)

imageAMIs = [item.id for item in images]
taggedAMIs = [self.img2ami[key].id for key in self.img2ami]
ignoredAMIs = list(set(imageAMIs) - set(taggedAMIs))

if len(ignoredAMIs) > 0:
self.log.info(
"Ignored images %s for lack of or ill-formed name tag"
% str(ignoredAMIs)
)

def instanceName(self, id, name):
"""instanceName - Constructs a VM instance name. Always use
Expand Down Expand Up @@ -282,7 +289,16 @@ def tangoMachineToEC2Instance(self, vm: TangoMachine) -> dict:
ec2instance["instance_type"] = config.Config.DEFAULT_INST_TYPE

# for now, ami is config default
ec2instance["ami"] = self.img2ami[vm.image].id
if vm.image in self.img2ami:
ec2instance["ami"] = self.img2ami[vm.image].id
else:
# We may need to rescan for new images
self.img2ami = self.__get_valid_images()
if vm.image in self.img2ami:
ec2instance["ami"] = self.img2ami[vm.image].id
else:
self.log.error("Image %s not found" % vm.image)
raise

self.log.info("tangoMachineToEC2Instance: %s" % str(ec2instance))
return ec2instance
Expand Down Expand Up @@ -809,6 +825,7 @@ def existsVM(self, vm):

def getImages(self):
    """getImages - rescan EC2 for valid images, store the refreshed
    name-to-image mapping on self.img2ami, and return its keys as a list.
    """
    refreshed = self.__get_valid_images()
    self.img2ami = refreshed
    return list(self.img2ami)

# getTag: to do later
Expand Down