|
| 1 | +import asyncio |
| 2 | +import time |
| 3 | +from asyncio import Future, ensure_future |
| 4 | +from binascii import unhexlify, hexlify |
| 5 | +from typing import Optional, Callable, Dict, List |
| 6 | + |
| 7 | +from cpfl.core import TransmissionMethod |
| 8 | +from cpfl.core.model_manager import ModelManager |
| 9 | +from cpfl.core.peer_manager import PeerManager |
| 10 | +from cpfl.core.session_settings import SessionSettings |
| 11 | +from cpfl.util.eva.protocol import EVAProtocol |
| 12 | +from cpfl.util.eva.result import TransferResult |
| 13 | + |
| 14 | +from ipv8.community import Community |
| 15 | +from ipv8.requestcache import RequestCache |
| 16 | +from ipv8.types import Peer |
| 17 | + |
| 18 | + |
| 19 | +class LearningCommunity(Community): |
| 20 | + community_id = unhexlify('d5889074c1e4c60423cdb6e9307ba0ca5695ead7') |
| 21 | + |
| 22 | + def __init__(self, *args, **kwargs): |
| 23 | + super().__init__(*args, **kwargs) |
| 24 | + self.request_cache = RequestCache() |
| 25 | + self.my_id = self.my_peer.public_key.key_to_bin() |
| 26 | + self.round_complete_callback: Optional[Callable] = None |
| 27 | + self.aggregate_complete_callback: Optional[Callable] = None |
| 28 | + |
| 29 | + self.peers_list: List[Peer] = [] |
| 30 | + |
| 31 | + # Settings |
| 32 | + self.settings: Optional[SessionSettings] = None |
| 33 | + |
| 34 | + # State |
| 35 | + self.is_active = False |
| 36 | + self.did_setup = False |
| 37 | + self.shutting_down = False |
| 38 | + |
| 39 | + # Components |
| 40 | + self.peer_manager: PeerManager = PeerManager(self.my_id, 100000) |
| 41 | + self.model_manager: Optional[ModelManager] = None # Initialized when the process is setup |
| 42 | + |
| 43 | + # Model exchange parameters |
| 44 | + self.eva = EVAProtocol(self, self.on_receive, self.on_send_complete, self.on_error) |
| 45 | + |
| 46 | + # Availability traces |
| 47 | + self.traces: Optional[Dict] = None |
| 48 | + self.traces_count: int = 0 |
| 49 | + |
| 50 | + self.logger.info("The %s started with peer ID: %s", self.__class__.__name__, |
| 51 | + self.peer_manager.get_my_short_id()) |
| 52 | + |
| 53 | + def start(self): |
| 54 | + """ |
| 55 | + Start to participate in the training process. |
| 56 | + """ |
| 57 | + assert self.did_setup, "Process has not been setup - call setup() first" |
| 58 | + self.is_active = True |
| 59 | + |
| 60 | + def set_traces(self, traces: Dict) -> None: |
| 61 | + self.traces = traces |
| 62 | + events: int = 0 |
| 63 | + |
| 64 | + # Schedule the join/leave events |
| 65 | + for active_timestamp in self.traces["active"]: |
| 66 | + if active_timestamp == 0: |
| 67 | + continue # We assume peers will be online at t=0 |
| 68 | + |
| 69 | + self.register_anonymous_task("join", self.go_online, delay=active_timestamp) |
| 70 | + events += 1 |
| 71 | + |
| 72 | + for inactive_timestamp in self.traces["inactive"]: |
| 73 | + self.register_anonymous_task("leave", self.go_offline, delay=inactive_timestamp) |
| 74 | + events += 1 |
| 75 | + |
| 76 | + self.logger.info("Scheduled %d join/leave events for peer %s (trace length in sec: %d)", events, |
| 77 | + self.peer_manager.get_my_short_id(), traces["finish_time"]) |
| 78 | + |
| 79 | + # Schedule the next call to set_traces |
| 80 | + self.register_task("reapply-trace-%s-%d" % (self.peer_manager.get_my_short_id(), self.traces_count), |
| 81 | + self.set_traces, self.traces, delay=self.traces["finish_time"]) |
| 82 | + self.traces_count += 1 |
| 83 | + |
| 84 | + def go_online(self): |
| 85 | + self.is_active = True |
| 86 | + cur_time = asyncio.get_event_loop().time() if self.settings.is_simulation else time.time() |
| 87 | + self.logger.info("Participant %s comes online (t=%d)", self.peer_manager.get_my_short_id(), cur_time) |
| 88 | + |
| 89 | + def go_offline(self, graceful: bool = True): |
| 90 | + self.is_active = False |
| 91 | + cur_time = asyncio.get_event_loop().time() if self.settings.is_simulation else time.time() |
| 92 | + self.logger.info("Participant %s will go offline (t=%d)", self.peer_manager.get_my_short_id(), cur_time) |
| 93 | + |
| 94 | + def setup(self, settings: SessionSettings): |
| 95 | + self.settings = settings |
| 96 | + for participant in settings.participants: |
| 97 | + self.peer_manager.add_peer(unhexlify(participant)) |
| 98 | + |
| 99 | + # Initialize the model |
| 100 | + participant_index = settings.all_participants.index(hexlify(self.my_id).decode()) |
| 101 | + self.model_manager = ModelManager(None, settings, participant_index) |
| 102 | + |
| 103 | + # Setup the model transmission |
| 104 | + if self.settings.transmission_method == TransmissionMethod.EVA: |
| 105 | + self.logger.info("Setting up EVA protocol") |
| 106 | + self.eva.settings.block_size = settings.eva_block_size |
| 107 | + self.eva.settings.max_simultaneous_transfers = settings.eva_max_simultaneous_transfers |
| 108 | + else: |
| 109 | + raise RuntimeError("Unsupported transmission method %s", self.settings.transmission_method) |
| 110 | + |
| 111 | + self.did_setup = True |
| 112 | + |
| 113 | + def get_peers(self): |
| 114 | + if self.peers_list: |
| 115 | + return self.peers_list |
| 116 | + return super().get_peers() |
| 117 | + |
| 118 | + def get_peer_by_pk(self, target_pk: bytes): |
| 119 | + peers = list(self.get_peers()) |
| 120 | + for peer in peers: |
| 121 | + if peer.public_key.key_to_bin() == target_pk: |
| 122 | + return peer |
| 123 | + return None |
| 124 | + |
| 125 | + def on_eva_send_done(self, future: Future, peer: Peer, serialized_response: bytes, binary_data: bytes, start_time: float): |
| 126 | + if future.cancelled(): # Do not reschedule if the future was cancelled |
| 127 | + return |
| 128 | + |
| 129 | + if future.exception(): |
| 130 | + peer_id = self.peer_manager.get_short_id(peer.public_key.key_to_bin()) |
| 131 | + self.logger.warning("Transfer to participant %s failed, scheduling it again (Exception: %s)", |
| 132 | + peer_id, future.exception()) |
| 133 | + # The transfer failed - try it again after some delay |
| 134 | + ensure_future(asyncio.sleep(self.settings.model_send_delay)).add_done_callback( |
| 135 | + lambda _: self.schedule_eva_send_model(peer, serialized_response, binary_data, start_time)) |
| 136 | + else: |
| 137 | + # The transfer seems to be completed - record the transfer time |
| 138 | + end_time = asyncio.get_event_loop().time() if self.settings.is_simulation else time.time() |
| 139 | + |
| 140 | + def schedule_eva_send_model(self, peer: Peer, serialized_response: bytes, binary_data: bytes, start_time: float) -> Future: |
| 141 | + # Schedule the transfer |
| 142 | + future = ensure_future(self.eva.send_binary(peer, serialized_response, binary_data)) |
| 143 | + future.add_done_callback(lambda f: self.on_eva_send_done(f, peer, serialized_response, binary_data, start_time)) |
| 144 | + return future |
| 145 | + |
| 146 | + async def on_receive(self, result: TransferResult): |
| 147 | + raise NotImplementedError() |
| 148 | + |
| 149 | + async def on_send_complete(self, result: TransferResult): |
| 150 | + peer_id = self.peer_manager.get_short_id(result.peer.public_key.key_to_bin()) |
| 151 | + my_peer_id = self.peer_manager.get_my_short_id() |
| 152 | + self.logger.info(f'Outgoing transfer {my_peer_id} -> {peer_id} has completed: {result.info.decode()}') |
| 153 | + |
| 154 | + async def on_error(self, peer, exception): |
| 155 | + self.logger.error(f'An error has occurred in transfer to peer {peer}: {exception}') |
| 156 | + |
| 157 | + async def unload(self): |
| 158 | + self.shutting_down = True |
| 159 | + await self.request_cache.shutdown() |
| 160 | + await super().unload() |
0 commit comments