Skip to content

Commit 6075da0

Browse files
committed
fix(core): keep reading when writes are blocked on THP
1 parent fd4ad5c commit 6075da0

5 files changed

Lines changed: 103 additions & 44 deletions

File tree

core/.changelog.d/6506.fixed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[T3W1] Avoid THP deadlock over USB.

core/src/trezor/wire/thp/channel.py

Lines changed: 41 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141

4242
if TYPE_CHECKING:
4343
from buffer_types import AnyBuffer, AnyBytes
44-
from typing import Any, Awaitable, Callable
44+
from typing import Any, Awaitable, Callable, Generator
4545

4646
from trezor.messages import ThpPairingCredential
4747
from trezor.wire import WireInterface
@@ -436,49 +436,58 @@ async def write_encrypted_payload(self, ctrl_byte: int, payload: AnyBytes) -> No
436436

437437
header = PacketHeader(ctrl_byte, self.get_channel_id_int(), payload_len)
438438

439-
async def _write_loop() -> None:
440-
"""Send the payload and wait for an ACK with retransmissions."""
439+
ack_latency_ms = self.channel_cache.get_int(CHANNEL_ACK_LATENCY_MS) or 0
441440

442-
ack_latency_ms = self.channel_cache.get_int(CHANNEL_ACK_LATENCY_MS) or 0
443-
if __debug__:
444-
self._log(f"Sending {len(payload)} bytes, latency: {ack_latency_ms} ms")
441+
# ACK is needed before sending more data
442+
ABP.set_sending_allowed(self.channel_cache, False)
445443

446-
# ACK is needed before sending more data
447-
ABP.set_sending_allowed(self.channel_cache, False)
444+
# allows preempting this channel, if another channel becomes active
445+
self.last_write_ms = utime.ticks_ms()
448446

449-
# allows preempting this channel, if another channel becomes active
450-
self.last_write_ms = utime.ticks_ms()
447+
def _write_loop() -> Generator[Any, Any, None]:
448+
"""
449+
Retransmit the payload (with increasing delay), raising `Timeout` in the end.
451450
452-
for i in range(_MAX_RETRANSMISSION_COUNT):
453-
await self._write_payload_once(header, payload)
451+
This task is spawned concurrently with `_wait_for_ack()` using `loop.race()`,
452+
so it will be cancelled when the expected ACK is received.
453+
"""
454+
from trezor.loop import sleep
454455

455-
# Channel's estimated latency + a variable delay (from 200ms till ~3.52s)
456-
timeout_ms = ack_latency_ms + round(10300 - 1010000 / (100 + i))
457-
try:
458-
# wait and return after receiving an ACK, or raise in case of an unexpected message.
459-
await self.recv_payload(
460-
expected_ctrl_byte=None, timeout_ms=timeout_ms
461-
)
462-
except Timeout:
463-
if __debug__:
464-
log.warning(__name__, "Retransmit after %d ms", timeout_ms)
465-
continue
466-
467-
ack_latency_ms = utime.ticks_diff(utime.ticks_ms(), self.last_write_ms)
468-
# Limit estimated latency to avoid integer overflows and too long delays
469-
ack_latency_ms = max(0, min(800, ack_latency_ms))
470-
self.channel_cache.set_int(CHANNEL_ACK_LATENCY_MS, ack_latency_ms)
456+
if __debug__:
457+
self._log(f"Sending {len(payload)} bytes, latency: {ack_latency_ms} ms")
471458

472-
# `ABP.set_sending_allowed()` will be called after a valid ACK
473-
if ABP.is_sending_allowed(self.channel_cache):
474-
return
459+
for i in range(_MAX_RETRANSMISSION_COUNT):
460+
# Try to send the payload (split into packets), or raise if transport is blocked
461+
yield from self._write_payload_once(header, payload)
462+
# Channel's estimated latency + a variable delay (from 200ms till ~3.52s)
463+
delay_ms = ack_latency_ms + round(10300 - 1010000 / (100 + i))
464+
yield from sleep(delay_ms)
465+
if __debug__:
466+
log.warning(__name__, "Retransmit after %d ms", delay_ms)
475467

476468
# restart event loop due to unresponsive channel
477469
raise Timeout("THP retransmission timeout")
478470

471+
def _wait_for_ack() -> Generator[Any, Any, None]:
472+
"""
473+
Wait for the expected ACK to be received.
474+
475+
This task is spawned concurrently with `_write_loop()` using `loop.race()`,
476+
so it will be cancelled when retransmission loop is over.
477+
"""
478+
while not ABP.is_sending_allowed(self.channel_cache):
479+
# `ABP.set_sending_allowed()` will be called after a valid ACK
480+
yield from self.recv_payload(expected_ctrl_byte=None)
481+
479482
try:
480-
return await _write_loop()
483+
# wait and return after receiving an ACK, or raise in case of an unexpected message / retransmission timeout.
484+
await race(_wait_for_ack(), _write_loop())
481485
finally:
486+
ack_latency_ms = utime.ticks_diff(utime.ticks_ms(), self.last_write_ms)
487+
# Limit estimated latency to avoid integer overflows and too long delays
488+
ack_latency_ms = max(0, min(800, ack_latency_ms))
489+
self.channel_cache.set_int(CHANNEL_ACK_LATENCY_MS, ack_latency_ms)
490+
482491
# Make sure to use the next `seq_bit` for the next payload
483492
ABP.set_send_seq_bit_to_opposite(self.channel_cache)
484493

core/tests/test_trezor.wire.thp.writer.py

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
if utils.USE_THP:
77
import thp_common
88
from mock_wire_interface import MockHID
9-
from trezor.loop import Timeout
9+
from trezor.loop import Timeout, race
1010
from trezor.wire.thp import ENCRYPTED, PacketHeader
1111
from trezor.wire.thp import alternating_bit_protocol as ABP
1212
from trezor.wire.thp.channel import _MAX_RETRANSMISSION_COUNT
@@ -134,15 +134,21 @@ def test_write_timeout(self):
134134
seq_bit = ABP.get_send_seq_bit(channel.channel_cache)
135135

136136
task = channel.write_encrypted_payload(ENCRYPTED, b"PAYLOAD")
137-
task.send(None) # start the generator
137+
race_obj = task.send(None) # start the generator
138+
assert isinstance(race_obj, race)
139+
_wait_for_ack, write_loop = race_obj.children
140+
write_loop.send(None) # start the generator
138141

139142
for _ in range(_MAX_RETRANSMISSION_COUNT - 1):
140-
task.send(None) # complete write
141-
task.throw(Timeout()) # no ACK is received
143+
write_loop.send(None) # complete write
144+
write_loop.send(None) # complete sleep
145+
146+
write_loop.send(None) # complete write last time
147+
with self.assertRaises(Timeout) as ctx:
148+
write_loop.send(None) # complete sleep & raise Timeout
142149

143-
task.send(None) # complete write last time
144150
with self.assertRaises(Timeout):
145-
task.throw(Timeout()) # no ACK is received
151+
task.throw(ctx.value) # re-raise timeout in `write_encrypted_payload`
146152

147153
# next write should use the next `seq_bit` (see #6138)
148154
self.assertNotEqual(ABP.get_send_seq_bit(channel.channel_cache), seq_bit)
@@ -152,16 +158,22 @@ def test_write_blocked(self):
152158
seq_bit = ABP.get_send_seq_bit(channel.channel_cache)
153159

154160
task = channel.write_encrypted_payload(ENCRYPTED, b"PAYLOAD")
155-
task.send(None) # start the generator
161+
race_obj = task.send(None) # start the generator
162+
assert isinstance(race_obj, race)
163+
_wait_for_ack, write_loop = race_obj.children
164+
write_loop.send(None) # start the generator
156165

157166
# Re-transmit a few times
158167
for _ in range(3):
159-
task.send(None) # complete write
160-
task.throw(Timeout()) # no ACK is received
168+
write_loop.send(None) # complete write
169+
write_loop.send(None) # complete sleep
170+
171+
with self.assertRaises(Timeout) as ctx:
172+
# timeout `_write_payload_once()` (as if `loop.sleep` has completed) using dummy "ticks" integer value
173+
write_loop.send(12345)
161174

162175
with self.assertRaises(Timeout):
163-
# timeout write (as if `loop.sleep` has completed) using dummy "ticks" integer value
164-
task.send(12345)
176+
task.throw(ctx.value) # re-raise timeout in `write_encrypted_payload`
165177

166178
# next write should use the next `seq_bit` (see #6138)
167179
self.assertNotEqual(ABP.get_send_seq_bit(channel.channel_cache), seq_bit)

tests/device_tests/thp/test_abp.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
1+
import functools
12
import time
3+
import typing as t
4+
from pathlib import Path
25
from unittest.mock import Mock
36

47
import pytest
58

6-
from trezorlib import messages
9+
from trezorlib import device, messages
710
from trezorlib.debuglink import DebugSession
811
from trezorlib.thp.client import TrezorClientThp
912

@@ -33,3 +36,31 @@ def test_abp(session: DebugSession) -> None:
3336
# we should now successfully read the response
3437
resp = session.read()
3538
messages.Success.ensure_isinstance(resp)
39+
40+
41+
def delay_call(func: t.Callable, seconds: float):
42+
43+
@functools.wraps(func)
44+
def wrapper(*args, **kw):
45+
time.sleep(seconds)
46+
return func(*args, **kw)
47+
48+
return wrapper
49+
50+
51+
HERE = Path(__file__).parent.resolve()
52+
53+
54+
def test_delay_acks_from_host(session: DebugSession) -> None:
55+
assert isinstance(session.client, TrezorClientThp)
56+
channel = session.client.channel
57+
58+
# delay THP ACK sending, to trigger retransmits
59+
channel._send_ack = delay_call(channel._send_ack, seconds=0.6)
60+
session.client.ping("Should succeed after some retransmits")
61+
session.client.ping("ButtonRequest should be retransmitted", button_protection=True)
62+
63+
file_name = "test_bg_eckhart.jpg"
64+
with open(HERE.parent / file_name, "rb") as f:
65+
# Multiple requests and responses
66+
device.apply_settings(session, homescreen=f.read())

tests/ui_tests/fixtures.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32351,6 +32351,7 @@
3235132351
"T3W1_cs_tezos-test_sign_tx.py::test_tezos_smart_contract_transfer": "c66aacc0efb0ef974a67a0a0b838257f3eafa0018438317824700fbf84c8c433",
3235232352
"T3W1_cs_tezos-test_sign_tx.py::test_tezos_smart_contract_transfer_to_contract": "a6aa76610b2f4c09d32c9f63e8a831cb561710ec6f5e02d165218fa766c1fd23",
3235332353
"T3W1_cs_thp-test_abp.py::test_abp": "c24521e569c08e3605b164212c876f8ac57c5eef6cca6f2ca53a635a883ebc4b",
32354+
"T3W1_cs_thp-test_abp.py::test_delay_acks_from_host": "8b42717a5490020e02da5b481bb9db9f5f333dceb15813e29cac09252f85c8cd",
3235432355
"T3W1_cs_thp-test_basic.py::test_v1": "987d1c62e6576b9cc24373e00df522efdc9736af978d6032f7d319af8daaef5d",
3235532356
"T3W1_cs_thp-test_basic.py::test_v2_unallocated": "987d1c62e6576b9cc24373e00df522efdc9736af978d6032f7d319af8daaef5d",
3235632357
"T3W1_cs_thp-test_handshake.py::test_no_unlock": "05416515d2a63d94fc0ba3b6afdb9b0eb139ed403b7c20b62203c776ffe5d6ee",
@@ -33931,6 +33932,7 @@
3393133932
"T3W1_de_tezos-test_sign_tx.py::test_tezos_smart_contract_transfer": "253a29728bc2b0273d5b09ee7ea84c1053d66bfaa1237ce389c97ca2ad8a6381",
3393233933
"T3W1_de_tezos-test_sign_tx.py::test_tezos_smart_contract_transfer_to_contract": "04932ccd7cdd4bbfa6f3665038d81f68c2822c8431b36bc9668c0aa2e6a418ab",
3393333934
"T3W1_de_thp-test_abp.py::test_abp": "b3ce10e1620297758d6f2b235127f1dd0d61804c8cc32bc6664db5c3a9d81dde",
33935+
"T3W1_de_thp-test_abp.py::test_delay_acks_from_host": "89f5aeb7233b3ca22ccee2f08cc9caee38f7dec1b72bc49d9c1a4224f4ca1b27",
3393433936
"T3W1_de_thp-test_basic.py::test_v1": "987d1c62e6576b9cc24373e00df522efdc9736af978d6032f7d319af8daaef5d",
3393533937
"T3W1_de_thp-test_basic.py::test_v2_unallocated": "987d1c62e6576b9cc24373e00df522efdc9736af978d6032f7d319af8daaef5d",
3393633938
"T3W1_de_thp-test_handshake.py::test_no_unlock": "9be3988c015d9a5260eb1537ab5cd398f4ddd36e2378157fe88f8ee04f533b92",
@@ -35511,6 +35513,7 @@
3551135513
"T3W1_en_tezos-test_sign_tx.py::test_tezos_smart_contract_transfer": "5559494cb76edbe2de07923e9002e0ccf580144e2f17ac879d3d465e42189879",
3551235514
"T3W1_en_tezos-test_sign_tx.py::test_tezos_smart_contract_transfer_to_contract": "8d2d3d0da8e522be34cdd398cc9b49c2cf860a0687a9ac2c1a37c7255800651a",
3551335515
"T3W1_en_thp-test_abp.py::test_abp": "931d9afceb0ba1e4faae891775819277242d889644d5c0c5863fc8c9fcf859b1",
35516+
"T3W1_en_thp-test_abp.py::test_delay_acks_from_host": "f6e49fc7fcf36d8f2eaec89587656ebd905b9d843b1960f0684c51c5b775e4ff",
3551435517
"T3W1_en_thp-test_basic.py::test_v1": "987d1c62e6576b9cc24373e00df522efdc9736af978d6032f7d319af8daaef5d",
3551535518
"T3W1_en_thp-test_basic.py::test_v2_unallocated": "987d1c62e6576b9cc24373e00df522efdc9736af978d6032f7d319af8daaef5d",
3551635519
"T3W1_en_thp-test_handshake.py::test_no_unlock": "a60a24172d0470601707825e0287a91a7388e8aeb00b5e49ec9b2643de35c1ca",
@@ -37091,6 +37094,7 @@
3709137094
"T3W1_es_tezos-test_sign_tx.py::test_tezos_smart_contract_transfer": "f485f09bb09e2717d91380a05c6ca9a3162a7068e665a1038574bde88271d9c0",
3709237095
"T3W1_es_tezos-test_sign_tx.py::test_tezos_smart_contract_transfer_to_contract": "ff6418a3e46044c19372760f8162def1f7a37d5276add41d7df4ffe409ae4b2b",
3709337096
"T3W1_es_thp-test_abp.py::test_abp": "56536ae9cd7c4ff8022def4ec3350031d3614064d961f53a2850f2be425af201",
37097+
"T3W1_es_thp-test_abp.py::test_delay_acks_from_host": "e86b0905548c3f025189b94122aacee1032a65b49b7de4d1ee880d27ce2d7895",
3709437098
"T3W1_es_thp-test_basic.py::test_v1": "987d1c62e6576b9cc24373e00df522efdc9736af978d6032f7d319af8daaef5d",
3709537099
"T3W1_es_thp-test_basic.py::test_v2_unallocated": "987d1c62e6576b9cc24373e00df522efdc9736af978d6032f7d319af8daaef5d",
3709637100
"T3W1_es_thp-test_handshake.py::test_no_unlock": "d362078d046200bf6a6421a18a3ad8c3e0eba86f88512fc934c15c850ae4148d",
@@ -38671,6 +38675,7 @@
3867138675
"T3W1_fr_tezos-test_sign_tx.py::test_tezos_smart_contract_transfer": "af27005f1a60e90ac80560815ad2a458e9787ad399fd1cf8ff46743a435a12b2",
3867238676
"T3W1_fr_tezos-test_sign_tx.py::test_tezos_smart_contract_transfer_to_contract": "f107171d02df2b7ac91c04505f3f8143016f372c82891806c31da43165ae89e0",
3867338677
"T3W1_fr_thp-test_abp.py::test_abp": "e8156cf4eda1d29060f05b4a18d50032b0b344230609b7de7cababfe0a86b20b",
38678+
"T3W1_fr_thp-test_abp.py::test_delay_acks_from_host": "477d766a3dddbadcaf240d3702e471260b39c79cc5a63e04832f104d8e6b11a7",
3867438679
"T3W1_fr_thp-test_basic.py::test_v1": "987d1c62e6576b9cc24373e00df522efdc9736af978d6032f7d319af8daaef5d",
3867538680
"T3W1_fr_thp-test_basic.py::test_v2_unallocated": "987d1c62e6576b9cc24373e00df522efdc9736af978d6032f7d319af8daaef5d",
3867638681
"T3W1_fr_thp-test_handshake.py::test_no_unlock": "56132ede5cabeccec0514da02d7c88fb8ebd78d8fb14a2b75ec8c124a7ccbf2d",
@@ -40256,6 +40261,7 @@
4025640261
"T3W1_pt_tezos-test_sign_tx.py::test_tezos_smart_contract_transfer": "3495d5e1353fbbbe3bff511512f74500dc1d29c0be40548c5f052ebd303102c6",
4025740262
"T3W1_pt_tezos-test_sign_tx.py::test_tezos_smart_contract_transfer_to_contract": "a3b2ff8deb91d0e7d8a5e13087d4b848bad23111de73a1123c3c3f3cfd08cfd5",
4025840263
"T3W1_pt_thp-test_abp.py::test_abp": "1c74667c078e25e7e0d37c7b2aa35f7c8ab02cd88e74695ecb355c82a293b0cc",
40264+
"T3W1_pt_thp-test_abp.py::test_delay_acks_from_host": "da50bba056af80a3881763b986575d4fa4ed40402dcb37aec75992f583629ef0",
4025940265
"T3W1_pt_thp-test_basic.py::test_v1": "987d1c62e6576b9cc24373e00df522efdc9736af978d6032f7d319af8daaef5d",
4026040266
"T3W1_pt_thp-test_basic.py::test_v2_unallocated": "987d1c62e6576b9cc24373e00df522efdc9736af978d6032f7d319af8daaef5d",
4026140267
"T3W1_pt_thp-test_handshake.py::test_no_unlock": "d362078d046200bf6a6421a18a3ad8c3e0eba86f88512fc934c15c850ae4148d",

0 commit comments

Comments
 (0)