Skip to content

Commit 788f9dc

Browse files
committed
feat(docworker): Improve retry mechanism for document generation
1 parent f29f5a2 commit 788f9dc

File tree

4 files changed

+82
-10
lines changed

4 files changed

+82
-10
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
from .command_queue import CommandQueue, CommandWorker
1+
from .command_queue import CommandJobError, CommandQueue, CommandWorker
22

3-
__all__ = ['CommandQueue', 'CommandWorker']
3+
__all__ = ['CommandJobError', 'CommandQueue', 'CommandWorker']

packages/dsw-command-queue/dsw/command_queue/command_queue.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,45 @@ def signal_handler(recv_signal, frame):
4040
signal.signal(signal.SIGABRT, signal_handler)
4141

4242

43+
class CommandJobError(BaseException):
44+
45+
def __init__(self, job_id: str, message: str, try_again: bool,
46+
exc: BaseException | None = None):
47+
self.job_id = job_id
48+
self.message = message
49+
self.try_again = try_again
50+
self.exc = exc
51+
super().__init__(message)
52+
53+
def __str__(self):
54+
return self.message
55+
56+
def log_message(self):
57+
if self.exc is None:
58+
return self.message
59+
else:
60+
return f'{self.message} (caused by: [{type(self.exc).__name__}] {str(self.exc)})'
61+
62+
def db_message(self):
63+
if self.exc is None:
64+
return self.message
65+
return f'{self.message}\n\n' \
66+
f'Caused by: {type(self.exc).__name__}\n' \
67+
f'{str(self.exc)}'
68+
69+
@staticmethod
70+
def create(job_id: str, message: str, try_again: bool = True,
71+
exc: BaseException | None = None):
72+
if isinstance(exc, CommandJobError):
73+
return exc
74+
return CommandJobError(
75+
job_id=job_id,
76+
message=message,
77+
try_again=try_again,
78+
exc=exc,
79+
)
80+
81+
4382
class CommandWorker:
4483

4584
@abc.abstractmethod
@@ -190,8 +229,27 @@ def work():
190229
updated_at=datetime.datetime.now(tz=datetime.UTC),
191230
uuid=command.uuid,
192231
)
232+
except CommandJobError as e:
233+
if e.try_again and attempt_number < command.max_attempts:
234+
query = self.queries.query_command_error()
235+
msg = f'Failed with job error: {e.message} (will try again)'
236+
else:
237+
query = self.queries.query_command_error_stop()
238+
msg = f'Failed with job error: {e.message}'
239+
LOG.warning(msg)
240+
self.worker.process_exception(e)
241+
self.db.execute_query(
242+
query=query,
243+
attempts=attempt_number,
244+
error_message=msg,
245+
updated_at=datetime.datetime.now(tz=datetime.UTC),
246+
uuid=command.uuid,
247+
)
193248
except Exception as e:
194-
msg = f'Failed with exception: {str(e)} ({type(e).__name__})'
249+
if attempt_number < command.max_attempts:
250+
msg = f'Failed with exception [{type(e).__name__}]: {str(e)} (will try again)'
251+
else:
252+
msg = f'Failed with exception [{type(e).__name__}]: {str(e)}'
195253
LOG.warning(msg)
196254
self.worker.process_exception(e)
197255
self.db.execute_query(

packages/dsw-command-queue/dsw/command_queue/query.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,18 @@ def query_command_error() -> str:
3838
WHERE uuid = %(uuid)s;
3939
"""
4040

41+
@staticmethod
42+
def query_command_error_stop() -> str:
43+
return f"""
44+
UPDATE persistent_command
45+
SET attempts = %(attempts)s,
46+
max_attempts = %(attempts)s,
47+
last_error_message = %(error_message)s,
48+
state = '{CommandState.ERROR}',
49+
updated_at = %(updated_at)s
50+
WHERE uuid = %(uuid)s;
51+
"""
52+
4153
@staticmethod
4254
def query_command_done() -> str:
4355
return f"""

packages/dsw-document-worker/dsw/document_worker/worker.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,14 @@ def _enrich_context(self):
151151
extras['questionnaire'] = questionnaire.to_dict()
152152
self.doc_context['extras'] = extras
153153

154+
def check_compliance(self):
155+
metamodel_version = int(self.doc_context.get('metamodelVersion', '0'))
156+
if metamodel_version != CURRENT_METAMODEL:
157+
LOG.error('Command with metamodel version %d is not supported '
158+
'by this worker (version %d)', metamodel_version, CURRENT_METAMODEL)
159+
raise RuntimeError(f'Unsupported metamodel version: {metamodel_version} '
160+
f'(expected {CURRENT_METAMODEL})')
161+
154162
@handle_job_step('Failed to build final document')
155163
def build_document(self):
156164
LOG.info('Building document by rendering template with context')
@@ -231,6 +239,7 @@ def try_set_job_state(self, state: str, message: str) -> bool:
231239
return False
232240

233241
def _run(self):
242+
self.check_compliance()
234243
self.get_document()
235244

236245
self.prepare_template()
@@ -343,14 +352,7 @@ def run_once(self):
343352
queue.run_once()
344353

345354
def work(self, cmd: PersistentCommand):
346-
metamodel_version = int(cmd.body.get('metamodelVersion', '0'))
347-
if metamodel_version != CURRENT_METAMODEL:
348-
LOG.error('Command with metamodel version %d is not supported '
349-
'by this worker (version %d)', metamodel_version, CURRENT_METAMODEL)
350-
raise RuntimeError(f'Unsupported metamodel version: {metamodel_version} '
351-
f'(expected {CURRENT_METAMODEL})')
352355
document_uuid = cmd.body['document']['uuid']
353-
354356
Context.get().update_trace_id(cmd.uuid)
355357
Context.get().update_document_id(document_uuid)
356358
SentryReporter.set_context('cmd_uuid', cmd.uuid)

0 commit comments

Comments
 (0)