From ba143a907d98e5d22756f9972fcc8bcc83ac4120 Mon Sep 17 00:00:00 2001 From: Joaquin Dominguez <83036592+j-dominguez9@users.noreply.github.com> Date: Sun, 2 Jun 2024 21:32:22 -0400 Subject: [PATCH] Updated prompt (#3) * repo structure * adding unit tests * andrew repo rebase * repo structure * adding unit tests * andrew repo rebase * Dev (#3) * cutting costs * cutting costs * added docstrings and typehints * Create utils.py * Delete poetry.lock * eval first run * formatting * eval2 * small fixes * add .env.sample for people to use as a template * spanish and german flores200 outputs * repo restructure * deepl scores * updated readme * added ruff for PR formatting * changed model to gpt-4o * adding wmt script and the batch articles * nllb benchmarks + gradio app start * adding updated prompts - single chunk only * finished google translate, working on gradio * adding short sample text * french, korean, japanese, and mandarin scores * updated prompts, comet_eval script and japanese agent translation * updated prompts * added random generation to gradio app * adding minor changes * moved gradio app functions to utils file * gradio app works with regeneration and model anonymity * subj benchmark run ready * adding the batch en * adding wsj source text * final changes? 
* updated readme * fixed example script, updated readme --------- Co-authored-by: John Santerre Co-authored-by: Kevin Solorio Co-authored-by: Nedelina * cleanup * fixed prompt details --------- Co-authored-by: John Santerre Co-authored-by: Kevin Solorio Co-authored-by: Nedelina Co-authored-by: Nedelina Teneva --- src/translation_agent/utils.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/translation_agent/utils.py b/src/translation_agent/utils.py index 79f35ad..42fcbf7 100755 --- a/src/translation_agent/utils.py +++ b/src/translation_agent/utils.py @@ -87,12 +87,15 @@ def one_chunk_initial_translation( system_message = f"You are an expert linguist, specializing in translation from {source_lang} to {target_lang}." - translation_prompt = f""""This is an {source_lang} to {target_lang} translation, please provide the {target_lang} translation for this text. \ -Do not provide any explanations or text apart from the translation. -{source_lang}: {source_text} + translation_prompt = f"""Your task is to provide a professional translation of a text from {source_lang} to {target_lang}. -{target_lang}:\ -""" + Translate the text below, delimited by XML tags and , and output the translation. + Output only the translation and nothing else. 
+ + + {source_text} + + """ prompt = translation_prompt.format(source_text=source_text) @@ -138,14 +141,14 @@ def one_chunk_reflect_on_translation( When writing suggestions, pay attention to whether there are ways to improve the translation's \n\ -(i) accuracy (by correcting errors of mistranslation, omission, or untranslated text),\n\ +(i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text),\n\ (ii) fluency (by applying {target_lang} grammar, spelling and punctuation rules, and ensuring there are no unnecessary repetitions),\n\ (iii) style (by ensuring the translations reflect the style of the source text and takes into account any cultural context),\n\ (iv) terminology (by ensuring terminology use is consistent and reflects the source text domain; and by only ensuring you use equivalent idioms {target_lang}).\n\ Write a list of specific, helpful and constructive suggestions for improving the translation. Each suggestion should address one specific part of the translation. -Output only the suggestions and nothing else.""" # Output the list of suggestions in JSON, using the key "suggestions". +Output only the suggestions and nothing else.""" prompt = reflection_prompt.format( source_lang=source_lang, @@ -179,12 +182,13 @@ def one_chunk_improve_translation( str: The improved translation based on the expert suggestions. """ - system_message = f"You are an expert language translator, specializing in {source_lang} to {target_lang} translation." + system_message = f"You are an expert linguist, specializing in translation editing from {source_lang} to {target_lang}." - prompt = f"""Your task is to carefully read, then improve, a translation from {source_lang} to {target_lang}, taking into -account a set of expert suggestions and constructive criticisms. 
+ prompt = f"""Your task is to carefully read, then edit, a translation from {source_lang} to {target_lang}, taking into +account a list of expert suggestions and constructive criticisms. -The source text, initial translation, and expert suggestions, delimited by XML tags , and are as follows: +The source text, the initial translation, and the expert linguist suggestions are delimited by XML tags , and \ +as follows: {source_text} @@ -198,11 +202,11 @@ def one_chunk_improve_translation( {reflection} -Please take into account the expert suggestions when rewriting the translations to improve it. Consider: +Please take into account the expert suggestions when editing the translation. Edit the translation by ensuring: -(i) accuracy (by correcting errors of addition, mistranslation, omission, untranslated text), -(ii) fluency (grammar, inconsistency, punctuation, register, spelling), \ -(iii) style (fix awkward wording), +(i) accuracy (by correcting errors of addition, mistranslation, omission, or untranslated text), +(ii) fluency (by applying {target_lang} grammar, spelling and punctuation rules and ensuring there are no unnecessary repetitions), \ +(iii) style (by ensuring the translations reflect the style of the source text) (iv) terminology (inappropriate for context, inconsistent use), or \ (v) other errors.