diff --git a/.gitignore b/.gitignore index 8e18f6ac37..3aedcd0701 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,7 @@ archive # any log file *log.txt todo +scratchpad # Ignore GPT Engineer files projects diff --git a/README.md b/README.md index ac4af5c834..2d804a6c04 100644 --- a/README.md +++ b/README.md @@ -49,11 +49,6 @@ With an api key that has GPT4 access run: **Results** - Check the generated files in `projects/my-new-project/workspace` -### Limitations -Implementing additional chain of thought prompting, e.g. [Reflexion](https://github.com/noahshinn024/reflexion), should be able to make it more reliable and not miss requested functionality in the main prompt. - -Contributors welcome! If you are unsure what to add, check out the ideas listed in the Projects tab in the GitHub repo. - ## Features You can specify the "identity" of the AI agent by editing the files in the `identity` folder. @@ -63,7 +58,11 @@ Editing the identity, and evolving the `main_prompt`, is currently how you make Each step in `steps.py` will have its communication history with GPT4 stored in the logs folder, and can be rerun with `scripts/rerun_edited_message_logs.py`. ## Contributing -If you want to contribute, please check out the [projects](https://github.com/AntonOsika/gpt-engineer/projects?query=is%3Aopen) or [issues tab](https://github.com/AntonOsika/gpt-engineer/issues) in the GitHub repo and please read the [contributing document](.github/CONTRIBUTING.md) on how to contribute. Here is our [Discord 💬](https://discord.gg/4t5vXHhu) +We are building the open platform for devs to tinker with and build their personal code-generation toolbox. + +If you want to contribute, please check out the [roadmap](https://github.com/AntonOsika/gpt-engineer/blob/main/ROADMAP.md), [projects](https://github.com/AntonOsika/gpt-engineer/projects?query=is%3Aopen) or [issues tab](https://github.com/AntonOsika/gpt-engineer/issues) in the GitHub repo. 
You are welcome to read the [contributing document](.github/CONTRIBUTING.md) and join our [Discord 💬](https://discord.gg/4t5vXHhu). + +We are currently looking for more maintainers and community organisers. Email anton.osika@gmail.com if you are interested in an official role. ## Example diff --git a/benchmark/RESULTS.md b/benchmark/RESULTS.md index ad169f8b26..ce3f72b293 100644 --- a/benchmark/RESULTS.md +++ b/benchmark/RESULTS.md @@ -4,6 +4,36 @@ $ python scripts/benchmark.py ``` +# 2023-06-21 + +| Benchmark | Ran | Works | Perfect | +|--------------------|-----|-------|---------| +| currency_converter | ✅ | ❌ | ❌ | +| image_resizer | ✅ | ✅ | ✅ | +| pomodoro_timer | ✅ | ✅ | ✅ | +| url_shortener | ✅ | ✅ | ✅ | +| file_explorer | ✅ | ✅ | ✅ | +| markdown_editor | ✅ | ✅ | ❌ | +| timer_app | ✅ | ❌ | ❌ | +| weather_app | ✅ | ✅ | ✅ | +| file_organizer | ✅ | ✅ | ✅ | +| password_generator | ✅ | ✅ | ✅ | +| todo_list | ✅ | ✅ | ✅ | + + +# Notes on the errors +Most errors come from the "generate entrypoint" step being incorrect. Ignoring +those, we get 8/11 fully correct. + +All errors are very easy to fix. + +One error was trying to modify a constant. +One error was that the html template was not fully filled in. 
+One error was that a dependency was used incorrectly; it is easy to fix. + + +# 2023-06-19 + | Benchmark | Ran | Works | Perfect | |--------------------|-----|-------|---------| | currency_converter | ❌ | ❌ | ❌ | diff --git a/gpt_engineer/main.py b/gpt_engineer/main.py index 34f0525c1d..cface530fc 100644 --- a/gpt_engineer/main.py +++ b/gpt_engineer/main.py @@ -7,6 +7,7 @@ import typer +from gpt_engineer import steps from gpt_engineer.ai import AI from gpt_engineer.db import DB, DBs from gpt_engineer.steps import STEPS @@ -20,7 +21,9 @@ def main( delete_existing: bool = typer.Argument(False, help="delete existing files"), model: str = "gpt-4", temperature: float = 0.1, - steps_config: str = "default", + steps_config: steps.Config = typer.Option( + steps.Config.DEFAULT, "--steps", "-s", help="decide which steps to run" + ), verbose: bool = typer.Option(False, "--verbose", "-v"), run_prefix: str = typer.Option( "", diff --git a/gpt_engineer/steps.py b/gpt_engineer/steps.py index 0df57c2a36..3370c9490c 100644 --- a/gpt_engineer/steps.py +++ b/gpt_engineer/steps.py @@ -2,6 +2,8 @@ import re import subprocess +from enum import Enum + from gpt_engineer.ai import AI from gpt_engineer.chat_to_files import to_files from gpt_engineer.db import DBs @@ -34,10 +36,10 @@ def clarify(ai: AI, dbs: DBs): break print() - user = input('(answer in text, or "q" to move on)\n') + user = input('(answer in text, or "c" to move on)\n') print() - if not user or user == "q": + if not user or user == "c": break user += ( @@ -145,10 +147,16 @@ def execute_entrypoint(ai, dbs): print() print('If yes, press enter. Otherwise, type "no"') print() - if input() != "": + if input() not in ["", "y", "yes"]: print("Ok, not executing the code.") return [] print("Executing the code...") + print( + "\033[92m" # green color + + "Note: If it does not work as expected, please consider running the code'" + + " in another way than above." 
+ + "\033[0m" + ) print() subprocess.run("bash run.sh", shell=True, cwd=dbs.workspace.path) return [] @@ -165,6 +173,8 @@ def gen_entrypoint(ai, dbs): "b) run all necessary parts of the codebase (in parallell if necessary).\n" "Do not install globally. Do not use sudo.\n" "Do not explain the code, just give the commands.\n" + "Do not use placeholders, use example values (like . for a folder argument) " + "if necessary.\n" ), user="Information about the codebase:\n\n" + dbs.workspace["all_output.txt"], ) @@ -183,7 +193,7 @@ def use_feedback(ai: AI, dbs: DBs): ai.fassistant(dbs.workspace["all_output.txt"]), ai.fsystem(dbs.identity["use_feedback"]), ] - messages = ai.next(messages, dbs.memory["feedback"]) + messages = ai.next(messages, dbs.input["feedback"]) to_files(messages[-1]["content"], dbs.workspace) return messages @@ -201,13 +211,36 @@ def fix_code(ai: AI, dbs: DBs): return messages +class Config(str, Enum): + DEFAULT = "default" + BENCHMARK = "benchmark" + SIMPLE = "simple" + TDD = "tdd" + TDD_PLUS = "tdd+" + CLARIFY = "clarify" + RESPEC = "respec" + EXECUTE_ONLY = "execute_only" + USE_FEEDBACK = "use_feedback" + + # Different configs of what steps to run STEPS = { - "default": [simple_gen, gen_entrypoint, execute_entrypoint], - "benchmark": [simple_gen, gen_entrypoint], - "simple": [simple_gen, gen_entrypoint, execute_entrypoint], - "tdd": [gen_spec, gen_unit_tests, gen_code, gen_entrypoint, execute_entrypoint], - "tdd+": [ + Config.DEFAULT: [ + clarify, + gen_clarified_code, + gen_entrypoint, + execute_entrypoint, + ], + Config.BENCHMARK: [simple_gen, gen_entrypoint], + Config.SIMPLE: [simple_gen, gen_entrypoint, execute_entrypoint], + Config.TDD: [ + gen_spec, + gen_unit_tests, + gen_code, + gen_entrypoint, + execute_entrypoint, + ], + Config.TDD_PLUS: [ gen_spec, gen_unit_tests, gen_code, @@ -215,8 +248,13 @@ def fix_code(ai: AI, dbs: DBs): gen_entrypoint, execute_entrypoint, ], - "clarify": [clarify, gen_clarified_code, gen_entrypoint, 
execute_entrypoint], - "respec": [ + Config.CLARIFY: [ + clarify, + gen_clarified_code, + gen_entrypoint, + execute_entrypoint, + ], + Config.RESPEC: [ gen_spec, respec, gen_unit_tests, @@ -224,12 +262,10 @@ def fix_code(ai: AI, dbs: DBs): gen_entrypoint, execute_entrypoint, ], - "execute_only": [execute_entrypoint], - "use_feedback": [use_feedback], + Config.USE_FEEDBACK: [use_feedback, gen_entrypoint, execute_entrypoint], + Config.EXECUTE_ONLY: [gen_entrypoint, execute_entrypoint], } # Future steps that can be added: -# self_reflect_and_improve_files, -# add_tests -# run_tests_and_fix_files, -# improve_based_on_in_file_feedback_comments +# run_tests_and_fix_files +# execute_entrypoint_and_fix_files_if_needed diff --git a/pyproject.toml b/pyproject.toml index 37c4687215..c1ca348b10 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"] [project] name = "gpt-engineer" -version = "0.0.3" +version = "0.0.4" description = "Specify what you want it to build, the AI asks for clarification, and then builds it." 
readme = "README.md" requires-python = ">=3" diff --git a/scripts/benchmark.py b/scripts/benchmark.py index 7efa6865a3..cde4957022 100644 --- a/scripts/benchmark.py +++ b/scripts/benchmark.py @@ -35,7 +35,7 @@ def main( "-m", "gpt_engineer.main", bench_folder, - "--steps-config", + "--steps", "benchmark", ], stdout=log_file, @@ -66,7 +66,7 @@ def main( "-m", "gpt_engineer.main", bench_folder, - "--steps-config", + "--steps", "execute_only", ], ) diff --git a/scripts/print_chat.py b/scripts/print_chat.py index 2ff96f6f3c..eef244f6fb 100644 --- a/scripts/print_chat.py +++ b/scripts/print_chat.py @@ -16,19 +16,22 @@ def pretty_print_conversation(messages): } formatted_messages = [] for message in messages: - assistant_content = ( - message["function_call"] - if message.get("function_call") - else message["content"] - ) - role_to_message = { - "system": f"system: {message['content']}\n", - "user": f"user: {message['content']}\n", - "assistant": f"assistant: {assistant_content}\n", - "function": f"function ({message['name']}): {message['content']}\n", - } - - formatted_messages.append(role_to_message[message["role"]]) + if message["role"] == "function": + formatted_messages.append( + f"function ({message['name']}): {message['content']}\n" + ) + else: + assistant_content = ( + message["function_call"] + if message.get("function_call") + else message["content"] + ) + role_to_message = { + "system": f"system: {message['content']}\n", + "user": f"user: {message['content']}\n", + "assistant": f"assistant: {assistant_content}\n", + } + formatted_messages.append(role_to_message[message["role"]]) for formatted_message in formatted_messages: print(