From 01a3c2c7699a14a04975922ed1699d90d942b99c Mon Sep 17 00:00:00 2001 From: firattamur Date: Wed, 20 Mar 2024 00:30:59 +0300 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=94=A7=20chore:=20update=20readme=20f?= =?UTF-8?q?ix=20typos=20old=20parameters=20etc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 241 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 160 insertions(+), 81 deletions(-) diff --git a/README.md b/README.md index 6b7b6df..9a8a4bc 100644 --- a/README.md +++ b/README.md @@ -1,34 +1,38 @@ -
-    [centered logo image]
+    [centered logo image]

-    Structured Output Is All You Need!
+    Structured Output Is All You Need!

-LLMdantic redefines the integration of Large Language Models (LLMs) into your projects, offering a seamless, efficient,
-and powerful way to work with the latest advancements in AI. By abstracting the complexities of LLMs, llmdantic allows
-developers to focus on what truly matters: building innovative applications.
-
-* **Ease of Use.** Simplify your LLM interactions. Forget about the intricacies of prompts and models; define your
-  requirements and let llmdantic handle the rest.
-* **Data Integrity.** With Pydantic, define input and output models that ensure your data aligns perfectly with your
-  requirements, maintaining structure and validation at every step.
-* **Modular and Extensible.** Easily switch between different LLMs and customize your experience with the modular and
-  extensible framework provided by llmdantic.
-* **Cost Tracking.** Keep track of your LLM usage and costs, ensuring you stay within budget and optimize your usage.
-* **Batch Processing.** Process multiple data points in a single call, streamlining your operations and enhancing
-  efficiency.
-* **Retry Mechanism.** Automatically retry failed requests, ensuring you get the results you need without any hassle.
+LLMdantic is a powerful and efficient Python library that simplifies the integration of Large Language Models (LLMs) into your projects. Built on top of the [Langchain](https://github.com/hwchase17/langchain) package and [Pydantic](https://github.com/pydantic/pydantic) models, LLMdantic provides a seamless, structured approach to working with LLMs.
+
+## Features 🚀
+
+- 🌐 Wide range of LLM support through Langchain integrations
+- 🛡️ Ensures data integrity with Pydantic models for input and output validation
+- 🧩 Modular and extensible design for easy customization
+- 💰 Cost tracking and optimization for OpenAI models
+- 🚀 Efficient batch processing for handling multiple data points
+- 🔄 Robust retry mechanism for a smooth and uninterrupted experience

## Getting Started 🌟

+### Requirements
+
+Before using LLMdantic, make sure you have set the required API keys for the LLMs you plan to use. For example, if you're using OpenAI's models, set the `OPENAI_API_KEY` environment variable:
+
+```bash
+export OPENAI_API_KEY="your-api-key"
+```
+
+If you're using other LLMs, follow the setup instructions for the respective providers in Langchain's documentation.
+
### Installation

```bash
@@ -37,134 +41,209 @@ pip install llmdantic
```

### Usage

-1. **Define Your Models**
+#### 1. Define input and output schemas using Pydantic:
+
+- Use Pydantic to define input and output models with custom validation rules.
+
+> [!IMPORTANT]
+>
+> Add docstrings to validation rules to provide prompts for the LLM. This helps the LLM understand the validation rules and produce better results.

-- **inp_model**: Define the structure of the data you want to process.
-- **out_model**: Define the structure of the data you expect to receive.
-    - Use Pydantic to define your models and add custom validation rules.
-    - Custom validation rules are used to ensure the integrity and quality of your data.
-    - Add docstrings to your custom validation rules to provide prompts for the LLM.
```python
from pydantic import BaseModel, field_validator
-
class SummarizeInput(BaseModel):
    text: str
-
class SummarizeOutput(BaseModel):
    summary: str
-    @field_validator("summary")
-    def summary_must_not_be_empty(cls, v) -> bool:
-        """Summary cannot be empty""" # Add docstring that explains the validation rule. This will be used as a prompt for the LLM
+
+    @field_validator("summary")
+    def summary_must_not_be_empty(cls, v) -> str:
+        """Summary cannot be empty""" # Add docstring that explains the validation rule. This will be used as a prompt for the LLM.
        if not v.strip():
-            raise
+            raise ValueError("Summary cannot be empty")
        return v

    @field_validator("summary")
-    def summary_must_be_short(cls, v) -> bool:
+    def summary_must_be_short(cls, v) -> str:
        """Summary must be less than 100 words""" # Add docstring that explains the validation rule. This will be used as a prompt for the LLM.
        if len(v.split()) > 100:
-            raise
+            raise ValueError("Summary must be less than 100 words")
        return v
```

-2. **Initialize LLMdantic**
+#### 2. Create an LLMdantic client:

-- Initialize **LLMdantic** with your input and output models.
-- Also, provide a objective for the LLM to understand the task.
+- Provide the input and output models, an objective, and the configuration.
+
+> [!TIP]
+>
+> The `objective` is a prompt that will be used to generate the actual prompt sent to the LLM. It should be a high-level description of the task you want the LLM to perform.
+>
+> The `inp_schema` and `out_schema` are the input and output models you defined in the previous step.
+>
+> The `retries` parameter is the number of times LLMdantic will retry the request in case of failure.

```python
-from llmdantic import LLMdantic, LLMdanticConfig
-from langchain_openai import OpenAI
-from langchain.llms.base import BaseLanguageModel
+from llmdantic import LLMdantic, LLMdanticConfig
+from langchain_openai import ChatOpenAI

-llm: BaseLanguageModel = OpenAI()
+llm = ChatOpenAI()

config: LLMdanticConfig = LLMdanticConfig(
-    objective="Summarize the text",
-    inp_model=SummarizeInput,
-    out_model=SummarizeOutput,
-    retries=3
+    objective="Summarize the text",
+    inp_schema=SummarizeInput,
+    out_schema=SummarizeOutput,
+    retries=3,
)

-llmdantic = LLMdantic(
-    llm=llm,
-    config=config
-)
+llmdantic = LLMdantic(llm=llm, config=config)
```

-3. **Process Your Data**
+Here's the prompt template generated based on the input and output models:
+
+```text
+Objective: Summarize the text

-- Use the `invoke` or `batch` method to process your data.
+Input 'SummarizeInput':
+{input}

-- `invoke` returns an instance of `LLMdanticResult` which contains:
-    - `text`: The output of the LLM.
-    - `output`: The output model with the processed data.
-    - `retry_count`: The number of retries made to get the result.
-    - `cost`: The cost of the request.
-    - `inp_tokens`: The number of tokens used for the input.
-    - `out_tokens`: The number of tokens used for the output.
-    - `successful_requests`: The number of successful requests made.
+Output 'SummarizeOutput''s fields MUST FOLLOW the RULES:
+SummarizeOutput.summary:
+• SUMMARY CANNOT BE EMPTY
+• SUMMARY MUST BE LESS THAN 100 WORDS

-- `batch` returns a list of `LLMdanticResult` for each input data.
+{format_instructions}
+```
+
+#### 3. Generate output using LLMdantic:
+
+> [!TIP]
+>
+> The `invoke` method is used for single requests, while the `batch` method is used for batch processing.
+>
+> The `invoke` method returns an instance of `LLMdanticResult`, which contains the generated text, the parsed output, and other useful information such as the cost and the number of input and output tokens. Check out the [LLMdanticResult](#LLMdanticResult) model for more details.
+>

```python
from llmdantic import LLMdanticResult
+from typing import Optional

-data: SummarizeInput = SummarizeInput(text="A long article about natural language processing...")
+data = SummarizeInput(text="A long article about natural language processing...")

result: LLMdanticResult = llmdantic.invoke(data)

-if result.output:
-    print(result.output.summary)
+output: Optional[SummarizeOutput] = result.output
+
+if output:
+    print(output.summary)
+```
+
+Here's the actual prompt sent to the LLM based on the input data:
+
+```text
+Objective: Summarize the text
+
+Input 'SummarizeInput':
+{'text': 'A long article about natural language processing...'}
+
+Output 'SummarizeOutput''s fields MUST FOLLOW the RULES:
+SummarizeOutput.summary:
+• SUMMARY CANNOT BE EMPTY
+• SUMMARY MUST BE LESS THAN 100 WORDS
+
+The output should be formatted as a JSON instance that conforms to the JSON schema below.
+
+As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
+the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
+
+Here is the output schema:
+{"properties": {"summary": {"title": "Summary", "type": "string"}}, "required": ["summary"]}
```

- For batch processing, pass a list of input data.

+> [!IMPORTANT]
+>
+> The `batch` method returns a list of `LLMdanticResult` instances, each containing the generated text, the parsed output, and other useful information such as the cost and the number of input and output tokens. Check out the [LLMdanticResult](#LLMdanticResult) model for more details.
+>
+> The `concurrency` parameter is the number of concurrent requests to make. Check the rate limits of your LLM provider before setting this value.
+>

```python
+from typing import List
+
data: List[SummarizeInput] = [
    SummarizeInput(text="A long article about natural language processing..."),
-    SummarizeInput(text="A long article about computer vision...")
+    SummarizeInput(text="A long article about computer vision...")
]

-results: List[Optional[SummarizeOutput]] = llmdantic.batch(data)
+results: List[LLMdanticResult] = llmdantic.batch(data, concurrency=2)

for result in results:
-    if result:
-        print(result.summary)
+    if result.output:
+        print(result.output.summary)
```

-4. **Track Costs and Stats**:
+#### 4. Monitor usage and costs:
+
+> [!IMPORTANT]
+>
+> The cost tracking feature is currently available for OpenAI models only.
+>
+> The `usage` attribute returns an instance of `LLMdanticUsage`, which contains the number of input and output tokens, successful requests, cost, and successful outputs. Check out the [LLMdanticUsage](#LLMdanticUsage) model for more details.
+>
+> Note that usage is tracked for the entire lifetime of the `LLMdantic` instance.

-- Use the `cost` attribute of the `LLMdanticResult` to track the cost of the request.
-- Use the `usage` attribute of the `LLMdantic` to track the usage stats overall.
+- Use the `cost` attribute of the `LLMdanticResult` to track the cost of the request (currently available for OpenAI models).
+
+- Use the `usage` attribute of the `LLMdantic` instance to track overall usage stats.
```python
from llmdantic import LLMdanticResult

-data: SummarizeInput = SummarizeInput(text="A long article about natural language processing...")
+data: SummarizeInput = SummarizeInput(text="A long article about natural language processing...")

result: LLMdanticResult = llmdantic.invoke(data)

if result.output:
    print(result.output.summary)

-# Track the cost of the request
-print(f"Cost: {result.cost}")
+# Track the cost of the request (OpenAI models only)
+print(f"Cost: {result.cost}")

# Track the usage stats
print(f"Usage: {llmdantic.usage}")
```

+```text
+Cost: 0.0003665
+Usage: LLMdanticUsage(
+    inp_tokens=219,
+    out_tokens=19,
+    total_tokens=238,
+    successful_requests=1,
+    cost=0.000367,
+    successful_outputs=1
+)
+```
+
## Advanced Usage 🛠

-**LLMdantic** is built on top of the `langchain` package, which provides a modular and extensible framework for working
-with LLMs. You can easily switch between different LLMs and customize your experience.
+`LLMdantic` is built on top of the `langchain` package, which provides a modular and extensible framework for working with LLMs. You can easily switch between different LLMs and customize your experience.
+
+### Switching LLMs

-### Switching LLMs:
+> [!IMPORTANT]
+>
+> Make sure to set the required API keys for the new LLM you plan to use.
+>
+> The `llm` parameter of the `LLMdantic` class should be an instance of `BaseLanguageModel` from the langchain package.
+>

+> [!TIP]
+>
+> You can use the `langchain_community` package to access a wide range of LLMs from different providers.
+>
+> You may need to provide `model_name`, `api_key`, and other parameters based on the LLM you want to use. Check out the documentation of the respective LLM provider for more details.
+>

-- **LLMdantic** uses the `OpenAI` LLM by default. You can switch to a different LLM by providing an instance of the
-  desired LLM.

```python
from llmdantic import LLMdantic, LLMdanticConfig
@@ -175,16 +254,15 @@ llm: BaseLanguageModel = Ollama()

config: LLMdanticConfig = LLMdanticConfig(
    objective="Summarize the text",
-    inp_model=SummarizeInput,
-    out_model=SummarizeOutput,
-    retries=3
+    inp_schema=SummarizeInput,
+    out_schema=SummarizeOutput,
+    retries=3,
)

llmdantic = LLMdantic(
    llm=llm,
    config=config
)
-
```

## Contributing 🤝

@@ -195,3 +273,4 @@ Contributions are welcome! Whether you're fixing bugs, adding new features, or i

## License 📄

**LLMdantic** is released under the [MIT License](LICENSE). Feel free to use it, contribute, and spread the word!
+

From 8cf8d71fd57f4bf71dc9e6b93733115bbb499273 Mon Sep 17 00:00:00 2001
From: firattamur
Date: Wed, 20 Mar 2024 00:36:24 +0300
Subject: [PATCH 2/2] =?UTF-8?q?Bump=20version:=201.0.2=20=E2=86=92=201.0.3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .bumpversion.cfg      | 2 +-
 llmdantic/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 8faabdf..14f96e3 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.0.2
+current_version = 1.0.3
 commit = True
 tag = False
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
diff --git a/llmdantic/__init__.py b/llmdantic/__init__.py index f79bc9e..f17f2b3 100644 --- a/llmdantic/__init__.py +++ b/llmdantic/__init__.py @@ -1,6 +1,6 @@ """LLMdantic is a Python package that provides structured interaction with LLMs.""" -__version__ = "1.0.2" +__version__ = "1.0.3" from .llmdantic import LLMdantic from .models import LLMdanticConfig, LLMdanticResult
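
For reviewers who want to sanity-check the bump, here is a minimal end-to-end sketch of the workflow the updated README describes. It is not part of the patch itself; it assumes `llmdantic==1.0.3` and `langchain-openai` are installed and `OPENAI_API_KEY` is set, and the schema classes and prompt text simply mirror the README's own summarization example.

```python
# Minimal smoke test for the 1.0.3 release. Assumes:
#   pip install llmdantic==1.0.3 langchain-openai
#   export OPENAI_API_KEY="your-api-key"
# Schema names mirror the README example above; nothing here is new API.
from typing import Optional

from langchain_openai import ChatOpenAI
from pydantic import BaseModel, field_validator

import llmdantic
from llmdantic import LLMdantic, LLMdanticConfig, LLMdanticResult

assert llmdantic.__version__ == "1.0.3"  # the version bumped by PATCH 2/2


class SummarizeInput(BaseModel):
    text: str


class SummarizeOutput(BaseModel):
    summary: str

    @field_validator("summary")
    def summary_must_not_be_empty(cls, v: str) -> str:
        """Summary cannot be empty"""  # docstring doubles as a validation-rule prompt for the LLM
        if not v.strip():
            raise ValueError("Summary cannot be empty")
        return v


config = LLMdanticConfig(
    objective="Summarize the text",
    inp_schema=SummarizeInput,   # renamed from inp_model in this README update
    out_schema=SummarizeOutput,  # renamed from out_model in this README update
    retries=3,
)
client = LLMdantic(llm=ChatOpenAI(), config=config)

result: LLMdanticResult = client.invoke(
    SummarizeInput(text="A long article about natural language processing...")
)
output: Optional[SummarizeOutput] = result.output
if output:
    print(output.summary)

print(client.usage)  # cumulative token/cost stats over this instance's lifetime
```

If the assertion passes and a non-empty summary prints, the README examples and the version bump are consistent with each other.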