Add support for streaming chunks (#4)
* Add support for streaming chunks

@client = Groq::Client.new(model_id: "llama3-70b-8192")
@client.chat("Write a long poem about patience") do |content|
  print content
end
puts

* Also allow chat(stream: proc) to enable + handle streaming

* Support passing object with #call(content) or #call(content, chunk)

* Bump CI to newer rubies

* Pizza is better
drnic committed Apr 25, 2024
1 parent e11e62b commit 0662e56
Showing 9 changed files with 483 additions and 21 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
@@ -17,8 +17,8 @@ jobs:
strategy:
matrix:
ruby:
-  - '3.1.4'
-  - '3.3.0'
+  - '3.1.5'
+  - '3.3.1'

steps:
- uses: actions/checkout@v4
2 changes: 2 additions & 0 deletions Gemfile.lock
@@ -3,6 +3,7 @@ PATH
specs:
groq (0.2.0)
activesupport (> 5)
+ event_stream_parser (~> 1.0)
faraday (~> 2.0)
json

@@ -57,6 +58,7 @@ GEM
dry-inflector (~> 1.0)
dry-logic (~> 1.4)
zeitwerk (~> 2.6)
+ event_stream_parser (1.0.0)
faraday (2.9.0)
faraday-net_http (>= 2.0, < 3.2)
faraday-net_http (3.1.0)
84 changes: 84 additions & 0 deletions README.md
@@ -352,6 +352,90 @@ If you pass `--debug` to `bin/console` you will have this logger setup for you.
bin/console --debug
```

### Streaming

If your AI assistant's responses are being relayed live to a human, that human might appreciate seeing the response progressively rather than waiting for it to complete. The Groq API supports streaming responses.

Pass a block to `chat()` with either one or two arguments.

1. The first argument is the string content chunk of the response.
2. The optional second argument is the full response object from the API containing extra metadata.

The final block call signals the end of the response:

1. The first argument will be `nil`.
2. The optional second argument, the full response object, contains a summary of the Groq API usage, such as prompt tokens, prompt time, etc.

```ruby
puts "🍕 "
messages = [
S("You are a pizza sales person."),
U("What do you sell?")
]
@client.chat(messages) do |content|
print content
end
puts
```

Each chunk of the response will be printed to the console as it is received. It will look pretty.

The default `llama3-8b-8192` model is very fast and you might not see any streaming. Try a slower model like `llama3-70b-8192` or `mixtral-8x7b-32768`.

```ruby
@client = Groq::Client.new(model_id: "llama3-70b-8192")
@client.chat("Write a long poem about patience") do |content|
print content
end
puts
```

Your block can accept a second argument to receive the full response JSON object for each chunk:

```ruby
@client.chat("Write a long poem about patience") do |content, response|
pp content
pp response
end
```
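
Since the final call passes `nil` content together with the closing response object, the two-argument form also lets you read the usage summary at the end of the stream. A minimal sketch — the `x_groq`/`usage` key path is an assumption here; inspect `response` to confirm the shape:

```ruby
@client.chat("Write a long poem about patience") do |content, response|
  if content.nil?
    # Final call: no more text; the response carries usage metadata.
    usage = response && response.dig("x_groq", "usage") # assumed key path
    puts "\nUsage: #{usage.inspect}"
  else
    print content
  end
end
```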

Alternatively, you can pass a `Proc`, or any object that responds to `call`, via the `stream:` keyword argument:

```ruby
@client.chat("Write a long poem about patience", stream: ->(content) { print content })
```
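
Per the commit notes, the callable may also accept the chunk object as a second argument (`#call(content, chunk)`). A minimal sketch, assuming the client inspects the callable's arity as it does for blocks:

```ruby
printer = proc do |content, chunk|
  # content is the text delta (nil on the final call);
  # chunk is assumed to be the raw chunk object for this piece of the stream.
  print content unless content.nil?
end

@client.chat("Write a long poem about patience", stream: printer)
```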

You can also use a class with a `call` method taking either one or two arguments, as with the `Proc` forms above:

```ruby
class MessageBits
  def initialize(emoji)
    print "#{emoji} "
    @bits = []
  end

  def call(content)
    if content.nil?
      puts
    else
      print(content)
      @bits << content
    end
  end

  def to_s
    @bits.join("")
  end

  def to_assistant_message
    Assistant(to_s)
  end
end

bits = MessageBits.new("🍕")
@client.chat("Write a long poem about pizza", stream: bits)
```
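
When the stream finishes, the collected bits can be converted back into an assistant message and appended to your conversation history, as the streaming example script in this commit does:

```ruby
messages << bits.to_assistant_message
```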

## Examples

Here are some example uses of the `groq` gem and its syntax.
10 changes: 10 additions & 0 deletions examples/README.md
@@ -20,6 +20,16 @@ At the prompt, either talk to the AI agent, or use some special commands:
- `exit` to exit the conversation
- `summary` to get a summary of the conversation so far

### Streaming

There is also an example that streams the conversation to the terminal as it is received from the Groq API.

It defaults to the slower `llama3-70b-8192` model so that the streaming is more noticeable.

```bash
bundle exec examples/groq-user-chat-streaming.rb --agent-prompt examples/agent-prompts/pizzeria-sales.yml
```

### Pizzeria

Run the pizzeria example with the following command:
132 changes: 132 additions & 0 deletions examples/groq-user-chat-streaming.rb
@@ -0,0 +1,132 @@
#!/usr/bin/env ruby

require "optparse"
require "groq"
require "yaml"

include Groq::Helpers

@options = {
  model: "llama3-70b-8192",
  agent_prompt_path: File.join(File.dirname(__FILE__), "agent-prompts/helloworld.yml"),
  timeout: 20
}
OptionParser.new do |opts|
  opts.banner = "Usage: ruby script.rb [options]"

  opts.on("-m", "--model MODEL", "Model name") do |v|
    @options[:model] = v
  end

  opts.on("-a", "--agent-prompt PATH", "Path to agent prompt file") do |v|
    @options[:agent_prompt_path] = v
  end

  opts.on("-t", "--timeout TIMEOUT", "Timeout in seconds") do |v|
    @options[:timeout] = v.to_i
  end

  opts.on("-d", "--debug", "Enable debug mode") do |v|
    @options[:debug] = v
  end
end.parse!

raise "Missing --model option" if @options[:model].nil?
raise "Missing --agent-prompt option" if @options[:agent_prompt_path].nil?

def debug?
  @options[:debug]
end

# Read the agent prompt from the file
agent_prompt = YAML.load_file(@options[:agent_prompt_path])
user_emoji = agent_prompt["user_emoji"]
agent_emoji = agent_prompt["agent_emoji"]
system_prompt = agent_prompt["system_prompt"] || agent_prompt["system"]
can_go_first = agent_prompt["can_go_first"]

# Initialize the Groq client
@client = Groq::Client.new(model_id: @options[:model], request_timeout: @options[:timeout]) do |f|
  if debug?
    require "logger"

    # Create a logger instance
    logger = Logger.new($stdout)
    logger.level = Logger::DEBUG

    f.response :logger, logger, bodies: true # Log request and response bodies
  end
end

puts "Welcome to the AI assistant! I'll respond to your queries."
puts "You can quit by typing 'exit'."

def produce_summary(messages)
  combined = messages.map do |message|
    if message["role"] == "user"
      "User: #{message["content"]}"
    else
      "Assistant: #{message["content"]}"
    end
  end.join("\n")
  response = @client.chat([
    S("You are excellent at reading a discourse between a human and an AI assistant and summarising the current conversation."),
    U("Here is the current conversation:\n\n------\n\n#{combined}")
  ])
  puts response["content"]
end

messages = [S(system_prompt)]

if can_go_first
  print "#{agent_emoji} "
  message_bits = []
  response = @client.chat(messages) do |content|
    # content == nil on last message; and "" on first message
    next unless content
    print(content)
    message_bits << content
  end
  puts
  messages << A(message_bits.join(""))
end

class MessageBits
  def initialize(emoji)
    print "#{emoji} "
    @bits = []
  end

  def call(content)
    if content.nil?
      puts
    else
      print(content)
      @bits << content
    end
  end

  def to_assistant_message
    Assistant(@bits.join(""))
  end
end

loop do
  print "#{user_emoji} "
  user_input = gets.chomp

  break if user_input.downcase == "exit"

  # produce summary
  if user_input.downcase == "summary"
    produce_summary(messages)
    next
  end

  messages << U(user_input)

  # Use Groq to generate a response
  message_bits = MessageBits.new(agent_emoji)
  @client.chat(messages, stream: message_bits)
  messages << message_bits.to_assistant_message
end
1 change: 1 addition & 0 deletions groq.gemspec
@@ -36,6 +36,7 @@ Gem::Specification.new do |spec|
spec.add_dependency "faraday", "~> 2.0"
spec.add_dependency "json"
spec.add_dependency "activesupport", "> 5" # for Concerns
spec.add_dependency "event_stream_parser", "~> 1.0"

spec.add_development_dependency "vcr", "~> 6.0"
spec.add_development_dependency "webmock", "~> 3.0"
