Skip to content

Commit

Permalink
First implementation of filter afforder
Browse files Browse the repository at this point in the history
A filter afforder is a wrapper around a function afforder. The filter
afforder presents the text display of the browser to a language model
which filters out the irrelevant results based on the context.
  • Loading branch information
noahshinn committed Dec 6, 2023
1 parent 597a256 commit e30a899
Show file tree
Hide file tree
Showing 12 changed files with 211 additions and 22 deletions.
4 changes: 2 additions & 2 deletions actor/actorstrategy/basellm/llm_actor.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ func New(models *llm.Models, options *actorstrategy.Options) actorstrategy.Actor
afforderStrategyID = options.AfforderStrategyID
}
}
a := afforder.AfforderStrategyByID(afforderStrategyID)
a := afforder.AfforderStrategyByID(afforderStrategyID, models)
return &BaseLLMActor{
models: models,
afforder: a,
}
}

func (a *BaseLLMActor) NextAction(ctx context.Context, traj *trajectory.Trajectory, br *browser.Browser) (trajectory.TrajectoryItem, error) {
messages, functionDefs, err := a.afforder.GetAffordances(traj, br)
messages, functionDefs, err := a.afforder.GetAffordances(ctx, traj, br)
if err != nil {
return nil, fmt.Errorf("failed to get affordances: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion actor/actorstrategy/react/react_actor.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ func New(models *llm.Models, options *actorstrategy.Options) actorstrategy.Actor
afforderStrategyID = options.AfforderStrategyID
}
}
a := afforder.AfforderStrategyByID(afforderStrategyID)
a := afforder.AfforderStrategyByID(afforderStrategyID, models)
return &ReactActor{
models: models,
afforder: a,
Expand Down
4 changes: 2 additions & 2 deletions actor/actorstrategy/reflexion/reflexion_actor.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func New(models *llm.Models, options *actorstrategy.Options) actorstrategy.Actor
maxNumIterations = options.MaxNumIterations
}
}
a := afforder.AfforderStrategyByID(afforderStrategyID)
a := afforder.AfforderStrategyByID(afforderStrategyID, models)
baseActorStrategy := basellm.New(models, &actorstrategy.Options{
AfforderStrategyID: afforderStrategyID,
})
Expand All @@ -47,7 +47,7 @@ func New(models *llm.Models, options *actorstrategy.Options) actorstrategy.Actor
}

func (a *ReflexionActor) NextAction(ctx context.Context, traj *trajectory.Trajectory, br *browser.Browser) (trajectory.TrajectoryItem, error) {
_, actionSpace, err := a.afforder.GetAffordances(traj, br)
_, actionSpace, err := a.afforder.GetAffordances(ctx, traj, br)
if err != nil {
return nil, fmt.Errorf("failed to get affordances: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion actor/actorstrategy/verification/verification_actor.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func New(models *llm.Models, options *actorstrategy.Options) actorstrategy.Actor
baseActorStrategy := basellm.New(models, &actorstrategy.Options{
AfforderStrategyID: afforderStrategyID,
})
a := afforder.AfforderStrategyByID(afforderStrategyID)
a := afforder.AfforderStrategyByID(afforderStrategyID, models)
return &VerificationActor{
models: models,
baseActorStrategy: baseActorStrategy,
Expand Down
157 changes: 157 additions & 0 deletions afforder/afforderstrategy/filterafforder/filter_afforder.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
package filterafforder

import (
"collaborativebrowser/afforder/afforderstrategy"
"collaborativebrowser/afforder/afforderstrategy/functionafforder"
"collaborativebrowser/browser"
"collaborativebrowser/llm"
"collaborativebrowser/trajectory"
"collaborativebrowser/utils/slicesx"
"context"
_ "embed"
"encoding/json"
"fmt"
"regexp"
"strings"
)

// FilterAfforder is an afforder strategy that wraps a function afforder and
// additionally holds the LLM models used to filter the browser display before
// it is turned into message affordances.
type FilterAfforder struct {
	// Embedded so that FilterAfforder reuses the FunctionAfforder methods
	// (e.g. GetMessageAffordances, GetFunctionAffordances).
	functionafforder.FunctionAfforder
	// models provides the chat model queried by filterBrowserDisplay.
	models *llm.Models
}

// systemPromptToFilterAffordances is the system message sent to the chat model
// by filterBrowserDisplay. Its text is embedded at build time from
// system_prompt_to_filter_affordances.txt.
//
//go:embed system_prompt_to_filter_affordances.txt
var systemPromptToFilterAffordances string

// New returns a FilterAfforder that uses models to filter the browser display.
//
// The embedded FunctionAfforder is initialised from functionafforder.New()
// rather than left at its zero value. GetFunctionAffordances returns the
// embedded permissibleFunctions field, which would otherwise be nil.
// NOTE(review): this assumes functionafforder.New returns a
// *functionafforder.FunctionAfforder with that field populated; if the
// assertion fails the embedded value stays zero, as before.
func New(models *llm.Models) afforderstrategy.AfforderStrategy {
	fa := &FilterAfforder{models: models}
	if base, ok := functionafforder.New().(*functionafforder.FunctionAfforder); ok && base != nil {
		fa.FunctionAfforder = *base
	}
	return fa
}

// GetAffordances asks the chat model to strip irrelevant lines from the
// browser display, then builds the message affordances from the filtered
// display and pairs them with the function affordances of the embedded
// function afforder. Any failure while filtering is wrapped and returned with
// nil messages and nil function definitions.
func (fa *FilterAfforder) GetAffordances(ctx context.Context, traj *trajectory.Trajectory, br *browser.Browser) (messages []*llm.Message, functionDefs []*llm.FunctionDef, err error) {
	display, filterErr := fa.filterBrowserDisplay(ctx, br, traj)
	if filterErr != nil {
		return nil, nil, fmt.Errorf("failed to render browser display: %w", filterErr)
	}
	return fa.GetMessageAffordances(display, traj), fa.GetFunctionAffordances(), nil
}

// filterIrrelevantLinesFunctionName is the single source of truth for the name
// of the function the chat model is forced to call. The function definition,
// the forced function call and the response check must all use the same name,
// otherwise the model is asked to call a function that was never defined.
const filterIrrelevantLinesFunctionName = "filter_irrelevant_lines"

// openEndedRangeItemPattern matches the head of an item of the form
// `<start>: description="..."`, i.e. a range that runs to the end of the page.
var openEndedRangeItemPattern = regexp.MustCompile(`^(\d+): `)

// filterBrowserDisplay shows the chat model the line-numbered browser display
// together with the abbreviated trajectory, asks it (through a forced function
// call) which lines are irrelevant for the next action, and returns the raw
// browser display with those lines removed.
//
// Line numbers are zero-based indices into the display split on "\n", matching
// the numbering produced by displayBrowserContentWithLineno.
//
// An error is returned when the model call fails, when the response carries no
// call (or a call to a different function), or when the returned arguments
// cannot be decoded into a list of line specifications.
func (fa *FilterAfforder) filterBrowserDisplay(ctx context.Context, br *browser.Browser, traj *trajectory.Trajectory) (filteredBrowserDisplay string, err error) {
	rawBrowserDisplay := br.GetDisplay().MD
	numberedBrowserDisplay := displayBrowserContentWithLineno(rawBrowserDisplay)
	trajDisplay := traj.GetAbbreviatedText()
	messages := []*llm.Message{
		{
			Role:    llm.MessageRoleSystem,
			Content: systemPromptToFilterAffordances,
		},
		{
			Role: llm.MessageRoleUser,
			Content: fmt.Sprintf(`----- START BROWSER -----
%s
----- END BROWSER -----
----- START TRAJECTORY -----
%s
----- END TRAJECTORY -----
First, list a description of the next action that should be taken. Then, list a sequence of the irrelevant lines. These lines will be deleted and the remaining lines will be displayed as the web browser for your next action.`, numberedBrowserDisplay, trajDisplay),
		},
	}
	functionDef := &llm.FunctionDef{
		Name:        filterIrrelevantLinesFunctionName,
		Description: "Filter out the irrelevant lines from the browser display. The remaining lines will be displayed as the web browser for your next action.",
		Parameters: llm.Parameters{
			Type: "object",
			Properties: map[string]llm.Property{
				"next_action_description": {
					Type:        "string",
					Description: "A description of the next action that should be taken. This description should be a single line of text.",
				},
				"irrelevant_lines": {
					Type:        "array",
					Description: "A sequence of the irrelevant lines. Each item is a number or a range followed by a description.",
					Items: &llm.ArrayItems{
						Type: "string",
					},
				},
			},
			Required: []string{"next_action_description", "irrelevant_lines"},
		},
	}

	res, err := fa.models.DefaultChatModel.Message(ctx, messages, &llm.MessageOptions{
		Temperature:  0.0,
		Functions:    []*llm.FunctionDef{functionDef},
		FunctionCall: filterIrrelevantLinesFunctionName,
	})
	if err != nil {
		return "", fmt.Errorf("failed to get response from chat model: %w", err)
	}
	if res.FunctionCall == nil {
		return "", fmt.Errorf("response from chat model did not include a function call")
	}
	if res.FunctionCall.Name != filterIrrelevantLinesFunctionName {
		return "", fmt.Errorf("response from chat model did not include a function call to %s", filterIrrelevantLinesFunctionName)
	}

	var args map[string]interface{}
	if err := json.Unmarshal([]byte(res.FunctionCall.Arguments), &args); err != nil {
		return "", fmt.Errorf("failed to unmarshal arguments from chat model response: %w", err)
	}
	rawItems, ok := args["irrelevant_lines"].([]interface{})
	if !ok {
		return "", fmt.Errorf("failed to parse irrelevant lines from chat model response")
	}

	browserLines := strings.Split(rawBrowserDisplay, "\n")
	// parseIrrelevantLinesResult does not know how long the page is, so an
	// open-ended "<start>:" range is rewritten here into the explicit,
	// end-exclusive range "<start>-<number of lines>" before parsing.
	explicitRange := fmt.Sprintf("${1}-%d ", len(browserLines))
	items := make([]string, 0, len(rawItems))
	for _, rawItem := range rawItems {
		item, isString := rawItem.(string)
		if !isString {
			return "", fmt.Errorf("failed to parse irrelevant lines from chat model response")
		}
		items = append(items, openEndedRangeItemPattern.ReplaceAllString(item, explicitRange))
	}

	irrelevantLines, err := parseIrrelevantLinesResult(items)
	if err != nil {
		return "", fmt.Errorf("failed to parse irrelevant lines from chat model response: %w", err)
	}
	relevantLines := slicesx.Filter(browserLines, func(_ string, i int) bool {
		_, irrelevant := irrelevantLines[i]
		return !irrelevant
	})
	return strings.Join(relevantLines, "\n"), nil
}

// displayBrowserContentWithLineno prefixes every "\n"-separated line of s with
// its zero-based index in the form "[<index>] <line>" and joins the lines back
// together with "\n".
func displayBrowserContentWithLineno(s string) string {
	withLineno := func(content string, lineno int) string {
		return fmt.Sprintf("[%d] %s", lineno, content)
	}
	rawLines := strings.Split(s, "\n")
	return strings.Join(slicesx.Map(rawLines, withLineno), "\n")
}

// irrelevantLineItemPattern matches one item returned by the model: a line
// specification followed by ` description="..."`. Capture groups:
//
//	1, 2: explicit range "<start>-<end>" or "<start>:<end>"
//	3:    prefix range ":<end>"
//	4:    open-ended range "<start>:"
//	5:    single line written as "<n>" with literal angle brackets
//	6:    single line written as a bare number
var irrelevantLineItemPattern = regexp.MustCompile(`^(?:(\d+)[:-](\d+)|:(\d+)|(\d+):|<(\d+)>|(\d+)) description=".+"$`)

// parseLineNumber converts a run of decimal digits into an int and reports
// values that do not fit.
func parseLineNumber(digits string) (int, error) {
	var n int
	if _, err := fmt.Sscanf(digits, "%d", &n); err != nil {
		return 0, err
	}
	return n, nil
}

// parseIrrelevantLinesResult turns the model's list of line specifications
// into a set of zero-based line indices to remove.
//
// Accepted items (each followed by ` description="..."`):
//   - "<n>" or "n": the single line n
//   - "a-b" or "a:b": lines a up to but not including b; when a == b the
//     single line a is marked
//   - ":b": lines 0 up to but not including b
//   - "a:": accepted, but contributes no lines because the total number of
//     lines is not known here; callers that know the page length must expand
//     it themselves
//
// An error is returned for an item that does not match the expected format or
// whose line number does not fit in an int.
func parseIrrelevantLinesResult(res []string) (map[int]struct{}, error) {
	irrelevantLines := make(map[int]struct{})
	for _, item := range res {
		groups := irrelevantLineItemPattern.FindStringSubmatch(item)
		if groups == nil {
			return nil, fmt.Errorf("invalid format: %s", item)
		}
		var (
			start, end int
			err        error
		)
		switch {
		case groups[1] != "":
			if start, err = parseLineNumber(groups[1]); err == nil {
				end, err = parseLineNumber(groups[2])
			}
		case groups[3] != "":
			// The display is numbered from 0, so a prefix range starts there.
			start = 0
			end, err = parseLineNumber(groups[3])
		case groups[4] != "":
			// Open-ended range: the end is unknown at this level.
			continue
		default:
			single := groups[5]
			if single == "" {
				single = groups[6]
			}
			start, err = parseLineNumber(single)
			end = start
		}
		if err != nil {
			return nil, fmt.Errorf("invalid line number in %q: %w", item, err)
		}
		if start == end {
			irrelevantLines[start] = struct{}{}
			continue
		}
		for i := start; i < end; i++ {
			irrelevantLines[i] = struct{}{}
		}
	}
	return irrelevantLines, nil
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Context
You will be given a text display of a web browser within a BROWSER window. You will also be given a conversation trajectory between you (an AI web browsing assistant) and a human user. Each line of content in the text web browser will be numbered in the format [<number>] <content>. Your goal is to read the browser content, read the trajectory, write a description of the intent of the next action, and then write a newline-delimited sequence of numbers or number ranges of irrelevant lines.

# Task
Your task is to describe the intent to prove that you understand the next action to take, then write the numbers or ranges of the irrelevant lines to the next future action at hand. These lines will be removed from the text browser display so that a future language model will not have unnecessary context in the text view. Beware: removing important lines may affect the performance of the future language model.

# Syntax
Write a newline-delimited sequence of numbers. Acceptable items in the sequence:
single number: "<number>"
range: "<start number>-<end number>" or ":<number>" (start to the number) or "<number>:" (number to the end)

Each number or range must be followed with a short few-word description of why it is being removed. For example:
<single number or range> description="<the description>"

Note: the end number is not inclusive.
16 changes: 13 additions & 3 deletions afforder/afforderstrategy/functionafforder/function_afforder.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"collaborativebrowser/browser/virtualid"
"collaborativebrowser/llm"
"collaborativebrowser/trajectory"
"context"
_ "embed"
"encoding/json"
"fmt"
Expand Down Expand Up @@ -103,19 +104,24 @@ func New() afforderstrategy.AfforderStrategy {
}
}

func (a *FunctionAfforder) GetAffordances(traj *trajectory.Trajectory, br *browser.Browser) ([]*llm.Message, []*llm.FunctionDef, error) {
func (a *FunctionAfforder) GetAffordances(ctx context.Context, traj *trajectory.Trajectory, br *browser.Browser) ([]*llm.Message, []*llm.FunctionDef, error) {
pageRender, err := br.Render(language.LanguageMD)
if err != nil {
return nil, nil, fmt.Errorf("browser failed to render page: %w", err)
}
messages := a.GetMessageAffordances(pageRender, traj)
return messages, a.permissibleFunctions, nil
}

func (a *FunctionAfforder) GetMessageAffordances(browserRender string, traj *trajectory.Trajectory) []*llm.Message {
state := fmt.Sprintf(`----- START BROWSER -----
%s
----- END BROWSER -----
----- START TRAJECTORY -----
%s
----- END TRAJECTORY -----
`, pageRender, traj.GetAbbreviatedText())
`, browserRender, traj.GetAbbreviatedText())
messages := []*llm.Message{
{
Role: llm.MessageRoleSystem,
Expand All @@ -126,7 +132,11 @@ func (a *FunctionAfforder) GetAffordances(traj *trajectory.Trajectory, br *brows
Content: fmt.Sprintf("%s\n\nLook at the Trajectory to inform your next action.", strings.TrimSpace(state)),
},
}
return messages, a.permissibleFunctions, nil
return messages
}

// GetFunctionAffordances returns the afforder's permissibleFunctions slice as
// is (no copy is made).
func (a *FunctionAfforder) GetFunctionAffordances() []*llm.FunctionDef {
	return a.permissibleFunctions
}

func (a *FunctionAfforder) ParseNextAction(name string, arguments string) (trajectory.TrajectoryItem, error) {
Expand Down
3 changes: 2 additions & 1 deletion afforder/afforderstrategy/strategy.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ import (
"collaborativebrowser/browser"
"collaborativebrowser/llm"
"collaborativebrowser/trajectory"
"context"
)

type AfforderStrategy interface {
GetAffordances(traj *trajectory.Trajectory, br *browser.Browser) (messages []*llm.Message, functionDefs []*llm.FunctionDef, err error)
GetAffordances(ctx context.Context, traj *trajectory.Trajectory, br *browser.Browser) (messages []*llm.Message, functionDefs []*llm.FunctionDef, err error)
ParseNextAction(name string, arguments string) (trajectory.TrajectoryItem, error)
DoesActionExist(name string) bool
}
5 changes: 3 additions & 2 deletions afforder/strategy.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"collaborativebrowser/afforder/afforderstrategy"
"collaborativebrowser/afforder/afforderstrategy/filterafforder"
"collaborativebrowser/afforder/afforderstrategy/functionafforder"
"collaborativebrowser/llm"
"log"
)

Expand All @@ -16,12 +17,12 @@ const (

const DefaultAfforderStrategyID = AfforderStrategyIDFunctionAfforder

func AfforderStrategyByID(id AfforderStrategyID) afforderstrategy.AfforderStrategy {
func AfforderStrategyByID(id AfforderStrategyID, models *llm.Models) afforderstrategy.AfforderStrategy {
switch id {
case AfforderStrategyIDFunctionAfforder:
return functionafforder.New()
case AfforderStrategyIDFilterAfforder:
return filterafforder.New()
return filterafforder.New(models)
default:
log.Printf("invalid afforder strategy ID: %s; defaulting to %s", id, DefaultAfforderStrategyID)
return functionafforder.New()
Expand Down
9 changes: 7 additions & 2 deletions llm/llm.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,13 @@ type Parameters struct {
}

type Property struct {
Type string `json:"type"`
Description string `json:"description"`
Type string `json:"type"`
Description string `json:"description"`
Items *ArrayItems `json:"items,omitempty"`
}

// ArrayItems describes the element type of an array-typed Property; it is
// serialized under the property's "items" key.
type ArrayItems struct {
	// Type is the type name of each array element (e.g. "string").
	Type string `json:"type"`
}

type FunctionCall struct {
Expand Down
2 changes: 1 addition & 1 deletion translators/html2md/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func cleanup(mdText string) string {

// remove trailing spaces for each line
lines := strings.Split(s, "\n")
s = strings.Join(slicesx.Map(lines, func(str string) string {
s = strings.Join(slicesx.Map(lines, func(str string, _ int) string {
return strings.TrimSpace(str)
}), "\n")

Expand Down
14 changes: 7 additions & 7 deletions utils/slicesx/slices.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
package slicesx

func Map[T, U any](s []T, f func(T) U) []U {
func Map[T, U any](s []T, f func(item T, idx int) U) []U {
mapped := make([]U, len(s))
for i, v := range s {
mapped[i] = f(v)
for idx, v := range s {
mapped[idx] = f(v, idx)
}
return mapped
}

func Filter[T any](s []T, f func(T) bool) []T {
filtered := make([]T, 0)
for _, v := range s {
if f(v) {
func Filter[T any](s []T, f func(item T, idx int) bool) []T {
filtered := []T{}
for idx, v := range s {
if f(v, idx) {
filtered = append(filtered, v)
}
}
Expand Down

0 comments on commit e30a899

Please sign in to comment.