Skip to content

Commit 9ccbf4e

Browse files
committed
[draft] prepare outlines for outlines-core v0.2
1 parent 437ffe4 commit 9ccbf4e

File tree

11 files changed

+71
-38
lines changed

11 files changed

+71
-38
lines changed

benchmarks/bench_json_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from outlines_core.fsm.json_schema import build_regex_from_schema
1+
from outlines_core.json_schema import build_regex_from_schema
22

33
from outlines.caching import cache_disabled
44
from outlines.fsm.guide import RegexGuide

docs/cookbook/chain_of_thought.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ We could generate a response using the json schema but for a change we will use
8181

8282
```python
8383
from outlines.fsm.json_schema import convert_json_schema_to_str
84-
from outlines_core.fsm.json_schema import build_regex_from_schema
84+
from outlines_core.json_schema import build_regex_from_schema
8585

8686
schema_str = convert_json_schema_to_str(json_schema=json_schema)
8787
regex_str = build_regex_from_schema(schema_str)

docs/cookbook/react_agent.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ We could generate a response using the json schema but we will use the regex and
121121

122122
```python
123123
from outlines.fsm.json_schema import convert_json_schema_to_str
124-
from outlines_core.fsm.json_schema import build_regex_from_schema
124+
from outlines_core.json_schema import build_regex_from_schema
125125

126126
json_schema = Decision.model_json_schema()
127127
schema_str = convert_json_schema_to_str(json_schema=json_schema)

outlines/fsm/guide.py

Lines changed: 61 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,14 @@
11
import collections
22
import copy
33
import warnings
4-
from typing import TYPE_CHECKING, Any, Generator, Union
4+
from dataclasses import dataclass
5+
from typing import TYPE_CHECKING, Any, Generator, List, Optional, Union
56

67
import torch
78
from lark.indenter import DedentError
89
from lark.lexer import UnexpectedCharacters, UnexpectedToken
9-
from outlines_core.fsm.guide import Generate
10-
from outlines_core.fsm.guide import Guide as CoreGuide
11-
from outlines_core.fsm.guide import RegexGuide as CoreRegexGuide
12-
from outlines_core.fsm.guide import Write
13-
from outlines_core.fsm.guide import (
14-
create_states_mapping as uncached_create_states_mapping,
15-
)
10+
from outlines_core import Guide as CoreGuide
11+
from outlines_core import Index, Vocabulary
1612

1713
from outlines import grammars
1814
from outlines.fsm.parsing import PartialLark, PartialParserState
@@ -21,6 +17,35 @@
2117
from outlines.models.tokenizer import Tokenizer
2218

2319

20+
@dataclass(frozen=True)
21+
class Write:
22+
"""Write instruction.
23+
24+
Attributes
25+
----------
26+
tokens
27+
The sequence of tokens to be added to the current sequence by the
28+
generation process.
29+
30+
"""
31+
32+
tokens: List[int]
33+
34+
35+
@dataclass(frozen=True)
36+
class Generate:
37+
"""Generate instruction
38+
39+
Attributes
40+
----------
41+
tokens
42+
The tokens that lead to a valid completion if generated. A value
43+
of ``None`` indicates that all tokens are allowed.
44+
"""
45+
46+
tokens: Optional[List[int]]
47+
48+
2449
Instruction = Union[Write, Generate]
2550

2651

@@ -72,29 +97,37 @@ def copy(self):
7297
return self
7398

7499

75-
def cached_create_states_mapping(regex_string, tokenizer, *args, **kwargs):
76-
return uncached_create_states_mapping(regex_string, tokenizer, *args, **kwargs)
100+
class RegexGuide(Guide):
101+
"""Guide to generate text in the language of a regular expression."""
77102

78-
79-
class RegexGuide(CoreRegexGuide):
80-
"""
81-
Guide to generate text in the language of a regular expression.
82-
CoreRegexGuide with outlines cache
83-
"""
103+
def __init__(self, guide, eos_tensor):
104+
self.eos_tensor = eos_tensor
105+
self._guide = guide
84106

85107
@classmethod
86-
def from_regex(
87-
cls,
88-
regex_string: str,
89-
tokenizer,
90-
**kwargs,
91-
):
92-
return super().from_regex(
93-
regex_string,
94-
tokenizer,
95-
_create_states_mapping=cached_create_states_mapping,
96-
**kwargs,
97-
)
108+
def from_regex(cls, regex, tokenizer):
109+
vocabulary = Vocabulary.from_pretrained(tokenizer.name_or_path())
110+
index = Index(regex, vocabulary)
111+
guide = Guide(index)
112+
113+
eos_tensor = torch.tensor([vocabulary.get_eos_token_id()])
114+
return cls(guide, eos_tensor)
115+
116+
def get_next_instruction(self, state):
117+
if self.is_final_state(state):
118+
return self.eos_tensor
119+
return None
120+
121+
def get_next_state(self, state, token_id):
122+
if token_id == self.eos_tensor or self.is_final_state(state):
123+
return self._guide.final_state
124+
return self._guide.advance(token_id)
125+
126+
def is_final_state(self, state):
127+
return self._guide.is_finished()
128+
129+
def copy(self):
130+
return RegexGuide(self._guide, self.eos_tensor)
98131

99132

100133
CFGState = collections.namedtuple("CFGState", ["parser_state", "prev_token"])

outlines/fsm/parsing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
)
4747
from lark.parsers.lalr_interactive_parser import InteractiveParser
4848
from lark.parsers.lalr_parser import LALR_Parser, ParseConf, ParserState, _Parser
49-
from outlines_core.fsm.regex import (
49+
from outlines_core.regex import (
5050
BetterFSM,
5151
get_token_transition_keys,
5252
make_deterministic_fsm,

outlines/generate/choice.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from functools import singledispatch
55
from typing import Callable, List, Union
66

7-
from outlines_core.fsm.json_schema import build_regex_from_schema
7+
from outlines_core.json_schema import build_regex_from_schema
88

99
from outlines.fsm.json_schema import get_schema_from_enum
1010
from outlines.generate.api import SequenceGeneratorAdapter

outlines/generate/json.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from typing import Callable, Optional, Union
55

66
from genson import SchemaBuilder
7-
from outlines_core.fsm.json_schema import build_regex_from_schema
7+
from outlines_core.json_schema import build_regex_from_schema
88
from pydantic import BaseModel
99

1010
from outlines.fsm.json_schema import get_schema_from_enum, get_schema_from_signature

outlines/processors/structured.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union
2828

2929
import torch
30-
from outlines_core.fsm.json_schema import build_regex_from_schema
30+
from outlines_core.json_schema import build_regex_from_schema
3131
from pydantic import BaseModel
3232

3333
from outlines.fsm.guide import CFGGuide, Guide, RegexGuide

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ dependencies = [
4040
"pycountry",
4141
"airportsdata",
4242
"torch",
43-
"outlines_core==0.1.26",
43+
"outlines_core==0.2.2",
4444
"genson",
4545
]
4646
dynamic = ["version"]

tests/fsm/test_json_schema.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from typing import List
66

77
import pytest
8-
from outlines_core.fsm.json_schema import build_regex_from_schema
8+
from outlines_core.json_schema import build_regex_from_schema
99
from pydantic import BaseModel, constr
1010

1111
from outlines.fsm.json_schema import get_schema_from_enum, get_schema_from_signature

0 commit comments

Comments
 (0)