diff --git a/pkg/functions/bnf_rules.go b/pkg/functions/bnf_rules.go new file mode 100644 index 00000000..13aa3654 --- /dev/null +++ b/pkg/functions/bnf_rules.go @@ -0,0 +1,47 @@ +package functions + +import "regexp" + +var ( + PRIMITIVE_RULES = map[string]string{ + "boolean": `("true" | "false") space`, + "number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`, + "integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`, + "string": `"\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* "\"" space`, + // TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here, + // however, if we don't have it, the grammar will be ambiguous and + // empirically results are way worse. + "freestring": `( + [^\x00] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) + )* space`, + "null": `"null" space`, + } + + INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`) + GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`) + GRAMMAR_LITERAL_ESCAPES = map[string]string{ + "\r": `\r`, + "\n": `\n`, + `"`: `\"`, + } +) + +const ( + SPACE_RULE = `" "?` + + arrayNewLines = `arr ::= + "[\n" ( + realvalue + (",\n" realvalue)* + )? "]"` + + array = `arr ::= + "[" ( + realvalue + ("," realvalue)* + )? "]"` +) diff --git a/pkg/functions/function_structure.go b/pkg/functions/function_structure.go new file mode 100644 index 00000000..650236ec --- /dev/null +++ b/pkg/functions/function_structure.go @@ -0,0 +1,22 @@ +package functions + +import "encoding/json" + +type Item struct { + Type string `json:"type"` + Properties map[string]interface{} `json:"properties"` +} + +type JSONFunctionStructure struct { + OneOf []Item `json:"oneOf,omitempty"` + AnyOf []Item `json:"anyOf,omitempty"` + Defs map[string]interface{} `json:"$defs,omitempty"` +} + +func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string { + grammarOpts := &GrammarOption{} + grammarOpts.Apply(options...) + + dat, _ := json.Marshal(j) + return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...) +} diff --git a/pkg/functions/functions.go b/pkg/functions/functions.go index 49e9fc93..4f97f409 100644 --- a/pkg/functions/functions.go +++ b/pkg/functions/functions.go @@ -18,6 +18,15 @@ type Function struct { } type Functions []Function +type FunctionName struct { + Const string `json:"const"` +} + +type Argument struct { + Type string `json:"type"` + Properties map[string]interface{} `json:"properties"` +} + type Tool struct { Type string `json:"type"` Function Function `json:"function,omitempty"` @@ -86,3 +95,8 @@ func (f Functions) Select(name string) Functions { return funcs } + +func jsonString(v interface{}) string { + b, _ := json.Marshal(v) + return string(b) +} diff --git a/pkg/functions/functions_suite_test.go b/pkg/functions/functions_suite_test.go index 8964b1c8..59a90ab0 100644 --- a/pkg/functions/functions_suite_test.go +++ b/pkg/functions/functions_suite_test.go @@ -1,8 +1,10 @@ -package functions +package functions_test import ( "testing" + . "github.com/mudler/LocalAI/pkg/functions" + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" ) @@ -11,3 +13,13 @@ func TestGrammar(t *testing.T) { RegisterFailHandler(Fail) RunSpecs(t, "Grammar test suite") } + +func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} { + property := map[string]interface{}{} + property[field1] = FunctionName{Const: name} + property[field2] = Argument{ + Type: "object", + Properties: properties, + } + return property +} diff --git a/pkg/functions/grammar_json_schema.go b/pkg/functions/grammar_json_schema.go index 7356d01d..4c958ee7 100644 --- a/pkg/functions/grammar_json_schema.go +++ b/pkg/functions/grammar_json_schema.go @@ -5,70 +5,12 @@ package functions import ( "encoding/json" "fmt" - "regexp" "sort" "strings" "github.com/mudler/LocalAI/pkg/utils" ) -const ( - JSONBNF = `root ::= object -value ::= object | array | string | number | ("true" | "false" | "null") ws - -object ::= - "{" ws ( - string ":" ws value - ("," ws string ":" ws value)* - )? "}" ws - -array ::= - "[" ws ( - value - ("," ws value)* - )? "]" ws - -string ::= - "\"" ( - [^"\\] | - "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes - )* "\"" ws - -number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws - -ws ::= ([ \t\n] ws)?` -) - -var ( - SPACE_RULE = `" "?` - - PRIMITIVE_RULES = map[string]string{ - "boolean": `("true" | "false") space`, - "number": `("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space`, - "integer": `("-"? ([0-9] | [1-9] [0-9]*)) space`, - "string": `"\"" ( - [^"\\] | - "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) - )* "\"" space`, - // TODO: we shouldn't forbid \" and \\ or all unicode and have this branch here, - // however, if we don't have it, the grammar will be ambiguous and - // empirically results are way worse. - "freestring": `( - [^\x00] | - "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) - )* space`, - "null": `"null" space`, - } - - INVALID_RULE_CHARS_RE = regexp.MustCompile(`[^a-zA-Z0-9-]+`) - GRAMMAR_LITERAL_ESCAPE_RE = regexp.MustCompile(`[\r\n"]`) - GRAMMAR_LITERAL_ESCAPES = map[string]string{ - "\r": `\r`, - "\n": `\n`, - `"`: `\"`, - } -) - type JSONSchemaConverter struct { propOrder map[string]int rules map[string]string @@ -114,18 +56,6 @@ func (sc *JSONSchemaConverter) addRule(name, rule string) string { return key } -const arrayNewLines = `arr ::= - "[\n" ( - realvalue - (",\n" realvalue)* - )? "]"` - -const array = `arr ::= - "[" ( - realvalue - ("," realvalue)* - )? "]"` - func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption)) string { grammarOpts := &GrammarOption{} @@ -343,36 +273,3 @@ func (sc *JSONSchemaConverter) GrammarFromBytes(b []byte, options ...func(*Gramm _ = json.Unmarshal(b, &schema) return sc.Grammar(schema, options...) } - -func jsonString(v interface{}) string { - b, _ := json.Marshal(v) - return string(b) -} - -type FunctionName struct { - Const string `json:"const"` -} - -type Argument struct { - Type string `json:"type"` - Properties map[string]interface{} `json:"properties"` -} - -type Item struct { - Type string `json:"type"` - Properties map[string]interface{} `json:"properties"` -} - -type JSONFunctionStructure struct { - OneOf []Item `json:"oneOf,omitempty"` - AnyOf []Item `json:"anyOf,omitempty"` - Defs map[string]interface{} `json:"$defs,omitempty"` -} - -func (j JSONFunctionStructure) Grammar(options ...func(*GrammarOption)) string { - grammarOpts := &GrammarOption{} - grammarOpts.Apply(options...) - - dat, _ := json.Marshal(j) - return NewJSONSchemaConverter(grammarOpts.PropOrder).GrammarFromBytes(dat, options...) -} diff --git a/pkg/functions/grammar_json_schema_test.go b/pkg/functions/grammar_json_schema_test.go index bf52bd8d..6402bb40 100644 --- a/pkg/functions/grammar_json_schema_test.go +++ b/pkg/functions/grammar_json_schema_test.go @@ -9,16 +9,6 @@ import ( . "github.com/onsi/gomega" ) -func createFunction(field1 string, field2 string, name string, properties map[string]interface{}) map[string]interface{} { - property := map[string]interface{}{} - property[field1] = FunctionName{Const: name} - property[field2] = Argument{ - Type: "object", - Properties: properties, - } - return property -} - var testFunctions = []Item{ { Type: "object", diff --git a/pkg/functions/json_mode.go b/pkg/functions/json_mode.go new file mode 100644 index 00000000..46361b74 --- /dev/null +++ b/pkg/functions/json_mode.go @@ -0,0 +1,28 @@ +package functions + +const ( + JSONBNF = `root ::= object +value ::= object | array | string | number | ("true" | "false" | "null") ws + +object ::= + "{" ws ( + string ":" ws value + ("," ws string ":" ws value)* + )? "}" ws + +array ::= + "[" ws ( + value + ("," ws value)* + )? "]" ws + +string ::= + "\"" ( + [^"\\] | + "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes + )* "\"" ws + +number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws + +ws ::= ([ \t\n] ws)?` +)