@@ -9,12 +9,15 @@ import (
9
9
openai "github.com/gptscript-ai/chat-completion-client"
10
10
)
11
11
12
- const instructions = `When given JSON objects that conform to the following JSONSchema :
12
+ const instructions = `"actual" is considered equivalent to "expected" if and only if the following rules are satisfied :
13
13
14
14
%s
15
15
16
- Determine if "actual" is equal to "expected" based on the comparison constraints described by "criteria".
17
- "actual" is considered equal to "expected" if and only if the all of the constraints described by "criteria" are satisfied.
16
+ When given JSON objects that conform to the following JSONSchema:
17
+
18
+ %s
19
+
20
+ Determine if "actual" is considered equivalent to "expected".
18
21
19
22
After making a determination, respond with a JSON object that conforms to the following JSONSchema:
20
23
@@ -28,7 +31,7 @@ After making a determination, respond with a JSON object that conforms to the fo
28
31
},
29
32
"reasoning": {
30
33
"type": "string",
31
- "description": "The reasoning used to come to the determination, that points out all instances where the given criteria was violated "
34
+ "description": "The reasoning used to come to the determination"
32
35
}
33
36
},
34
37
"required": [
@@ -41,14 +44,13 @@ Your responses are concise and include only the json object described above.
41
44
`
42
45
43
46
type Judge [T any ] struct {
44
- client * openai.Client
45
- instructions string
47
+ client * openai.Client
48
+ comparisonSchema string
46
49
}
47
50
48
51
type comparison [T any ] struct {
49
- Expected T `json:"expected"`
50
- Actual T `json:"actual"`
51
- Criteria string `json:"criteria"`
52
+ Expected T `json:"expected"`
53
+ Actual T `json:"actual"`
52
54
}
53
55
54
56
type ruling struct {
@@ -70,22 +72,21 @@ func New[T any](client *openai.Client) (*Judge[T], error) {
70
72
return nil , fmt .Errorf ("failed to generate JSONSchema for %T: %w" , new (T ), err )
71
73
}
72
74
73
- schemaJSON , err := json .MarshalIndent (schema , "" , " " )
75
+ marshaled , err := json .MarshalIndent (schema , "" , " " )
74
76
if err != nil {
75
77
return nil , fmt .Errorf ("failed to marshal JSONSchema for %T: %w" , new (T ), err )
76
78
}
77
79
78
80
return & Judge [T ]{
79
- client : client ,
80
- instructions : fmt . Sprintf ( instructions , schemaJSON ),
81
+ client : client ,
82
+ comparisonSchema : string ( marshaled ),
81
83
}, nil
82
84
}
83
85
84
86
func (j * Judge [T ]) Equal (ctx context.Context , expected , actual T , criteria string ) (equal bool , reasoning string , err error ) {
85
87
comparisonJSON , err := json .MarshalIndent (& comparison [T ]{
86
88
Expected : expected ,
87
89
Actual : actual ,
88
- Criteria : criteria ,
89
90
}, "" , " " )
90
91
if err != nil {
91
92
return false , "" , fmt .Errorf ("failed to marshal judge testcase JSON: %w" , err )
@@ -101,7 +102,7 @@ func (j *Judge[T]) Equal(ctx context.Context, expected, actual T, criteria strin
101
102
Messages : []openai.ChatCompletionMessage {
102
103
{
103
104
Role : openai .ChatMessageRoleSystem ,
104
- Content : j . instructions ,
105
+ Content : fmt . Sprintf ( instructions , criteria , j . comparisonSchema ) ,
105
106
},
106
107
{
107
108
Role : openai .ChatMessageRoleUser ,
@@ -111,11 +112,11 @@ func (j *Judge[T]) Equal(ctx context.Context, expected, actual T, criteria strin
111
112
}
112
113
response , err := j .client .CreateChatCompletion (ctx , request )
113
114
if err != nil {
114
- return false , "" , fmt .Errorf ("failed to make judge chat completion request: %w" , err )
115
+ return false , "" , fmt .Errorf ("failed to create chat completion request: %w" , err )
115
116
}
116
117
117
118
if len (response .Choices ) < 1 {
118
- return false , "" , fmt .Errorf ("judge chat completion request returned no choices" )
119
+ return false , "" , fmt .Errorf ("chat completion request returned no choices" )
119
120
}
120
121
121
122
var equality ruling
0 commit comments