Skip to content

chore: compress env vars that are too large #695

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/docs/04-command-line-reference/gptscript.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,6 @@ gptscript [flags] PROGRAM_FILE [INPUT...]
* [gptscript credential](gptscript_credential.md) - List stored credentials
* [gptscript eval](gptscript_eval.md) -
* [gptscript fmt](gptscript_fmt.md) -
* [gptscript getenv](gptscript_getenv.md) - Looks up an environment variable for use in GPTScript tools
* [gptscript parse](gptscript_parse.md) -

48 changes: 48 additions & 0 deletions docs/docs/04-command-line-reference/gptscript_getenv.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
---
title: "gptscript getenv"
---
## gptscript getenv

Looks up an environment variable for use in GPTScript tools

```
gptscript getenv [flags] KEY [DEFAULT]
```

### Options

```
-h, --help help for getenv
```

### Options inherited from parent commands

```
--cache-dir string Directory to store cache (default: $XDG_CACHE_HOME/gptscript) ($GPTSCRIPT_CACHE_DIR)
-C, --chdir string Change current working directory ($GPTSCRIPT_CHDIR)
--color Use color in output (default true) ($GPTSCRIPT_COLOR)
--config string Path to GPTScript config file ($GPTSCRIPT_CONFIG)
--confirm Prompt before running potentially dangerous commands ($GPTSCRIPT_CONFIRM)
--credential-context string Context name in which to store credentials ($GPTSCRIPT_CREDENTIAL_CONTEXT) (default "default")
--credential-override strings Credentials to override (ex: --credential-override github.com/example/cred-tool:API_TOKEN=1234) ($GPTSCRIPT_CREDENTIAL_OVERRIDE)
--debug Enable debug logging ($GPTSCRIPT_DEBUG)
--debug-messages Enable logging of chat completion calls ($GPTSCRIPT_DEBUG_MESSAGES)
--default-model string Default LLM model to use ($GPTSCRIPT_DEFAULT_MODEL) (default "gpt-4o")
--default-model-provider string Default LLM model provider to use, this will override OpenAI settings ($GPTSCRIPT_DEFAULT_MODEL_PROVIDER)
--disable-cache Disable caching of LLM API responses ($GPTSCRIPT_DISABLE_CACHE)
--dump-state string Dump the internal execution state to a file ($GPTSCRIPT_DUMP_STATE)
--events-stream-to string Stream events to this location, could be a file descriptor/handle (e.g. fd://2), filename, or named pipe (e.g. \\.\pipe\my-pipe) ($GPTSCRIPT_EVENTS_STREAM_TO)
-f, --input string Read input from a file ("-" for stdin) ($GPTSCRIPT_INPUT_FILE)
--no-trunc Do not truncate long log messages ($GPTSCRIPT_NO_TRUNC)
--openai-api-key string OpenAI API KEY ($OPENAI_API_KEY)
--openai-base-url string OpenAI base URL ($OPENAI_BASE_URL)
--openai-org-id string OpenAI organization ID ($OPENAI_ORG_ID)
-o, --output string Save output to a file, or - for stdout ($GPTSCRIPT_OUTPUT)
-q, --quiet No output logging (set --quiet=false to force on even when there is no TTY) ($GPTSCRIPT_QUIET)
--workspace string Directory to use for the workspace, if specified it will not be deleted on exit ($GPTSCRIPT_WORKSPACE)
```

### SEE ALSO

* [gptscript](gptscript.md) -

60 changes: 60 additions & 0 deletions pkg/cli/getenv.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package cli

import (
"bytes"
"compress/gzip"
"encoding/base64"
"fmt"
"io"
"os"
"strings"

"github.com/spf13/cobra"
)

// Getenv implements the "getenv" subcommand, which prints the value of an
// environment variable. It defines no flags and holds no state.
type Getenv struct {
}

// Customize configures the cobra command backing getenv: it requires one or
// two positional arguments (KEY and an optional DEFAULT) and sets the short
// help text and the usage line.
func (e *Getenv) Customize(cmd *cobra.Command) {
	// KEY is mandatory, DEFAULT is optional; anything else is rejected.
	cmd.Args = cobra.RangeArgs(1, 2)
	cmd.Short = "Looks up an environment variable for use in GPTScript tools"
	cmd.Use = "getenv [flags] KEY [DEFAULT]"
}

// Run resolves the environment variable named by args[0] through getEnv and
// writes the result to standard output without a trailing newline. When a
// second argument is present it is passed to getEnv as the default value;
// otherwise the default is the empty string. Run always returns nil.
func (e *Getenv) Run(_ *cobra.Command, args []string) error {
	fallback := ""
	if len(args) >= 2 {
		fallback = args[1]
	}

	fmt.Print(getEnv(args[0], fallback))
	return nil
}

// getEnv returns the value of the environment variable key, or def when the
// variable is unset or empty.
//
// A value of the form {"_gz":"<payload>"} is treated as a base64-encoded,
// gzip-compressed string and is returned decompressed. If the wrapper is
// present but the payload cannot be base64-decoded or gunzipped, the raw
// value is returned unchanged.
func getEnv(key, def string) string {
	const (
		gzPrefix = `{"_gz":"`
		gzSuffix = `"}`
	)

	v := os.Getenv(key)
	if v == "" {
		return def
	}

	// Strip the prefix before looking for the suffix. The two markers share a
	// quote character, so a value such as `{"_gz":"}` matches both when they
	// are tested against the whole string, and slicing by fixed offsets would
	// then panic with an inverted range.
	if !strings.HasPrefix(v, gzPrefix) {
		return v
	}
	payload := strings.TrimPrefix(v, gzPrefix)
	if !strings.HasSuffix(payload, gzSuffix) {
		return v
	}
	payload = strings.TrimSuffix(payload, gzSuffix)

	data, err := base64.StdEncoding.DecodeString(payload)
	if err != nil {
		return v
	}
	gz, err := gzip.NewReader(bytes.NewReader(data))
	if err != nil {
		return v
	}
	defer gz.Close()

	decoded, err := io.ReadAll(gz)
	if err != nil {
		return v
	}
	return string(decoded)
}
57 changes: 57 additions & 0 deletions pkg/cli/getenv_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package cli

import (
"os"
"testing"
)

// TestGetEnv checks that getEnv falls back to the default for an empty
// variable, returns plain values untouched, and transparently decompresses
// values wrapped as {"_gz":"<base64 gzip>"}.
func TestGetEnv(t *testing.T) {
	const envKey = "testKey"

	// Put the variable back exactly as it was found. A variable that was not
	// set before the test must be unset again rather than set to "", because
	// the two states are distinguishable through os.LookupEnv.
	original, hadOriginal := os.LookupEnv(envKey)
	t.Cleanup(func() {
		var err error
		if hadOriginal {
			err = os.Setenv(envKey, original)
		} else {
			err = os.Unsetenv(envKey)
		}
		if err != nil {
			t.Errorf("restoring %s: %v", envKey, err)
		}
	})

	testCases := []struct {
		name           string
		key            string
		def            string
		envValue       string
		expectedResult string
	}{
		{
			name:           "NoValueUseDefault",
			key:            envKey,
			def:            "defaultValue",
			envValue:       "",
			expectedResult: "defaultValue",
		},
		{
			name:           "ValueExistsNoCompress",
			key:            envKey,
			def:            "defaultValue",
			envValue:       "testValue",
			expectedResult: "testValue",
		},
		{
			name:           "ValueExistsCompressed",
			key:            envKey,
			def:            "defaultValue",
			envValue:       `{"_gz":"H4sIAEosrGYC/ytJLS5RKEvMKU0FACtB3ewKAAAA"}`,
			expectedResult: "test value",
		},
	}

	for _, test := range testCases {
		// Subtests run sequentially (no t.Parallel), so sharing one
		// environment variable between them is safe.
		t.Run(test.name, func(t *testing.T) {
			if err := os.Setenv(test.key, test.envValue); err != nil {
				t.Fatalf("setting %s: %v", test.key, err)
			}

			result := getEnv(test.key, test.def)

			if result != test.expectedResult {
				t.Errorf("expected: %s, got: %s", test.expectedResult, result)
			}
		})
	}
}
1 change: 1 addition & 0 deletions pkg/cli/gptscript.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ func New() *cobra.Command {
&Credential{root: root},
&Parse{},
&Fmt{},
&Getenv{},
&SDKServer{
GPTScript: root,
},
Expand Down
25 changes: 23 additions & 2 deletions pkg/engine/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package engine

import (
"bytes"
"compress/gzip"
"context"
"encoding/base64"
"encoding/json"
"fmt"
"io"
Expand Down Expand Up @@ -44,6 +46,25 @@ func (o *outputWriter) Write(p []byte) (n int, err error) {
return len(p), nil
}

// compressEnv returns a copy of envs in which every KEY=VALUE entry whose
// value is at least 40,000 bytes long is replaced by
//
//	KEY={"_gz":"<base64 of gzip(VALUE)>"}
//
// Entries without an "=" and entries with shorter values are copied through
// unchanged, and the original order is preserved.
//
// A nil input yields nil and an empty non-nil input yields an empty non-nil
// slice. The distinction matters when the result is assigned to exec.Cmd.Env,
// where nil means "inherit the parent's environment" rather than "no
// variables".
func compressEnv(envs []string) (result []string) {
	// Values shorter than this many bytes are passed through uncompressed.
	const minCompressLen = 40_000

	if envs == nil {
		return nil
	}

	result = make([]string, 0, len(envs))
	for _, env := range envs {
		k, v, ok := strings.Cut(env, "=")
		if !ok || len(v) < minCompressLen {
			result = append(result, env)
			continue
		}

		var out bytes.Buffer
		b64 := base64.NewEncoder(base64.StdEncoding, &out)
		gz := gzip.NewWriter(b64)
		// Errors are deliberately ignored: every write ends up in an
		// in-memory bytes.Buffer, whose Write never returns an error.
		_, _ = gz.Write([]byte(v))
		// Close gzip first so its trailer reaches the base64 encoder, then
		// close the encoder so any partial final block is flushed.
		_ = gz.Close()
		_ = b64.Close()
		result = append(result, k+`={"_gz":"`+out.String()+`"}`)
	}
	return result
}

func (e *Engine) runCommand(ctx Context, tool types.Tool, input string, toolCategory ToolCategory) (cmdOut string, cmdErr error) {
id := counter.Next()

Expand Down Expand Up @@ -95,10 +116,10 @@ func (e *Engine) runCommand(ctx Context, tool types.Tool, input string, toolCate
for _, inputContext := range ctx.InputContext {
instructions = append(instructions, inputContext.Content)
}

var extraEnv = []string{
strings.TrimSpace("GPTSCRIPT_CONTEXT=" + strings.Join(instructions, "\n")),
}

cmd, stop, err := e.newCommand(ctx.Ctx, extraEnv, tool, input)
if err != nil {
return "", err
Expand Down Expand Up @@ -277,6 +298,6 @@ func (e *Engine) newCommand(ctx context.Context, extraEnv []string, tool types.T
}

cmd := exec.CommandContext(ctx, env.Lookup(envvars, args[0]), cmdArgs...)
cmd.Env = envvars
cmd.Env = compressEnv(envvars)
return cmd, stop, nil
}
32 changes: 32 additions & 0 deletions pkg/tests/runner_test.go

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pkg/tests/testdata/TestEnvOverflow/context.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"_gz":"H4sIAAAAAAAA/+zAgQAAAADCMNb8JQK4wjYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgHgAA//+94pKFQBkBAA=="}
14 changes: 14 additions & 0 deletions pkg/tests/testdata/TestEnvOverflow/test.gpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
context: c

#!/bin/bash

# Print the context handed to this tool, then write the same text to
# context.json inside the tool directory.
echo "${GPTSCRIPT_CONTEXT}"
# The redirect target is quoted so that a tool directory containing whitespace
# stays a single path instead of triggering an "ambiguous redirect" error.
echo "${GPTSCRIPT_CONTEXT}" > "${GPTSCRIPT_TOOL_DIR}/context.json"

---
name: c

#!/bin/bash

# Emit the quoted run of "a" characters 1000 times on a single line: printf
# reuses its format once for each of the 1000 brace-expanded arguments, and
# %.0s consumes an argument while printing none of it.
string=$(printf 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa%.0s' {1..1000})
echo "$string"