Refactor the 'getModel' callbacks into their own file (#63359)

chrsmith · web-flow · commit 49842741016f · 2024-06-19T19:27:06.000-07:00
Minor refactoring to the `cmd/frontend/internal/httpapi/completions` package.

When we call `newCompletionsHandler`, one of the parameters is a function
named `getModel`. This is called to determine which LLM model should be
used to respond to the incoming completion request. We have two
implementations of this function: one for code completions and another
for chats.

This PR just moves the logic for those two implementations into their
own file (`get_model.go`), along with a couple of functions for which
they were the only caller.

There were two minor functionality changes I made, which I'll call out
in comments on this PR.

## Why?

As we rework how LLM models and associated configuration flows
throughout the backend, updating these functions will be a bit easier if
they are pulled out like this rather than being defined inline like they
are today.

Also, pretty much any place in the codebase where we have hard-coded an
LLM model is "on notice" and should instead be driven entirely by some
sort of global configuration file. (Since this is one of the key
problems server-side LLM config is trying to solve.)

## Test plan

NA, no functional changes beyond the two minor ones noted above. Relying on CI/CD and linter.

## Changelog

NA
diff --git a/cmd/frontend/internal/httpapi/completions/BUILD.bazel b/cmd/frontend/internal/httpapi/completions/BUILD.bazel
@@ -6,6 +6,7 @@ go_library(
     srcs = [
         "chat.go",
         "codecompletion.go",
+        "get_model.go",
         "handler.go",
         "limiter.go",
         "observability.go",
diff --git a/cmd/frontend/internal/httpapi/completions/chat.go b/cmd/frontend/internal/httpapi/completions/chat.go
@@ -5,17 +5,9 @@ import (
 
 	"net/http"
 
-	"github.com/sourcegraph/sourcegraph/cmd/frontend/internal/cody"
-	sgactor "github.com/sourcegraph/sourcegraph/internal/actor"
-	"github.com/sourcegraph/sourcegraph/internal/dotcom"
-
 	"github.com/sourcegraph/log"
 
-	"github.com/sourcegraph/sourcegraph/internal/completions/client/anthropic"
-	"github.com/sourcegraph/sourcegraph/internal/completions/client/fireworks"
-	"github.com/sourcegraph/sourcegraph/internal/completions/client/google"
 	"github.com/sourcegraph/sourcegraph/internal/completions/types"
-	"github.com/sourcegraph/sourcegraph/internal/conf/conftypes"
 	"github.com/sourcegraph/sourcegraph/internal/database"
 	"github.com/sourcegraph/sourcegraph/internal/redispool"
 	"github.com/sourcegraph/sourcegraph/internal/telemetry/telemetryrecorder"
@@ -43,79 +35,5 @@ func NewChatCompletionsStreamHandler(logger log.Logger, db database.DB) http.Han
 		types.CompletionsFeatureChat,
 		rl,
 		"chat",
-		func(ctx context.Context, requestParams types.CodyCompletionRequestParameters, c *conftypes.CompletionsConfig) (string, error) {
-			// Allow a number of additional models on Dotcom
-			if dotcom.SourcegraphDotComMode() {
-				actor := sgactor.FromContext(ctx)
-				user, err := actor.User(ctx, db.Users())
-				if err != nil {
-					return "", err
-				}
-
-				subscription, err := cody.SubscriptionForUser(ctx, db, *user)
-				if err != nil {
-					return "", err
-				}
-
-				if isAllowedCustomChatModel(requestParams.Model, subscription.ApplyProRateLimits) {
-					return requestParams.Model, nil
-				}
-			}
-			// No user defined models for now.
-			if requestParams.Fast {
-				return c.FastChatModel, nil
-			}
-			return c.ChatModel, nil
-		},
-	)
-}
-
-// We only allow dotcom clients to select a custom chat model and maintain an allowlist for which
-// custom values we support
-func isAllowedCustomChatModel(model string, isProUser bool) bool {
-	// When updating these two lists, make sure you also update `allowedModels` in codygateway_dotcom_user.go.
-	if isProUser {
-		switch model {
-		case
-			"anthropic/" + anthropic.Claude3Haiku,
-			"anthropic/" + anthropic.Claude3Sonnet,
-			"anthropic/" + anthropic.Claude3Opus,
-			"fireworks/" + fireworks.Mixtral8x7bInstruct,
-			"fireworks/" + fireworks.Mixtral8x22Instruct,
-			"openai/gpt-3.5-turbo",
-			"openai/gpt-4o",
-			"openai/gpt-4-turbo",
-			"openai/gpt-4-turbo-preview",
-			"google/" + google.Gemini15FlashLatest,
-			"google/" + google.Gemini15ProLatest,
-			"google/" + google.GeminiProLatest,
-			"google/" + google.Gemini15Flash,
-			"google/" + google.Gemini15Pro,
-			"google/" + google.GeminiPro,
-
-			// Remove after the Claude 3 rollout is complete
-			"anthropic/claude-2",
-			"anthropic/claude-2.0",
-			"anthropic/claude-2.1",
-			"anthropic/claude-instant-1.2-cyan",
-			"anthropic/claude-instant-1.2",
-			"anthropic/claude-instant-v1",
-			"anthropic/claude-instant-1":
-			return true
-		}
-	} else {
-		switch model {
-		case
-			"anthropic/" + anthropic.Claude3Haiku,
-			"anthropic/" + anthropic.Claude3Sonnet,
-			// Remove after the Claude 3 rollout is complete
-			"anthropic/claude-2",
-			"anthropic/claude-2.0",
-			"anthropic/claude-instant-v1",
-			"anthropic/claude-instant-1":
-			return true
-		}
-	}
-
-	return false
+		getChatModelFn(db))
 }
diff --git a/cmd/frontend/internal/httpapi/completions/codecompletion.go b/cmd/frontend/internal/httpapi/completions/codecompletion.go
@@ -1,20 +1,15 @@
 package completions
 
 import (
-	"context"
 	"net/http"
 
 	"github.com/sourcegraph/log"
 
-	"github.com/sourcegraph/sourcegraph/internal/completions/client/fireworks"
-	"github.com/sourcegraph/sourcegraph/internal/completions/client/google"
 	"github.com/sourcegraph/sourcegraph/internal/completions/types"
-	"github.com/sourcegraph/sourcegraph/internal/conf/conftypes"
 	"github.com/sourcegraph/sourcegraph/internal/database"
 	"github.com/sourcegraph/sourcegraph/internal/guardrails"
 	"github.com/sourcegraph/sourcegraph/internal/redispool"
 	"github.com/sourcegraph/sourcegraph/internal/telemetry/telemetryrecorder"
-	"github.com/sourcegraph/sourcegraph/lib/errors"
 )
 
 // NewCodeCompletionsHandler is an http handler which sends back code completion results.
@@ -31,54 +26,5 @@ func NewCodeCompletionsHandler(logger log.Logger, db database.DB, test guardrail
 		types.CompletionsFeatureCode,
 		rl,
 		"code",
-		func(_ context.Context, requestParams types.CodyCompletionRequestParameters, c *conftypes.CompletionsConfig) (string, error) {
-			customModel := allowedCustomModel(requestParams.Model)
-			if customModel != "" {
-				return customModel, nil
-			}
-			if requestParams.Model != "" {
-				return "", errors.Newf("Unsupported code completion model %q", requestParams.Model)
-			}
-			return c.CompletionModel, nil
-		},
-	)
-}
-
-func allowedCustomModel(model string) string {
-	switch model {
-	case "fireworks/starcoder",
-		"fireworks/starcoder-16b",
-		"fireworks/starcoder-7b",
-		"fireworks/starcoder2-15b",
-		"fireworks/starcoder2-7b",
-		"fireworks/" + fireworks.Starcoder16b,
-		"fireworks/" + fireworks.Starcoder7b,
-		"fireworks/" + fireworks.Llama27bCode,
-		"fireworks/" + fireworks.Llama213bCode,
-		"fireworks/" + fireworks.Llama213bCodeInstruct,
-		"fireworks/" + fireworks.Llama234bCodeInstruct,
-		"fireworks/" + fireworks.Mistral7bInstruct,
-		"fireworks/" + fireworks.FineTunedFIMVariant1,
-		"fireworks/" + fireworks.FineTunedFIMVariant2,
-		"fireworks/" + fireworks.FineTunedFIMVariant3,
-		"fireworks/" + fireworks.FineTunedFIMVariant4,
-		"fireworks/" + fireworks.FineTunedFIMLangSpecificMixtral,
-		"fireworks/" + fireworks.DeepseekCoder1p3b,
-		"fireworks/" + fireworks.DeepseekCoder7b,
-		"anthropic/claude-instant-1.2",
-		"anthropic/claude-3-haiku-20240307",
-		// Deprecated model identifiers
-		"anthropic/claude-instant-v1",
-		"anthropic/claude-instant-1",
-		"anthropic/claude-instant-1.2-cyan",
-		"google/" + google.Gemini15Flash,
-		"google/" + google.GeminiPro,
-		"fireworks/accounts/sourcegraph/models/starcoder-7b",
-		"fireworks/accounts/sourcegraph/models/starcoder-16b",
-		"fireworks/accounts/fireworks/models/starcoder-3b-w8a16",
-		"fireworks/accounts/fireworks/models/starcoder-1b-w8a16":
-		return model
-	}
-
-	return ""
+		getCodeCompletionModelFn())
 }
diff --git a/cmd/frontend/internal/httpapi/completions/get_model.go b/cmd/frontend/internal/httpapi/completions/get_model.go
@@ -0,0 +1,155 @@
+package completions
+
+import (
+	"context"
+
+	"github.com/sourcegraph/sourcegraph/cmd/frontend/internal/cody"
+	sgactor "github.com/sourcegraph/sourcegraph/internal/actor"
+	"github.com/sourcegraph/sourcegraph/internal/dotcom"
+	"github.com/sourcegraph/sourcegraph/lib/errors"
+
+	"github.com/sourcegraph/sourcegraph/internal/completions/client/anthropic"
+	"github.com/sourcegraph/sourcegraph/internal/completions/client/fireworks"
+	"github.com/sourcegraph/sourcegraph/internal/completions/client/google"
+	"github.com/sourcegraph/sourcegraph/internal/completions/types"
+	"github.com/sourcegraph/sourcegraph/internal/conf/conftypes"
+	"github.com/sourcegraph/sourcegraph/internal/database"
+)
+
+// getModelFn is the thunk used to return the LLM model we should use for processing
+// the supplied completion request. Depending on the incoming request, site config,
+// feature used, etc. it could be any number of things.
+type getModelFn func(ctx context.Context, requestParams types.CodyCompletionRequestParameters, c *conftypes.CompletionsConfig) (string, error)
+
+func getCodeCompletionModelFn() getModelFn {
+	return func(_ context.Context, requestParams types.CodyCompletionRequestParameters, c *conftypes.CompletionsConfig) (string, error) {
+		// For code completions, we only allow certain models to be used.
+		// (Regardless of if the user is on Cody Free, Pro, or Enterprise.)
+		if requestParams.Model != "" {
+			if isAllowedCodeCompletionModel(requestParams.Model) {
+				return requestParams.Model, nil
+			}
+			return "", errors.Newf("unsupported code completion model %q", requestParams.Model)
+		}
+		return c.CompletionModel, nil
+	}
+}
+
+func getChatModelFn(db database.DB) getModelFn {
+	return func(ctx context.Context, requestParams types.CodyCompletionRequestParameters, c *conftypes.CompletionsConfig) (string, error) {
+		// If running on dotcom, i.e. using Cody Free/Cody Pro, then a number
+		// of models are available depending on the caller's subscription status.
+		if dotcom.SourcegraphDotComMode() {
+			actor := sgactor.FromContext(ctx)
+			user, err := actor.User(ctx, db.Users())
+			if err != nil {
+				return "", err
+			}
+
+			subscription, err := cody.SubscriptionForUser(ctx, db, *user)
+			if err != nil {
+				return "", err
+			}
+
+			if isAllowedCustomChatModel(requestParams.Model, subscription.ApplyProRateLimits) {
+				return requestParams.Model, nil
+			}
+		}
+
+		// For any other Sourcegraph instance, i.e. using Cody Enterprise,
+		// we just use the configured "chat" or "fastChat" model.
+		if requestParams.Fast {
+			return c.FastChatModel, nil
+		}
+		return c.ChatModel, nil
+	}
+}
+
+func isAllowedCodeCompletionModel(model string) bool {
+	switch model {
+	case "fireworks/starcoder",
+		"fireworks/starcoder-16b",
+		"fireworks/starcoder-7b",
+		"fireworks/starcoder2-15b",
+		"fireworks/starcoder2-7b",
+		"fireworks/" + fireworks.Starcoder16b,
+		"fireworks/" + fireworks.Starcoder7b,
+		"fireworks/" + fireworks.Llama27bCode,
+		"fireworks/" + fireworks.Llama213bCode,
+		"fireworks/" + fireworks.Llama213bCodeInstruct,
+		"fireworks/" + fireworks.Llama234bCodeInstruct,
+		"fireworks/" + fireworks.Mistral7bInstruct,
+		"fireworks/" + fireworks.FineTunedFIMVariant1,
+		"fireworks/" + fireworks.FineTunedFIMVariant2,
+		"fireworks/" + fireworks.FineTunedFIMVariant3,
+		"fireworks/" + fireworks.FineTunedFIMVariant4,
+		"fireworks/" + fireworks.FineTunedFIMLangSpecificMixtral,
+		"fireworks/" + fireworks.DeepseekCoder1p3b,
+		"fireworks/" + fireworks.DeepseekCoder7b,
+		"anthropic/claude-instant-1.2",
+		"anthropic/claude-3-haiku-20240307",
+		// Deprecated model identifiers
+		"anthropic/claude-instant-v1",
+		"anthropic/claude-instant-1",
+		"anthropic/claude-instant-1.2-cyan",
+		"google/" + google.Gemini15Flash,
+		"google/" + google.GeminiPro,
+		"fireworks/accounts/sourcegraph/models/starcoder-7b",
+		"fireworks/accounts/sourcegraph/models/starcoder-16b",
+		"fireworks/accounts/fireworks/models/starcoder-3b-w8a16",
+		"fireworks/accounts/fireworks/models/starcoder-1b-w8a16":
+		return true
+	}
+
+	return false
+}
+
+// We only allow dotcom clients to select a custom chat model and maintain an allowlist for which
+// custom values we support
+func isAllowedCustomChatModel(model string, isProUser bool) bool {
+	// When updating these two lists, make sure you also update `allowedModels` in codygateway_dotcom_user.go.
+	if isProUser {
+		switch model {
+		case
+			"anthropic/" + anthropic.Claude3Haiku,
+			"anthropic/" + anthropic.Claude3Sonnet,
+			"anthropic/" + anthropic.Claude3Opus,
+			"fireworks/" + fireworks.Mixtral8x7bInstruct,
+			"fireworks/" + fireworks.Mixtral8x22Instruct,
+			"openai/gpt-3.5-turbo",
+			"openai/gpt-4o",
+			"openai/gpt-4-turbo",
+			"openai/gpt-4-turbo-preview",
+			"google/" + google.Gemini15FlashLatest,
+			"google/" + google.Gemini15ProLatest,
+			"google/" + google.GeminiProLatest,
+			"google/" + google.Gemini15Flash,
+			"google/" + google.Gemini15Pro,
+			"google/" + google.GeminiPro,
+
+			// Remove after the Claude 3 rollout is complete
+			"anthropic/claude-2",
+			"anthropic/claude-2.0",
+			"anthropic/claude-2.1",
+			"anthropic/claude-instant-1.2-cyan",
+			"anthropic/claude-instant-1.2",
+			"anthropic/claude-instant-v1",
+			"anthropic/claude-instant-1":
+			return true
+		}
+	} else {
+		switch model {
+		case
+			"anthropic/" + anthropic.Claude3Haiku,
+			"anthropic/" + anthropic.Claude3Sonnet,
+			// Remove after the Claude 3 rollout is complete
+			"anthropic/claude-2",
+			"anthropic/claude-2.0",
+			"anthropic/claude-instant-v1",
+			"anthropic/claude-instant-1":
+			return true
+		}
+	}
+
+	return false
+}
diff --git a/cmd/frontend/internal/httpapi/completions/handler.go b/cmd/frontend/internal/httpapi/completions/handler.go
@@ -115,6 +115,9 @@ func newCompletionsHandler(
 		requestParams.Model, err = getModel(ctx, requestParams, completionsConfig)
 		requestParams.User = completionsConfig.User
 		if err != nil {
+			// NOTE: We return the raw error to the user assuming that it contains relevant
+			// user-facing diagnostic information, and doesn't leak any internal details.
+			logger.Info("error fetching model", log.Error(err))
 			http.Error(w, err.Error(), http.StatusBadRequest)
 			return
 		}