Commit eb150a6

ai/core: remove scaling of setting values. (#1418)

1 parent 202f9ca commit eb150a6

16 files changed: +46 -240 lines changed

.changeset/fair-avocados-itch.md

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+---
+'@ai-sdk/provider-utils': patch
+'@ai-sdk/anthropic': patch
+'@ai-sdk/provider': patch
+'@ai-sdk/mistral': patch
+'@ai-sdk/openai': patch
+'ai': patch
+---
+
+ai/core: remove scaling of setting values (breaking change). If you were using the temperature, frequency penalty, or presence penalty settings, you need to update the providers and adjust the setting values.
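For illustration, a migration sketch against the OpenAI provider. The `openai` facade import, model id, and prompt are assumptions; the value mappings come from the scaling tests removed in this commit (0.5 -> 1.0, 0.2 -> 0.4, -0.9 -> -1.8). Previously the SDK rescaled the 0..1 / -1..1 settings onto OpenAI's native ranges; to keep sending the same request after upgrading, supply the provider-native values yourself:

// Migration sketch: these values were previously rescaled for OpenAI,
// now they are passed through unchanged.
import { generateText } from 'ai';
import { openai } from '@ai-sdk/openai'; // assumed provider facade

const { text } = await generateText({
  model: openai.chat('gpt-3.5-turbo'),
  prompt: 'Invent a new holiday and describe its traditions.',
  temperature: 1.0, // was temperature: 0.5 on the old 0..1 scale (sent as 1.0)
  frequencyPenalty: 0.4, // was 0.2 on the old -1..1 scale (sent as 0.4)
  presencePenalty: -1.8, // was -0.9 on the old -1..1 scale (sent as -1.8)
});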

docs/pages/docs/ai-core/settings.mdx

Lines changed: 5 additions & 7 deletions
@@ -10,19 +10,17 @@ All AI functions (`generateText`, `streamText`, `generateObject`, `streamObject`
 
 - **maxTokens** - Maximum number of tokens to generate.
 - **temperature** - Temperature setting.
-  This is a number between 0 (almost no randomness) and 1 (very random).
+  The value is passed through to the provider. The range depends on the provider and model.
   It is recommended to set either `temperature` or `topP`, but not both.
-- **topP** - Nucleus sampling. This is a number between 0 and 1.
-  E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
+- **topP** - Nucleus sampling.
+  The value is passed through to the provider. The range depends on the provider and model.
   It is recommended to set either `temperature` or `topP`, but not both.
 - **presencePenalty** - Presence penalty setting.
   It affects the likelihood of the model to repeat information that is already in the prompt.
-  The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
-  0 means no penalty.
+  The value is passed through to the provider. The range depends on the provider and model.
 - **frequencyPenalty** - Frequency penalty setting.
   It affects the likelihood of the model to repeatedly use the same words or phrases.
-  The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
-  0 means no penalty.
+  The value is passed through to the provider. The range depends on the provider and model.
 - **seed** - The seed (integer) to use for random sampling.
   If set and supported by the model, calls will generate deterministic results.
 - **maxRetries** - Maximum number of retries. Set to 0 to disable retries. Default: 2.
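Because the ranges are now provider-native, the same number can mean different sampling strengths on different providers (per the providers' own API docs, OpenAI accepts temperature 0..2 while Anthropic accepts 0..1). A minimal sketch; the `anthropic` facade import and model id are assumptions:

// Sketch: the value is forwarded as-is, so choose it on the target
// provider's own scale.
import { generateText } from 'ai';
import { anthropic } from '@ai-sdk/anthropic'; // assumed provider facade

const { text } = await generateText({
  model: anthropic.messages('claude-3-haiku-20240307'), // illustrative model id
  prompt: 'Write a haiku about diffs.',
  temperature: 0.8, // interpreted on Anthropic's native 0..1 scale, unscaled
});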

packages/anthropic/src/anthropic-messages-language-model.ts

Lines changed: 1 addition & 1 deletion
@@ -96,7 +96,7 @@ export class AnthropicMessagesLanguageModel implements LanguageModelV1 {
 
       // standardized settings:
       max_tokens: maxTokens ?? 4096, // 4096: max model output tokens
-      temperature, // uses 0..1 scale
+      temperature,
       top_p: topP,
 
       // prompt:

packages/core/core/generate-object/generate-object.ts

Lines changed: 5 additions & 7 deletions
@@ -32,19 +32,17 @@ This function does not stream the output. If you want to stream the output, use
 
 @param maxTokens - Maximum number of tokens to generate.
 @param temperature - Temperature setting.
-This is a number between 0 (almost no randomness) and 1 (very random).
+The value is passed through to the provider. The range depends on the provider and model.
 It is recommended to set either `temperature` or `topP`, but not both.
-@param topP - Nucleus sampling. This is a number between 0 and 1.
-E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
+@param topP - Nucleus sampling.
+The value is passed through to the provider. The range depends on the provider and model.
 It is recommended to set either `temperature` or `topP`, but not both.
 @param presencePenalty - Presence penalty setting.
 It affects the likelihood of the model to repeat information that is already in the prompt.
-The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
-0 means no penalty.
+The value is passed through to the provider. The range depends on the provider and model.
 @param frequencyPenalty - Frequency penalty setting.
 It affects the likelihood of the model to repeatedly use the same words or phrases.
-The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
-0 means no penalty.
+The value is passed through to the provider. The range depends on the provider and model.
 @param seed - The seed (integer) to use for random sampling.
 If set and supported by the model, calls will generate deterministic results.

packages/core/core/generate-object/stream-object.ts

Lines changed: 5 additions & 7 deletions
@@ -37,19 +37,17 @@ This function streams the output. If you do not want to stream the output, use `
 
 @param maxTokens - Maximum number of tokens to generate.
 @param temperature - Temperature setting.
-This is a number between 0 (almost no randomness) and 1 (very random).
+The value is passed through to the provider. The range depends on the provider and model.
 It is recommended to set either `temperature` or `topP`, but not both.
-@param topP - Nucleus sampling. This is a number between 0 and 1.
-E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
+@param topP - Nucleus sampling.
+The value is passed through to the provider. The range depends on the provider and model.
 It is recommended to set either `temperature` or `topP`, but not both.
 @param presencePenalty - Presence penalty setting.
 It affects the likelihood of the model to repeat information that is already in the prompt.
-The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
-0 means no penalty.
+The value is passed through to the provider. The range depends on the provider and model.
 @param frequencyPenalty - Frequency penalty setting.
 It affects the likelihood of the model to repeatedly use the same words or phrases.
-The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
-0 means no penalty.
+The value is passed through to the provider. The range depends on the provider and model.
 @param seed - The seed (integer) to use for random sampling.
 If set and supported by the model, calls will generate deterministic results.

packages/core/core/generate-text/generate-text.ts

Lines changed: 5 additions & 7 deletions
@@ -29,19 +29,17 @@ This function does not stream the output. If you want to stream the output, use
 
 @param maxTokens - Maximum number of tokens to generate.
 @param temperature - Temperature setting.
-This is a number between 0 (almost no randomness) and 1 (very random).
+The value is passed through to the provider. The range depends on the provider and model.
 It is recommended to set either `temperature` or `topP`, but not both.
-@param topP - Nucleus sampling. This is a number between 0 and 1.
-E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
+@param topP - Nucleus sampling.
+The value is passed through to the provider. The range depends on the provider and model.
 It is recommended to set either `temperature` or `topP`, but not both.
 @param presencePenalty - Presence penalty setting.
 It affects the likelihood of the model to repeat information that is already in the prompt.
-The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
-0 means no penalty.
+The value is passed through to the provider. The range depends on the provider and model.
 @param frequencyPenalty - Frequency penalty setting.
 It affects the likelihood of the model to repeatedly use the same words or phrases.
-The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
-0 means no penalty.
+The value is passed through to the provider. The range depends on the provider and model.
 @param seed - The seed (integer) to use for random sampling.
 If set and supported by the model, calls will generate deterministic results.

packages/core/core/generate-text/stream-text.ts

Lines changed: 5 additions & 7 deletions
@@ -39,19 +39,17 @@ This function streams the output. If you do not want to stream the output, use `
 
 @param maxTokens - Maximum number of tokens to generate.
 @param temperature - Temperature setting.
-This is a number between 0 (almost no randomness) and 1 (very random).
+The value is passed through to the provider. The range depends on the provider and model.
 It is recommended to set either `temperature` or `topP`, but not both.
-@param topP - Nucleus sampling. This is a number between 0 and 1.
-E.g. 0.1 would mean that only tokens with the top 10% probability mass are considered.
+@param topP - Nucleus sampling.
+The value is passed through to the provider. The range depends on the provider and model.
 It is recommended to set either `temperature` or `topP`, but not both.
 @param presencePenalty - Presence penalty setting.
 It affects the likelihood of the model to repeat information that is already in the prompt.
-The presence penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
-0 means no penalty.
+The value is passed through to the provider. The range depends on the provider and model.
 @param frequencyPenalty - Frequency penalty setting.
 It affects the likelihood of the model to repeatedly use the same words or phrases.
-The frequency penalty is a number between -1 (increase repetition) and 1 (maximum penalty, decrease repetition).
-0 means no penalty.
+The value is passed through to the provider. The range depends on the provider and model.
 @param seed - The seed (integer) to use for random sampling.
 If set and supported by the model, calls will generate deterministic results.

packages/core/core/prompt/prepare-call-settings.ts

Lines changed: 0 additions & 32 deletions
@@ -39,14 +39,6 @@ export function prepareCallSettings({
         message: 'temperature must be a number',
       });
     }
-
-    if (temperature < 0 || temperature > 1) {
-      throw new InvalidArgumentError({
-        parameter: 'temperature',
-        value: temperature,
-        message: 'temperature must be between 0 and 1 (inclusive)',
-      });
-    }
   }
 
   if (topP != null) {
@@ -57,14 +49,6 @@ export function prepareCallSettings({
         message: 'topP must be a number',
       });
     }
-
-    if (topP < 0 || topP > 1) {
-      throw new InvalidArgumentError({
-        parameter: 'topP',
-        value: topP,
-        message: 'topP must be between 0 and 1 (inclusive)',
-      });
-    }
   }
 
   if (presencePenalty != null) {
@@ -75,14 +59,6 @@ export function prepareCallSettings({
         message: 'presencePenalty must be a number',
      });
     }
-
-    if (presencePenalty < -1 || presencePenalty > 1) {
-      throw new InvalidArgumentError({
-        parameter: 'presencePenalty',
-        value: presencePenalty,
-        message: 'presencePenalty must be between -1 and 1 (inclusive)',
-      });
-    }
   }
 
   if (frequencyPenalty != null) {
@@ -93,14 +69,6 @@ export function prepareCallSettings({
         message: 'frequencyPenalty must be a number',
       });
     }
-
-    if (frequencyPenalty < -1 || frequencyPenalty > 1) {
-      throw new InvalidArgumentError({
-        parameter: 'frequencyPenalty',
-        value: frequencyPenalty,
-        message: 'frequencyPenalty must be between -1 and 1 (inclusive)',
-      });
-    }
   }
 
   if (seed != null) {
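What survives in `prepareCallSettings` after this change is the type check alone. A condensed sketch of the remaining temperature branch, reconstructed from the context lines above:

// Remaining validation (sketch): type check only, no range check.
if (temperature != null) {
  if (typeof temperature !== 'number') {
    throw new InvalidArgumentError({
      parameter: 'temperature',
      value: temperature,
      message: 'temperature must be a number',
    });
  }
  // The 0..1 range check that used to follow here was removed.
}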

packages/mistral/src/mistral-chat-language-model.ts

Lines changed: 1 addition & 1 deletion
@@ -87,7 +87,7 @@ export class MistralChatLanguageModel implements LanguageModelV1 {
 
       // standardized settings:
       max_tokens: maxTokens,
-      temperature, // uses 0..1 scale
+      temperature,
       top_p: topP,
       random_seed: seed,

packages/openai/src/openai-chat-language-model.test.ts

Lines changed: 0 additions & 45 deletions
@@ -330,51 +330,6 @@ describe('doStream', () => {
     });
   });
 
-  it('should scale the temperature', async () => {
-    prepareStreamResponse({ content: [] });
-
-    await provider.chat('gpt-3.5-turbo').doStream({
-      inputFormat: 'prompt',
-      mode: { type: 'regular' },
-      prompt: TEST_PROMPT,
-      temperature: 0.5,
-    });
-
-    expect((await server.getRequestBodyJson()).temperature).toBeCloseTo(1, 5);
-  });
-
-  it('should scale the frequency penalty', async () => {
-    prepareStreamResponse({ content: [] });
-
-    await provider.chat('gpt-3.5-turbo').doStream({
-      inputFormat: 'prompt',
-      mode: { type: 'regular' },
-      prompt: TEST_PROMPT,
-      frequencyPenalty: 0.2,
-    });
-
-    expect((await server.getRequestBodyJson()).frequency_penalty).toBeCloseTo(
-      0.4,
-      5,
-    );
-  });
-
-  it('should scale the presence penalty', async () => {
-    prepareStreamResponse({ content: [] });
-
-    await provider.chat('gpt-3.5-turbo').doStream({
-      inputFormat: 'prompt',
-      mode: { type: 'regular' },
-      prompt: TEST_PROMPT,
-      presencePenalty: -0.9,
-    });
-
-    expect((await server.getRequestBodyJson()).presence_penalty).toBeCloseTo(
-      -1.8,
-      5,
-    );
-  });
-
   it('should pass custom headers', async () => {
     prepareStreamResponse({ content: [] });
