Create an inference endpoint to perform an inference task with the azureopenai service.
The chat completion models that you can choose from in your Azure OpenAI deployment include GPT-4 and GPT-4 Turbo models, and GPT-3.5.
The list of embeddings models that you can choose from in your deployment can be found in the Azure models documentation.
Required authorization
- Cluster privileges:
manage_inference
Path parameters
-
The type of the inference task that the model will perform. NOTE: The
`chat_completion` task type only supports streaming and only through the `_stream` API. Values are
`completion` or `text_embedding`. -
The unique identifier of the inference endpoint.
Query parameters
-
Specifies the amount of time to wait for the inference endpoint to be created.
External documentation
Body
Required
-
The chunking configuration object. Applies only to the
`text_embedding` task type. Not applicable to the `completion` task type. External documentation -
The type of service supported for the specified task type. In this case,
`azureopenai`. Value is
`azureopenai`. -
Settings used to install the inference model. These settings are specific to the
`azureopenai` service. -
Settings to configure the inference task. These settings are specific to the task type you specified.
PUT _inference/text_embedding/azure_openai_embeddings
{
"service": "azureopenai",
"service_settings": {
"api_key": "Api-Key",
"resource_name": "Resource-name",
"deployment_id": "Deployment-id",
"api_version": "2024-02-01"
}
}
resp = client.inference.put(
task_type="text_embedding",
inference_id="azure_openai_embeddings",
inference_config={
"service": "azureopenai",
"service_settings": {
"api_key": "Api-Key",
"resource_name": "Resource-name",
"deployment_id": "Deployment-id",
"api_version": "2024-02-01"
}
},
)
const response = await client.inference.put({
task_type: "text_embedding",
inference_id: "azure_openai_embeddings",
inference_config: {
service: "azureopenai",
service_settings: {
api_key: "Api-Key",
resource_name: "Resource-name",
deployment_id: "Deployment-id",
api_version: "2024-02-01",
},
},
});
response = client.inference.put(
task_type: "text_embedding",
inference_id: "azure_openai_embeddings",
body: {
"service": "azureopenai",
"service_settings": {
"api_key": "Api-Key",
"resource_name": "Resource-name",
"deployment_id": "Deployment-id",
"api_version": "2024-02-01"
}
}
)
$resp = $client->inference()->put([
"task_type" => "text_embedding",
"inference_id" => "azure_openai_embeddings",
"body" => [
"service" => "azureopenai",
"service_settings" => [
"api_key" => "Api-Key",
"resource_name" => "Resource-name",
"deployment_id" => "Deployment-id",
"api_version" => "2024-02-01",
],
],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"service":"azureopenai","service_settings":{"api_key":"Api-Key","resource_name":"Resource-name","deployment_id":"Deployment-id","api_version":"2024-02-01"}}' "$ELASTICSEARCH_URL/_inference/text_embedding/azure_openai_embeddings"
client.inference().put(p -> p
.inferenceId("azure_openai_embeddings")
.taskType(TaskType.TextEmbedding)
.inferenceConfig(i -> i
.service("azureopenai")
.serviceSettings(JsonData.fromJson("{\"api_key\":\"Api-Key\",\"resource_name\":\"Resource-name\",\"deployment_id\":\"Deployment-id\",\"api_version\":\"2024-02-01\"}"))
)
);
{
"service": "azureopenai",
"service_settings": {
"api_key": "Api-Key",
"resource_name": "Resource-name",
"deployment_id": "Deployment-id",
"api_version": "2024-02-01"
}
}
{
"service": "azureopenai",
"service_settings": {
"api_key": "Api-Key",
"resource_name": "Resource-name",
"deployment_id": "Deployment-id",
"api_version": "2024-02-01"
}
}