The type of the inference task that the model will perform.
Valid values are text_embedding, completion, and chat_completion.
The unique identifier of the inference endpoint.
Specifies the amount of time to wait for the inference endpoint to be created.
The chunking configuration object.
Applies only to the text_embedding task type.
Not applicable to the completion or chat_completion task types.
The type of service supported for the specified task type. For this endpoint, the service is llama.
Value is llama.
Settings used to install the inference model. These settings are specific to the llama service.
PUT _inference/text_embedding/llama-text-embedding
{
  "service": "llama",
  "service_settings": {
    "url": "http://localhost:8321/v1/inference/embeddings",
    "dimensions": 384,
    "model_id": "all-MiniLM-L6-v2"
  }
}
# Create the "llama-text-embedding" inference endpoint for the text_embedding
# task, using the llama service and the settings given in service_settings.
resp = client.inference.put(
    task_type="text_embedding",
    inference_id="llama-text-embedding",
    inference_config={
        "service": "llama",
        "service_settings": {
            "url": "http://localhost:8321/v1/inference/embeddings",
            "dimensions": 384,
            "model_id": "all-MiniLM-L6-v2",
        },
    },
)
// Create the "llama-text-embedding" inference endpoint for the text_embedding
// task, using the llama service and the settings given in service_settings.
const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "llama-text-embedding",
  inference_config: {
    service: "llama",
    service_settings: {
      url: "http://localhost:8321/v1/inference/embeddings",
      dimensions: 384,
      model_id: "all-MiniLM-L6-v2",
    },
  },
});
# Create the "llama-text-embedding" inference endpoint for the text_embedding
# task, using the llama service and the settings given in service_settings.
response = client.inference.put(
  task_type: "text_embedding",
  inference_id: "llama-text-embedding",
  body: {
    "service": "llama",
    "service_settings": {
      "url": "http://localhost:8321/v1/inference/embeddings",
      "dimensions": 384,
      "model_id": "all-MiniLM-L6-v2"
    }
  }
)
// Create the "llama-text-embedding" inference endpoint for the text_embedding
// task, using the llama service and the settings given in service_settings.
$resp = $client->inference()->put([
    "task_type" => "text_embedding",
    "inference_id" => "llama-text-embedding",
    "body" => [
        "service" => "llama",
        "service_settings" => [
            "url" => "http://localhost:8321/v1/inference/embeddings",
            "dimensions" => 384,
            "model_id" => "all-MiniLM-L6-v2",
        ],
    ],
]);
# Create the "llama-text-embedding" inference endpoint for the text_embedding task.
# Reads the API key from $ELASTIC_API_KEY and the cluster address from $ELASTICSEARCH_URL.
curl --request PUT \
  --header "Authorization: ApiKey $ELASTIC_API_KEY" \
  --header "Content-Type: application/json" \
  --data '{"service":"llama","service_settings":{"url":"http://localhost:8321/v1/inference/embeddings","dimensions":384,"model_id":"all-MiniLM-L6-v2"}}' \
  "$ELASTICSEARCH_URL/_inference/text_embedding/llama-text-embedding"
// Create the "llama-text-embedding" inference endpoint for the text_embedding
// task, using the llama service; the service settings are passed as raw JSON.
client.inference().put(p -> p
    .inferenceId("llama-text-embedding")
    .taskType(TaskType.TextEmbedding)
    .inferenceConfig(i -> i
        .service("llama")
        .serviceSettings(JsonData.fromJson("{\"url\":\"http://localhost:8321/v1/inference/embeddings\",\"dimensions\":384,\"model_id\":\"all-MiniLM-L6-v2\"}"))
    )
);
{
  "service": "llama",
  "service_settings": {
    "url": "http://localhost:8321/v1/inference/embeddings",
    "dimensions": 384,
    "model_id": "all-MiniLM-L6-v2"
  }
}
{
  "service": "llama",
  "service_settings": {
    "url": "http://localhost:8321/v1/openai/v1/chat/completions",
    "model_id": "llama3.2:3b"
  }
}
{
  "service": "llama",
  "service_settings": {
    "url": "http://localhost:8321/v1/openai/v1/chat/completions",
    "model_id": "llama3.2:3b"
  }
}