Create a Watsonx inference endpoint
Generally available
Create an inference endpoint to perform an inference task with the watsonxai service.
You need an IBM Cloud Databases for Elasticsearch deployment to use the watsonxai inference service.
You can provision one through the IBM catalog, the Cloud Databases CLI plug-in, the Cloud Databases API, or Terraform.
Required authorization
- Cluster privileges: manage_inference
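For example, a minimal sketch of a role that carries this privilege, created through the security API (the role name inference_admin is illustrative, not part of this API):
// "inference_admin" is an illustrative role name
PUT _security/role/inference_admin
{
  "cluster": [ "manage_inference" ]
}
Any user assigned such a role can create and manage inference endpoints on the cluster.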
Path parameters
- The type of the inference task that the model will perform. Values are text_embedding, chat_completion, or completion.
- The unique identifier of the inference endpoint.
Query parameters
- Specifies the amount of time to wait for the inference endpoint to be created.
Body (required)
- The chunking configuration object. Applies only to the text_embedding task type. Not applicable to the completion or chat_completion task types. See the sketch after this list.
- The type of service supported for the specified task type. In this case, watsonxai. Value is watsonxai.
- Settings used to install the inference model. These settings are specific to the watsonxai service.
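As a sketch of the chunking configuration mentioned above, a text_embedding request that overrides the default chunking might look like the following. The chunking_settings field name, the sentence strategy, and its max_chunk_size and sentence_overlap options are assumptions; they are not shown elsewhere on this page.
PUT _inference/text_embedding/watsonx-embeddings
{
  "service": "watsonxai",
  "service_settings": {
    "api_key": "Watsonx-API-Key",
    "url": "Watsonx-URL",
    "model_id": "ibm/slate-30m-english-rtrvr",
    "project_id": "IBM-Cloud-ID",
    "api_version": "2024-03-14"
  },
  // the field and option names below are assumptions, not taken from this page
  "chunking_settings": {
    "strategy": "sentence",
    "max_chunk_size": 250,
    "sentence_overlap": 1
  }
}
The examples that follow create the same endpoint without a chunking override, first as a console request and then with each language client.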
PUT _inference/text_embedding/watsonx-embeddings
{
  "service": "watsonxai",
  "service_settings": {
    "api_key": "Watsonx-API-Key",
    "url": "Watsonx-URL",
    "model_id": "ibm/slate-30m-english-rtrvr",
    "project_id": "IBM-Cloud-ID",
    "api_version": "2024-03-14"
  }
}
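# The same request using the Python client: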
resp = client.inference.put(
    task_type="text_embedding",
    inference_id="watsonx-embeddings",
    inference_config={
        "service": "watsonxai",
        "service_settings": {
            "api_key": "Watsonx-API-Key",
            "url": "Watsonx-URL",
            "model_id": "ibm/slate-30m-english-rtrvr",
            "project_id": "IBM-Cloud-ID",
            "api_version": "2024-03-14"
        }
    },
)
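// The same request using the JavaScript client: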
const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "watsonx-embeddings",
  inference_config: {
    service: "watsonxai",
    service_settings: {
      api_key: "Watsonx-API-Key",
      url: "Watsonx-URL",
      model_id: "ibm/slate-30m-english-rtrvr",
      project_id: "IBM-Cloud-ID",
      api_version: "2024-03-14",
    },
  },
});
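# The same request using the Ruby client: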
response = client.inference.put(
  task_type: "text_embedding",
  inference_id: "watsonx-embeddings",
  body: {
    "service": "watsonxai",
    "service_settings": {
      "api_key": "Watsonx-API-Key",
      "url": "Watsonx-URL",
      "model_id": "ibm/slate-30m-english-rtrvr",
      "project_id": "IBM-Cloud-ID",
      "api_version": "2024-03-14"
    }
  }
)
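// The same request using the PHP client: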
$resp = $client->inference()->put([
    "task_type" => "text_embedding",
    "inference_id" => "watsonx-embeddings",
    "body" => [
        "service" => "watsonxai",
        "service_settings" => [
            "api_key" => "Watsonx-API-Key",
            "url" => "Watsonx-URL",
            "model_id" => "ibm/slate-30m-english-rtrvr",
            "project_id" => "IBM-Cloud-ID",
            "api_version" => "2024-03-14",
        ],
    ],
]);
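# The same request using curl: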
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"service":"watsonxai","service_settings":{"api_key":"Watsonx-API-Key","url":"Watsonx-URL","model_id":"ibm/slate-30m-english-rtrvr","project_id":"IBM-Cloud-ID","api_version":"2024-03-14"}}' "$ELASTICSEARCH_URL/_inference/text_embedding/watsonx-embeddings"
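// The same request using the Java client: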
client.inference().put(p -> p
    .inferenceId("watsonx-embeddings")
    .taskType(TaskType.TextEmbedding)
    .inferenceConfig(i -> i
        .service("watsonxai")
        .serviceSettings(JsonData.fromJson("{\"api_key\":\"Watsonx-API-Key\",\"url\":\"Watsonx-URL\",\"model_id\":\"ibm/slate-30m-english-rtrvr\",\"project_id\":\"IBM-Cloud-ID\",\"api_version\":\"2024-03-14\"}"))
    )
);
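Once the endpoint is created, you can confirm it works by running an inference request against it. A minimal sketch (the input text is illustrative):
POST _inference/text_embedding/watsonx-embeddings
{
  // illustrative input text; any string works
  "input": "Sample text to embed"
}
A successful response contains the embeddings generated by the configured model.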