Create a behavioral analytics collection Technical preview; Added in 8.8.0

PUT /_application/analytics/{name}

Path parameters

  • name string Required

    The name of the analytics collection to be created or updated.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

    • name string Required

      The name of the analytics collection created or updated

PUT /_application/analytics/{name}
PUT _application/analytics/my_analytics_collection
resp = client.search_application.put_behavioral_analytics(
    name="my_analytics_collection",
)
const response = await client.searchApplication.putBehavioralAnalytics({
  name: "my_analytics_collection",
});
response = client.search_application.put_behavioral_analytics(
  name: "my_analytics_collection"
)
$resp = $client->searchApplication()->putBehavioralAnalytics([
    "name" => "my_analytics_collection",
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_application/analytics/my_analytics_collection"
client.searchApplication().putBehavioralAnalytics(p -> p
    .name("my_analytics_collection")
);




Create a behavioral analytics collection event Technical preview

POST /_application/analytics/{collection_name}/event/{event_type} External documentation

Path parameters

  • collection_name string Required

    The name of the behavioral analytics collection.

  • event_type string

    The analytics event type.

    Values are page_view, search, or search_click.

Query parameters

  • debug boolean

    Whether the response type has to include more details

application/json

Body Required

object object

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • accepted boolean Required
    • event object
POST /_application/analytics/{collection_name}/event/{event_type}
POST _application/analytics/my_analytics_collection/event/search_click
{
  "session": {
    "id": "1797ca95-91c9-4e2e-b1bd-9c38e6f386a9"
  },
  "user": {
    "id": "5f26f01a-bbee-4202-9298-81261067abbd"
  },
  "search":{
    "query": "search term",
    "results": {
      "items": [
        {
          "document": {
            "id": "123",
            "index": "products"
          }
        }
      ],
      "total_results": 10
    },
    "sort": {
      "name": "relevance"
    },
    "search_application": "website"
  },
  "document":{
    "id": "123",
    "index": "products"
  }
}
resp = client.search_application.post_behavioral_analytics_event(
    collection_name="my_analytics_collection",
    event_type="search_click",
    payload={
        "session": {
            "id": "1797ca95-91c9-4e2e-b1bd-9c38e6f386a9"
        },
        "user": {
            "id": "5f26f01a-bbee-4202-9298-81261067abbd"
        },
        "search": {
            "query": "search term",
            "results": {
                "items": [
                    {
                        "document": {
                            "id": "123",
                            "index": "products"
                        }
                    }
                ],
                "total_results": 10
            },
            "sort": {
                "name": "relevance"
            },
            "search_application": "website"
        },
        "document": {
            "id": "123",
            "index": "products"
        }
    },
)
const response = await client.searchApplication.postBehavioralAnalyticsEvent({
  collection_name: "my_analytics_collection",
  event_type: "search_click",
  payload: {
    session: {
      id: "1797ca95-91c9-4e2e-b1bd-9c38e6f386a9",
    },
    user: {
      id: "5f26f01a-bbee-4202-9298-81261067abbd",
    },
    search: {
      query: "search term",
      results: {
        items: [
          {
            document: {
              id: "123",
              index: "products",
            },
          },
        ],
        total_results: 10,
      },
      sort: {
        name: "relevance",
      },
      search_application: "website",
    },
    document: {
      id: "123",
      index: "products",
    },
  },
});
response = client.search_application.post_behavioral_analytics_event(
  collection_name: "my_analytics_collection",
  event_type: "search_click",
  body: {
    "session": {
      "id": "1797ca95-91c9-4e2e-b1bd-9c38e6f386a9"
    },
    "user": {
      "id": "5f26f01a-bbee-4202-9298-81261067abbd"
    },
    "search": {
      "query": "search term",
      "results": {
        "items": [
          {
            "document": {
              "id": "123",
              "index": "products"
            }
          }
        ],
        "total_results": 10
      },
      "sort": {
        "name": "relevance"
      },
      "search_application": "website"
    },
    "document": {
      "id": "123",
      "index": "products"
    }
  }
)
$resp = $client->searchApplication()->postBehavioralAnalyticsEvent([
    "collection_name" => "my_analytics_collection",
    "event_type" => "search_click",
    "body" => [
        "session" => [
            "id" => "1797ca95-91c9-4e2e-b1bd-9c38e6f386a9",
        ],
        "user" => [
            "id" => "5f26f01a-bbee-4202-9298-81261067abbd",
        ],
        "search" => [
            "query" => "search term",
            "results" => [
                "items" => array(
                    [
                        "document" => [
                            "id" => "123",
                            "index" => "products",
                        ],
                    ],
                ),
                "total_results" => 10,
            ],
            "sort" => [
                "name" => "relevance",
            ],
            "search_application" => "website",
        ],
        "document" => [
            "id" => "123",
            "index" => "products",
        ],
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"session":{"id":"1797ca95-91c9-4e2e-b1bd-9c38e6f386a9"},"user":{"id":"5f26f01a-bbee-4202-9298-81261067abbd"},"search":{"query":"search term","results":{"items":[{"document":{"id":"123","index":"products"}}],"total_results":10},"sort":{"name":"relevance"},"search_application":"website"},"document":{"id":"123","index":"products"}}' "$ELASTICSEARCH_URL/_application/analytics/my_analytics_collection/event/search_click"
client.searchApplication().postBehavioralAnalyticsEvent(p -> p
    .collectionName("my_analytics_collection")
    .eventType(EventType.SearchClick)
    .payload(JsonData.fromJson("{\"session\":{\"id\":\"1797ca95-91c9-4e2e-b1bd-9c38e6f386a9\"},\"user\":{\"id\":\"5f26f01a-bbee-4202-9298-81261067abbd\"},\"search\":{\"query\":\"search term\",\"results\":{\"items\":[{\"document\":{\"id\":\"123\",\"index\":\"products\"}}],\"total_results\":10},\"sort\":{\"name\":\"relevance\"},\"search_application\":\"website\"},\"document\":{\"id\":\"123\",\"index\":\"products\"}}"))
);
Request example
Run `POST _application/analytics/my_analytics_collection/event/search_click` to send a `search_click` event to an analytics collection called `my_analytics_collection`.
{
  "session": {
    "id": "1797ca95-91c9-4e2e-b1bd-9c38e6f386a9"
  },
  "user": {
    "id": "5f26f01a-bbee-4202-9298-81261067abbd"
  },
  "search":{
    "query": "search term",
    "results": {
      "items": [
        {
          "document": {
            "id": "123",
            "index": "products"
          }
        }
      ],
      "total_results": 10
    },
    "sort": {
      "name": "relevance"
    },
    "search_application": "website"
  },
  "document":{
    "id": "123",
    "index": "products"
  }
}

















































Get trained models Generally available; Added in 7.7.0

GET /_cat/ml/trained_models/{model_id}

All methods and paths for this operation:

GET /_cat/ml/trained_models

GET /_cat/ml/trained_models/{model_id}

Get configuration and usage information about inference trained models.

IMPORTANT: CAT APIs are only intended for human consumption using the Kibana console or command line. They are not intended for use by applications. For application consumption, use the get trained models statistics API.

Required authorization

  • Cluster privileges: monitor_ml

Path parameters

  • model_id string Required

    A unique identifier for the trained model.

Query parameters

  • allow_no_match boolean

    Specifies what to do when the request: contains wildcard expressions and there are no models that match; contains the _all string or no identifiers and there are no matches; contains wildcard expressions and there are only partial matches. If true, the API returns an empty array when there are no matches and the subset of results when there are partial matches. If false, the API returns a 404 status code when there are no matches or only partial matches.

  • bytes string

    The unit used to display byte values.

    Values are b, kb, mb, gb, tb, or pb.

  • h string | array[string]

    A comma-separated list of column names to display.

    Supported values include:

    • create_time (or ct): The time when the trained model was created.
    • created_by (or c, createdBy): Information on the creator of the trained model.
    • data_frame_analytics_id (or df, dataFrameAnalytics, dfid): Identifier for the data frame analytics job that created the model. Only displayed if it is still available.
    • description (or d): The description of the trained model.
    • heap_size (or hs, modelHeapSize): The estimated heap size to keep the trained model in memory.
    • id: Identifier for the trained model.
    • ingest.count (or ic, ingestCount): The total number of documents that are processed by the model.
    • ingest.current (or icurr, ingestCurrent): The total number of document that are currently being handled by the trained model.
    • ingest.failed (or if, ingestFailed): The total number of failed ingest attempts with the trained model.
    • ingest.pipelines (or ip, ingestPipelines): The total number of ingest pipelines that are referencing the trained model.
    • ingest.time (or it, ingestTime): The total time that is spent processing documents with the trained model.
    • license (or l): The license level of the trained model.
    • operations (or o, modelOperations): The estimated number of operations to use the trained model. This number helps measuring the computational complexity of the model.
    • version (or v): The Elasticsearch version number in which the trained model was created.
  • s string | array[string]

    A comma-separated list of column names or aliases used to sort the response.

    Supported values include:

    • create_time (or ct): The time when the trained model was created.
    • created_by (or c, createdBy): Information on the creator of the trained model.
    • data_frame_analytics_id (or df, dataFrameAnalytics, dfid): Identifier for the data frame analytics job that created the model. Only displayed if it is still available.
    • description (or d): The description of the trained model.
    • heap_size (or hs, modelHeapSize): The estimated heap size to keep the trained model in memory.
    • id: Identifier for the trained model.
    • ingest.count (or ic, ingestCount): The total number of documents that are processed by the model.
    • ingest.current (or icurr, ingestCurrent): The total number of document that are currently being handled by the trained model.
    • ingest.failed (or if, ingestFailed): The total number of failed ingest attempts with the trained model.
    • ingest.pipelines (or ip, ingestPipelines): The total number of ingest pipelines that are referencing the trained model.
    • ingest.time (or it, ingestTime): The total time that is spent processing documents with the trained model.
    • license (or l): The license level of the trained model.
    • operations (or o, modelOperations): The estimated number of operations to use the trained model. This number helps measuring the computational complexity of the model.
    • version (or v): The Elasticsearch version number in which the trained model was created.
  • from number

    Skips the specified number of transforms.

  • size number

    The maximum number of transforms to display.

  • time string

    Unit used to display time values.

    Values are nanos, micros, ms, s, m, h, or d.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • id string

      The model identifier.

    • created_by string

      Information about the creator of the model.

    • heap_size number | string

      The estimated heap size to keep the model in memory.

      One of:

      The estimated heap size to keep the model in memory.

    • operations string

      The estimated number of operations to use the model. This number helps to measure the computational complexity of the model.

    • license string

      The license level of the model.

    • create_time string | number

      The time the model was created.

      One of:

      The time the model was created.

    • version string

      The version of Elasticsearch when the model was created.

    • description string

      A description of the model.

    • ingest.pipelines string

      The number of pipelines that are referencing the model.

    • ingest.count string

      The total number of documents that are processed by the model.

    • ingest.time string

      The total time spent processing documents with thie model.

    • ingest.current string

      The total number of documents that are currently being handled by the model.

    • ingest.failed string

      The total number of failed ingest attempts with the model.

    • data_frame.id string

      The identifier for the data frame analytics job that created the model. Only displayed if the job is still available.

    • data_frame.create_time string

      The time the data frame analytics job was created.

    • data_frame.source_index string

      The source index used to train in the data frame analysis.

    • data_frame.analysis string

      The analysis used by the data frame to build the model.

    • type string Generally available; Added in 8.0.0
GET /_cat/ml/trained_models/{model_id}
GET _cat/ml/trained_models?v=true&format=json
resp = client.cat.ml_trained_models(
    v=True,
    format="json",
)
const response = await client.cat.mlTrainedModels({
  v: "true",
  format: "json",
});
response = client.cat.ml_trained_models(
  v: "true",
  format: "json"
)
$resp = $client->cat()->mlTrainedModels([
    "v" => "true",
    "format" => "json",
]);
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_cat/ml/trained_models?v=true&format=json"
client.cat().mlTrainedModels();
Response examples (200)
A successful response from `GET _cat/ml/trained_models?v=true&format=json`.
[
  {
    "id": "ddddd-1580216177138",
    "heap_size": "0b",
    "operations": "196",
    "create_time": "2025-03-25T00:01:38.662Z",
    "type": "pytorch",
    "ingest.pipelines": "0",
    "data_frame.id": "__none__"
  },
  {
    "id": "lang_ident_model_1",
    "heap_size": "1mb",
    "operations": "39629",
    "create_time": "2019-12-05T12:28:34.594Z",
    "type": "lang_ident",
    "ingest.pipelines": "0",
    "data_frame.id": "__none__"
  }
]




















Get snapshot repository information Generally available; Added in 2.1.0

GET /_cat/repositories

Get a list of snapshot repositories for a cluster. IMPORTANT: cat APIs are only intended for human consumption using the command line or Kibana console. They are not intended for use by applications. For application consumption, use the get snapshot repository API.

Required authorization

  • Cluster privileges: monitor_snapshot

Query parameters

  • h string | array[string]

    List of columns to appear in the response. Supports simple wildcards.

  • s string | array[string]

    List of columns that determine how the table should be sorted. Sorting defaults to ascending and can be changed by setting :asc or :desc as a suffix to the column name.

  • local boolean

    If true, the request computes the list of selected nodes from the local cluster state. If false the list of selected nodes are computed from the cluster state of the master node. In both cases the coordinating node will send requests for further information to each selected node.

  • master_timeout string

    Period to wait for a connection to the master node.

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • id string

      The unique repository identifier.

    • type string

      The repository type.

GET /_cat/repositories
GET /_cat/repositories?v=true&format=json
resp = client.cat.repositories(
    v=True,
    format="json",
)
const response = await client.cat.repositories({
  v: "true",
  format: "json",
});
response = client.cat.repositories(
  v: "true",
  format: "json"
)
$resp = $client->cat()->repositories([
    "v" => "true",
    "format" => "json",
]);
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_cat/repositories?v=true&format=json"
client.cat().repositories();
Response examples (200)
A successful response from `GET /_cat/repositories?v=true&format=json`.
[
  {
    "id": "repo1",
    "type": "fs"
  },
  {
    "id": "repo2",
    "type": "s3"
  }
]












Get task information Technical preview; Added in 5.0.0

GET /_cat/tasks

Get information about tasks currently running in the cluster. IMPORTANT: cat APIs are only intended for human consumption using the command line or Kibana console. They are not intended for use by applications. For application consumption, use the task management API.

Required authorization

  • Cluster privileges: monitor

Query parameters

  • actions array[string]

    The task action names, which are used to limit the response.

  • detailed boolean

    If true, the response includes detailed information about shard recoveries.

  • nodes array[string]

    Unique node identifiers, which are used to limit the response.

  • parent_task_id string

    The parent task identifier, which is used to limit the response.

  • h string | array[string]

    List of columns to appear in the response. Supports simple wildcards.

  • s string | array[string]

    List of columns that determine how the table should be sorted. Sorting defaults to ascending and can be changed by setting :asc or :desc as a suffix to the column name.

  • time string

    Unit used to display time values.

    Values are nanos, micros, ms, s, m, h, or d.

  • timeout string

    Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

  • wait_for_completion boolean

    If true, the request blocks until the task has completed.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • id string

      The identifier of the task with the node.

    • action string

      The task action.

    • task_id string

      The unique task identifier.

    • parent_task_id string

      The parent task identifier.

    • type string

      The task type.

    • start_time string

      The start time in milliseconds.

    • timestamp string

      The start time in HH:MM:SS format.

    • running_time_ns string

      The running time in nanoseconds.

    • running_time string

      The running time.

    • node_id string

      The unique node identifier.

    • ip string

      The IP address for the node.

    • port string

      The bound transport port for the node.

    • node string

      The node name.

    • version string

      The Elasticsearch version.

    • x_opaque_id string

      The X-Opaque-ID header.

    • description string

      The task action description.

GET /_cat/tasks
GET _cat/tasks?v=true&format=json
resp = client.cat.tasks(
    v=True,
    format="json",
)
const response = await client.cat.tasks({
  v: "true",
  format: "json",
});
response = client.cat.tasks(
  v: "true",
  format: "json"
)
$resp = $client->cat()->tasks([
    "v" => "true",
    "format" => "json",
]);
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_cat/tasks?v=true&format=json"
client.cat().tasks();
Response examples (200)
A successful response from `GET _cat/tasks?v=true&format=json`.
[
  {
    "action": "cluster:monitor/tasks/lists[n]",
    "task_id": "oTUltX4IQMOUUVeiohTt8A:124",
    "parent_task_id": "oTUltX4IQMOUUVeiohTt8A:123",
    "type": "direct",
    "start_time": "1458585884904",
    "timestamp": "01:48:24",
    "running_time": "44.1micros",
    "ip": "127.0.0.1:9300",
    "node": "oTUltX4IQMOUUVeiohTt8A"
  },
  {
    "action": "cluster:monitor/tasks/lists",
    "task_id": "oTUltX4IQMOUUVeiohTt8A:123",
    "parent_task_id": "-",
    "type": "transport",
    "start_time": "1458585884904",
    "timestamp": "01:48:24",
    "running_time": "186.2micros",
    "ip": "127.0.0.1:9300",
    "node": "oTUltX4IQMOUUVeiohTt8A"
  }
]












Cluster









































Get the cluster state Generally available; Added in 1.3.0

GET /_cluster/state/{metric}/{index}

All methods and paths for this operation:

GET /_cluster/state

GET /_cluster/state/{metric}
GET /_cluster/state/{metric}/{index}

Get comprehensive information about the state of the cluster.

The cluster state is an internal data structure which keeps track of a variety of information needed by every node, including the identity and attributes of the other nodes in the cluster; cluster-wide settings; index metadata, including the mapping and settings for each index; the location and status of every shard copy in the cluster.

The elected master node ensures that every node in the cluster has a copy of the same cluster state. This API lets you retrieve a representation of this internal state for debugging or diagnostic purposes. You may need to consult the Elasticsearch source code to determine the precise meaning of the response.

By default the API will route requests to the elected master node since this node is the authoritative source of cluster states. You can also retrieve the cluster state held on the node handling the API request by adding the ?local=true query parameter.

Elasticsearch may need to expend significant effort to compute a response to this API in larger clusters, and the response may comprise a very large quantity of data. If you use this API repeatedly, your cluster may become unstable.

WARNING: The response is a representation of an internal data structure. Its format is not subject to the same compatibility guarantees as other more stable APIs and may change from version to version. Do not query this API using external monitoring tools. Instead, obtain the information you require using other more stable cluster APIs.

Required authorization

  • Cluster privileges: monitor,manage

Path parameters

  • metric string | array[string] Required

    Limit the information returned to the specified metrics

  • index string | array[string] Required

    A comma-separated list of index names; use _all or empty string to perform the operation on all indices

Query parameters

  • allow_no_indices boolean

    Whether to ignore if a wildcard indices expression resolves into no concrete indices. (This includes _all string or when no indices have been specified)

  • expand_wildcards string | array[string]

    Whether to expand wildcard expression to concrete indices that are open, closed or both.

    Supported values include:

    • all: Match any data stream or index, including hidden ones.
    • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
    • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
    • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
    • none: Wildcard expressions are not accepted.

    Values are all, open, closed, hidden, or none.

  • flat_settings boolean

    Return settings in flat format (default: false)

  • ignore_unavailable boolean

    Whether specified concrete indices should be ignored when unavailable (missing or closed)

  • local boolean

    Return local information, do not retrieve the state from master node (default: false)

  • master_timeout string

    Specify timeout for connection to master

    Values are -1 or 0.

  • wait_for_metadata_version number

    Wait for the metadata version to be equal or greater than the specified metadata version

  • wait_for_timeout string

    The maximum time to wait for wait_for_metadata_version before timing out

    Values are -1 or 0.

Responses

  • 200 application/json
GET /_cluster/state/{metric}/{index}
GET /_cluster/state?filter_path=metadata.cluster_coordination.last_committed_config
resp = client.cluster.state(
    filter_path="metadata.cluster_coordination.last_committed_config",
)
const response = await client.cluster.state({
  filter_path: "metadata.cluster_coordination.last_committed_config",
});
response = client.cluster.state(
  filter_path: "metadata.cluster_coordination.last_committed_config"
)
$resp = $client->cluster()->state([
    "filter_path" => "metadata.cluster_coordination.last_committed_config",
]);
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_cluster/state?filter_path=metadata.cluster_coordination.last_committed_config"




Ping the cluster Generally available

HEAD /

Get information about whether the cluster is running.

Responses

  • 200 application/json
HEAD /
curl \
 --request HEAD 'http://api.example.com/' \
 --header "Authorization: $API_KEY"








Get the hot threads for nodes Generally available

GET /_nodes/{node_id}/hot_threads

All methods and paths for this operation:

GET /_nodes/hot_threads

GET /_nodes/{node_id}/hot_threads

Get a breakdown of the hot threads on each selected node in the cluster. The output is plain text with a breakdown of the top hot threads for each node.

Required authorization

  • Cluster privileges: monitor,manage

Path parameters

  • node_id string | array[string] Required

    List of node IDs or names used to limit returned information.

Query parameters

  • ignore_idle_threads boolean

    If true, known idle threads (e.g. waiting in a socket select, or to get a task from an empty queue) are filtered out.

  • interval string

    The interval to do the second sampling of threads.

    Values are -1 or 0.

  • snapshots number

    Number of samples of thread stacktrace.

  • threads number

    Specifies the number of hot threads to provide information for.

  • timeout string

    Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

  • type string

    The type to sample.

    Values are cpu, wait, block, gpu, or mem.

  • sort string

    The sort order for 'cpu' type (default: total)

    Values are cpu, wait, block, gpu, or mem.

Responses

  • 200 application/json
GET /_nodes/{node_id}/hot_threads
GET /_nodes/hot_threads
resp = client.nodes.hot_threads()
const response = await client.nodes.hotThreads();
response = client.nodes.hot_threads
$resp = $client->nodes()->hotThreads();
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_nodes/hot_threads"
client.nodes().hotThreads(h -> h);





















Connector

The connector and sync jobs APIs provide a convenient way to create and manage Elastic connectors and sync jobs in an internal index. Connectors are Elasticsearch integrations that bring content from third-party data sources, which can be deployed on Elastic Cloud or hosted on your own infrastructure:

  • Elastic managed connectors (Native connectors) are a managed service on Elastic Cloud
  • Self-managed connectors (Connector clients) are self-managed on your infrastructure.

This API provides an alternative to relying solely on Kibana UI for connector and sync job management. The API comes with a set of validations and assertions to ensure that the state representation in the internal index remains valid.

Check out the connector API tutorial




















Create a connector Beta; Added in 8.12.0

POST /_connector

Connectors are Elasticsearch integrations that bring content from third-party data sources, which can be deployed on Elastic Cloud or hosted on your own infrastructure. Elastic managed connectors (Native connectors) are a managed service on Elastic Cloud. Self-managed connectors (Connector clients) are self-managed on your infrastructure.

application/json

Body

  • description string
  • index_name string
  • is_native boolean
  • language string
  • name string
  • service_type string

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • result string Required

      Values are created, updated, deleted, not_found, or noop.

    • id string Required
POST /_connector
curl \
 --request POST 'http://api.example.com/_connector' \
 --header "Authorization: $API_KEY" \
 --header "Content-Type: application/json" \
 --data '{"description":"string","index_name":"string","is_native":true,"language":"string","name":"string","service_type":"string"}'




Check in a connector sync job Technical preview

PUT /_connector/_sync_job/{connector_sync_job_id}/_check_in

Check in a connector sync job and set the last_seen field to the current time before updating it in the internal index.

To sync data using self-managed connectors, you need to deploy the Elastic connector service on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors.

Path parameters

  • connector_sync_job_id string Required

    The unique identifier of the connector sync job to be checked in.

Responses

  • 200 application/json
PUT /_connector/_sync_job/{connector_sync_job_id}/_check_in
PUT _connector/_sync_job/my-connector-sync-job/_check_in
resp = client.connector.sync_job_check_in(
    connector_sync_job_id="my-connector-sync-job",
)
const response = await client.connector.syncJobCheckIn({
  connector_sync_job_id: "my-connector-sync-job",
});
response = client.connector.sync_job_check_in(
  connector_sync_job_id: "my-connector-sync-job"
)
$resp = $client->connector()->syncJobCheckIn([
    "connector_sync_job_id" => "my-connector-sync-job",
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_connector/_sync_job/my-connector-sync-job/_check_in"
client.connector().syncJobCheckIn(s -> s
    .connectorSyncJobId("my-connector-sync-job")
);
























Set the connector sync job stats Technical preview

PUT /_connector/_sync_job/{connector_sync_job_id}/_stats

Stats include: deleted_document_count, indexed_document_count, indexed_document_volume, and total_document_count. You can also update last_seen. This API is mainly used by the connector service for updating sync job information.

To sync data using self-managed connectors, you need to deploy the Elastic connector service on your own infrastructure. This service runs automatically on Elastic Cloud for Elastic managed connectors.

Path parameters

  • connector_sync_job_id string Required

    The unique identifier of the connector sync job.

application/json

Body Required

  • deleted_document_count number Required

    The number of documents the sync job deleted.

  • indexed_document_count number Required

    The number of documents the sync job indexed.

  • indexed_document_volume number Required

    The total size of the data (in MiB) the sync job indexed.

  • last_seen string

    The timestamp to use in the last_seen property for the connector sync job.

  • metadata object

    The connector-specific metadata.

    Hide metadata attribute Show metadata attribute object
    • * object Additional properties
  • total_document_count number

    The total number of documents in the target index after the sync job finished.

Responses

  • 200 application/json
PUT /_connector/_sync_job/{connector_sync_job_id}/_stats
PUT _connector/_sync_job/my-connector-sync-job/_stats
{
    "deleted_document_count": 10,
    "indexed_document_count": 20,
    "indexed_document_volume": 1000,
    "total_document_count": 2000,
    "last_seen": "2023-01-02T10:00:00Z"
}
resp = client.connector.sync_job_update_stats(
    connector_sync_job_id="my-connector-sync-job",
    deleted_document_count=10,
    indexed_document_count=20,
    indexed_document_volume=1000,
    total_document_count=2000,
    last_seen="2023-01-02T10:00:00Z",
)
const response = await client.connector.syncJobUpdateStats({
  connector_sync_job_id: "my-connector-sync-job",
  deleted_document_count: 10,
  indexed_document_count: 20,
  indexed_document_volume: 1000,
  total_document_count: 2000,
  last_seen: "2023-01-02T10:00:00Z",
});
response = client.connector.sync_job_update_stats(
  connector_sync_job_id: "my-connector-sync-job",
  body: {
    "deleted_document_count": 10,
    "indexed_document_count": 20,
    "indexed_document_volume": 1000,
    "total_document_count": 2000,
    "last_seen": "2023-01-02T10:00:00Z"
  }
)
$resp = $client->connector()->syncJobUpdateStats([
    "connector_sync_job_id" => "my-connector-sync-job",
    "body" => [
        "deleted_document_count" => 10,
        "indexed_document_count" => 20,
        "indexed_document_volume" => 1000,
        "total_document_count" => 2000,
        "last_seen" => "2023-01-02T10:00:00Z",
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"deleted_document_count":10,"indexed_document_count":20,"indexed_document_volume":1000,"total_document_count":2000,"last_seen":"2023-01-02T10:00:00Z"}' "$ELASTICSEARCH_URL/_connector/_sync_job/my-connector-sync-job/_stats"
client.connector().syncJobUpdateStats(s -> s
    .connectorSyncJobId("my-connector-sync-job")
    .deletedDocumentCount(10L)
    .indexedDocumentCount(20L)
    .indexedDocumentVolume(1000L)
    .lastSeen(l -> l
        .time("2023-01-02T10:00:00Z")
    )
    .totalDocumentCount(2000)
);
Request example
An example body for a `PUT _connector/_sync_job/my-connector-sync-job/_stats` request.
{
    "deleted_document_count": 10,
    "indexed_document_count": 20,
    "indexed_document_volume": 1000,
    "total_document_count": 2000,
    "last_seen": "2023-01-02T10:00:00Z"
}





























































Create or update auto-follow patterns Generally available; Added in 6.5.0

PUT /_ccr/auto_follow/{name}

Create a collection of cross-cluster replication auto-follow patterns for a remote cluster. Newly created indices on the remote cluster that match any of the patterns are automatically configured as follower indices. Indices on the remote cluster that were created before the auto-follow pattern was created will not be auto-followed even if they match the pattern.

This API can also be used to update auto-follow patterns. NOTE: Follower indices that were configured automatically before updating an auto-follow pattern will remain unchanged even if they do not match against the new patterns.

External documentation

Path parameters

  • name string Required

    The name of the collection of auto-follow patterns.

Query parameters

  • master_timeout string

    Period to wait for a connection to the master node.

    Values are -1 or 0.

application/json

Body Required

  • remote_cluster string Required

    The remote cluster containing the leader indices to match against.

  • follow_index_pattern string

    The name of follower index. The template {{leader_index}} can be used to derive the name of the follower index from the name of the leader index. When following a data stream, use {{leader_index}}; CCR does not support changes to the names of a follower data stream’s backing indices.

  • leader_index_patterns array[string]

    An array of simple index patterns to match against indices in the remote cluster specified by the remote_cluster field.

  • leader_index_exclusion_patterns array[string]

    An array of simple index patterns that can be used to exclude indices from being auto-followed. Indices in the remote cluster whose names are matching one or more leader_index_patterns and one or more leader_index_exclusion_patterns won’t be followed.

  • max_outstanding_read_requests number

    The maximum number of outstanding reads requests from the remote cluster.

    Default value is 12.

  • settings object

    Settings to override from the leader index. Note that certain settings can not be overrode (e.g., index.number_of_shards).

    Hide settings attribute Show settings attribute object
    • * object Additional properties
  • max_outstanding_write_requests number

    The maximum number of outstanding reads requests from the remote cluster.

    Default value is 9.

  • read_poll_timeout string

    The maximum time to wait for new operations on the remote cluster when the follower index is synchronized with the leader index. When the timeout has elapsed, the poll for operations will return to the follower so that it can update some statistics. Then the follower will immediately attempt to read from the leader again.

  • max_read_request_operation_count number

    The maximum number of operations to pull per read from the remote cluster.

    Default value is 5120.

  • max_read_request_size number | string

    The maximum size in bytes of per read of a batch of operations pulled from the remote cluster.

    One of:

    The maximum size in bytes of per read of a batch of operations pulled from the remote cluster.

  • max_retry_delay string

    The maximum time to wait before retrying an operation that failed exceptionally. An exponential backoff strategy is employed when retrying.

  • max_write_buffer_count number

    The maximum number of operations that can be queued for writing. When this limit is reached, reads from the remote cluster will be deferred until the number of queued operations goes below the limit.

    Default value is 2147483647.

  • max_write_buffer_size number | string

    The maximum total bytes of operations that can be queued for writing. When this limit is reached, reads from the remote cluster will be deferred until the total bytes of queued operations goes below the limit.

    One of:

    The maximum total bytes of operations that can be queued for writing. When this limit is reached, reads from the remote cluster will be deferred until the total bytes of queued operations goes below the limit.

  • max_write_request_operation_count number

    The maximum number of operations per bulk write request executed on the follower.

    Default value is 5120.

  • max_write_request_size number | string

    The maximum total bytes of operations per bulk write request executed on the follower.

    One of:

    The maximum total bytes of operations per bulk write request executed on the follower.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

PUT /_ccr/auto_follow/{name}
PUT /_ccr/auto_follow/my_auto_follow_pattern
{
  "remote_cluster" : "remote_cluster",
  "leader_index_patterns" :
  [
    "leader_index*"
  ],
  "follow_index_pattern" : "{{leader_index}}-follower",
  "settings": {
    "index.number_of_replicas": 0
  },
  "max_read_request_operation_count" : 1024,
  "max_outstanding_read_requests" : 16,
  "max_read_request_size" : "1024k",
  "max_write_request_operation_count" : 32768,
  "max_write_request_size" : "16k",
  "max_outstanding_write_requests" : 8,
  "max_write_buffer_count" : 512,
  "max_write_buffer_size" : "512k",
  "max_retry_delay" : "10s",
  "read_poll_timeout" : "30s"
}
resp = client.ccr.put_auto_follow_pattern(
    name="my_auto_follow_pattern",
    remote_cluster="remote_cluster",
    leader_index_patterns=[
        "leader_index*"
    ],
    follow_index_pattern="{{leader_index}}-follower",
    settings={
        "index.number_of_replicas": 0
    },
    max_read_request_operation_count=1024,
    max_outstanding_read_requests=16,
    max_read_request_size="1024k",
    max_write_request_operation_count=32768,
    max_write_request_size="16k",
    max_outstanding_write_requests=8,
    max_write_buffer_count=512,
    max_write_buffer_size="512k",
    max_retry_delay="10s",
    read_poll_timeout="30s",
)
const response = await client.ccr.putAutoFollowPattern({
  name: "my_auto_follow_pattern",
  remote_cluster: "remote_cluster",
  leader_index_patterns: ["leader_index*"],
  follow_index_pattern: "{{leader_index}}-follower",
  settings: {
    "index.number_of_replicas": 0,
  },
  max_read_request_operation_count: 1024,
  max_outstanding_read_requests: 16,
  max_read_request_size: "1024k",
  max_write_request_operation_count: 32768,
  max_write_request_size: "16k",
  max_outstanding_write_requests: 8,
  max_write_buffer_count: 512,
  max_write_buffer_size: "512k",
  max_retry_delay: "10s",
  read_poll_timeout: "30s",
});
response = client.ccr.put_auto_follow_pattern(
  name: "my_auto_follow_pattern",
  body: {
    "remote_cluster": "remote_cluster",
    "leader_index_patterns": [
      "leader_index*"
    ],
    "follow_index_pattern": "{{leader_index}}-follower",
    "settings": {
      "index.number_of_replicas": 0
    },
    "max_read_request_operation_count": 1024,
    "max_outstanding_read_requests": 16,
    "max_read_request_size": "1024k",
    "max_write_request_operation_count": 32768,
    "max_write_request_size": "16k",
    "max_outstanding_write_requests": 8,
    "max_write_buffer_count": 512,
    "max_write_buffer_size": "512k",
    "max_retry_delay": "10s",
    "read_poll_timeout": "30s"
  }
)
$resp = $client->ccr()->putAutoFollowPattern([
    "name" => "my_auto_follow_pattern",
    "body" => [
        "remote_cluster" => "remote_cluster",
        "leader_index_patterns" => array(
            "leader_index*",
        ),
        "follow_index_pattern" => "{{leader_index}}-follower",
        "settings" => [
            "index.number_of_replicas" => 0,
        ],
        "max_read_request_operation_count" => 1024,
        "max_outstanding_read_requests" => 16,
        "max_read_request_size" => "1024k",
        "max_write_request_operation_count" => 32768,
        "max_write_request_size" => "16k",
        "max_outstanding_write_requests" => 8,
        "max_write_buffer_count" => 512,
        "max_write_buffer_size" => "512k",
        "max_retry_delay" => "10s",
        "read_poll_timeout" => "30s",
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"remote_cluster":"remote_cluster","leader_index_patterns":["leader_index*"],"follow_index_pattern":"{{leader_index}}-follower","settings":{"index.number_of_replicas":0},"max_read_request_operation_count":1024,"max_outstanding_read_requests":16,"max_read_request_size":"1024k","max_write_request_operation_count":32768,"max_write_request_size":"16k","max_outstanding_write_requests":8,"max_write_buffer_count":512,"max_write_buffer_size":"512k","max_retry_delay":"10s","read_poll_timeout":"30s"}' "$ELASTICSEARCH_URL/_ccr/auto_follow/my_auto_follow_pattern"
client.ccr().putAutoFollowPattern(p -> p
    .followIndexPattern("{{leader_index}}-follower")
    .leaderIndexPatterns("leader_index*")
    .maxOutstandingReadRequests(16)
    .maxOutstandingWriteRequests(8)
    .maxReadRequestOperationCount(1024)
    .maxReadRequestSize("1024k")
    .maxRetryDelay(m -> m
        .time("10s")
    )
    .maxWriteBufferCount(512)
    .maxWriteBufferSize("512k")
    .maxWriteRequestOperationCount(32768)
    .maxWriteRequestSize("16k")
    .name("my_auto_follow_pattern")
    .readPollTimeout(r -> r
        .time("30s")
    )
    .remoteCluster("remote_cluster")
    .settings("index.number_of_replicas", JsonData.fromJson("0"))
);
Request example
Run `PUT /_ccr/auto_follow/my_auto_follow_pattern` to creates an auto-follow pattern.
{
  "remote_cluster" : "remote_cluster",
  "leader_index_patterns" :
  [
    "leader_index*"
  ],
  "follow_index_pattern" : "{{leader_index}}-follower",
  "settings": {
    "index.number_of_replicas": 0
  },
  "max_read_request_operation_count" : 1024,
  "max_outstanding_read_requests" : 16,
  "max_read_request_size" : "1024k",
  "max_write_request_operation_count" : 32768,
  "max_write_request_size" : "16k",
  "max_outstanding_write_requests" : 8,
  "max_write_buffer_count" : 512,
  "max_write_buffer_size" : "512k",
  "max_retry_delay" : "10s",
  "read_poll_timeout" : "30s"
}
Response examples (200)
A successful response for creating an auto-follow pattern.
{
  "acknowledged": true
}

Delete auto-follow patterns Generally available; Added in 6.5.0

DELETE /_ccr/auto_follow/{name}

Delete a collection of cross-cluster replication auto-follow patterns.

Required authorization

  • Cluster privileges: manage_ccr
External documentation

Path parameters

  • name string Required

    The auto-follow pattern collection to delete.

Query parameters

  • master_timeout string

    The period to wait for a connection to the master node. If the master node is not available before the timeout expires, the request fails and returns an error. It can also be set to -1 to indicate that the request should never timeout.

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

DELETE /_ccr/auto_follow/{name}
DELETE /_ccr/auto_follow/my_auto_follow_pattern
resp = client.ccr.delete_auto_follow_pattern(
    name="my_auto_follow_pattern",
)
const response = await client.ccr.deleteAutoFollowPattern({
  name: "my_auto_follow_pattern",
});
response = client.ccr.delete_auto_follow_pattern(
  name: "my_auto_follow_pattern"
)
$resp = $client->ccr()->deleteAutoFollowPattern([
    "name" => "my_auto_follow_pattern",
]);
curl -X DELETE -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_ccr/auto_follow/my_auto_follow_pattern"
client.ccr().deleteAutoFollowPattern(d -> d
    .name("my_auto_follow_pattern")
);
Response examples (200)
A successful response from `DELETE /_ccr/auto_follow/my_auto_follow_pattern`, which deletes an auto-follow pattern.
{
  "acknowledged" : true
}












Forget a follower Generally available; Added in 6.7.0

POST /{index}/_ccr/forget_follower

Remove the cross-cluster replication follower retention leases from the leader.

A following index takes out retention leases on its leader index. These leases are used to increase the likelihood that the shards of the leader index retain the history of operations that the shards of the following index need to run replication. When a follower index is converted to a regular index by the unfollow API (either by directly calling the API or by index lifecycle management tasks), these leases are removed. However, removal of the leases can fail, for example when the remote cluster containing the leader index is unavailable. While the leases will eventually expire on their own, their extended existence can cause the leader index to hold more history than necessary and prevent index lifecycle management from performing some operations on the leader index. This API exists to enable manually removing the leases when the unfollow API is unable to do so.

NOTE: This API does not stop replication by a following index. If you use this API with a follower index that is still actively following, the following index will add back retention leases on the leader. The only purpose of this API is to handle the case of failure to remove the following retention leases after the unfollow API is invoked.

External documentation

Path parameters

  • index string Required

    the name of the leader index for which specified follower retention leases should be removed

Query parameters

  • timeout string

    Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

application/json

Body Required

  • follower_cluster string
  • follower_index string
  • follower_index_uuid string
  • leader_remote_cluster string

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • _shards object Required
      Hide _shards attributes Show _shards attributes object
      • failed number Required

        The number of shards the operation or search attempted to run on but failed.

      • successful number Required

        The number of shards the operation or search succeeded on.

      • total number Required

        The number of shards the operation or search will run on overall.

      • failures array[object]
        Hide failures attributes Show failures attributes object
        • index string
        • node string
        • reason object Required

          Cause and details about a request failure. This class defines the properties common to all error types. Additional details are also provided, that depend on the error type.

        • shard number
        • status string
        • primary boolean
      • skipped number
POST /{index}/_ccr/forget_follower
POST /<leader_index>/_ccr/forget_follower
{
  "follower_cluster" : "<follower_cluster>",
  "follower_index" : "<follower_index>",
  "follower_index_uuid" : "<follower_index_uuid>",
  "leader_remote_cluster" : "<leader_remote_cluster>"
}
resp = client.ccr.forget_follower(
    index="<leader_index>",
    follower_cluster="<follower_cluster>",
    follower_index="<follower_index>",
    follower_index_uuid="<follower_index_uuid>",
    leader_remote_cluster="<leader_remote_cluster>",
)
const response = await client.ccr.forgetFollower({
  index: "<leader_index>",
  follower_cluster: "<follower_cluster>",
  follower_index: "<follower_index>",
  follower_index_uuid: "<follower_index_uuid>",
  leader_remote_cluster: "<leader_remote_cluster>",
});
response = client.ccr.forget_follower(
  index: "<leader_index>",
  body: {
    "follower_cluster": "<follower_cluster>",
    "follower_index": "<follower_index>",
    "follower_index_uuid": "<follower_index_uuid>",
    "leader_remote_cluster": "<leader_remote_cluster>"
  }
)
$resp = $client->ccr()->forgetFollower([
    "index" => "<leader_index>",
    "body" => [
        "follower_cluster" => "<follower_cluster>",
        "follower_index" => "<follower_index>",
        "follower_index_uuid" => "<follower_index_uuid>",
        "leader_remote_cluster" => "<leader_remote_cluster>",
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"follower_cluster":"<follower_cluster>","follower_index":"<follower_index>","follower_index_uuid":"<follower_index_uuid>","leader_remote_cluster":"<leader_remote_cluster>"}' "$ELASTICSEARCH_URL/<leader_index>/_ccr/forget_follower"
client.ccr().forgetFollower(f -> f
    .followerCluster("<follower_cluster>")
    .followerIndex("<follower_index>")
    .followerIndexUuid("<follower_index_uuid>")
    .index("<leader_index>")
    .leaderRemoteCluster("<leader_remote_cluster>")
);
Request example
Run `POST /<leader_index>/_ccr/forget_follower`.
{
  "follower_cluster" : "<follower_cluster>",
  "follower_index" : "<follower_index>",
  "follower_index_uuid" : "<follower_index_uuid>",
  "leader_remote_cluster" : "<leader_remote_cluster>"
}
Response examples (200)
A successful response for removing the follower retention leases from the leader index.
{
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "failed" : 0,
    "failures" : [ ]
  }
}

Pause an auto-follow pattern Generally available; Added in 7.5.0

POST /_ccr/auto_follow/{name}/pause

Pause a cross-cluster replication auto-follow pattern. When the API returns, the auto-follow pattern is inactive. New indices that are created on the remote cluster and match the auto-follow patterns are ignored.

You can resume auto-following with the resume auto-follow pattern API. When it resumes, the auto-follow pattern is active again and automatically configures follower indices for newly created indices on the remote cluster that match its patterns. Remote indices that were created while the pattern was paused will also be followed, unless they have been deleted or closed in the interim.

Required authorization

  • Cluster privileges: manage_ccr
External documentation

Path parameters

  • name string Required

    The name of the auto-follow pattern to pause.

Query parameters

  • master_timeout string

    The period to wait for a connection to the master node. If the master node is not available before the timeout expires, the request fails and returns an error. It can also be set to -1 to indicate that the request should never timeout.

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

POST /_ccr/auto_follow/{name}/pause
POST /_ccr/auto_follow/my_auto_follow_pattern/pause
resp = client.ccr.pause_auto_follow_pattern(
    name="my_auto_follow_pattern",
)
const response = await client.ccr.pauseAutoFollowPattern({
  name: "my_auto_follow_pattern",
});
response = client.ccr.pause_auto_follow_pattern(
  name: "my_auto_follow_pattern"
)
$resp = $client->ccr()->pauseAutoFollowPattern([
    "name" => "my_auto_follow_pattern",
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_ccr/auto_follow/my_auto_follow_pattern/pause"
client.ccr().pauseAutoFollowPattern(p -> p
    .name("my_auto_follow_pattern")
);
Response examples (200)
A successful response from `POST /_ccr/auto_follow/my_auto_follow_pattern/pause`, which pauses an auto-follow pattern.
{
  "acknowledged" : true
}
















Unfollow an index Generally available; Added in 6.5.0

POST /{index}/_ccr/unfollow

Convert a cross-cluster replication follower index to a regular index. The API stops the following task associated with a follower index and removes index metadata and settings associated with cross-cluster replication. The follower index must be paused and closed before you call the unfollow API.


Currently cross-cluster replication does not support converting an existing regular index to a follower index. Converting a follower index to a regular index is an irreversible operation.

Required authorization

  • Index privileges: manage_follow_index
External documentation

Path parameters

  • index string Required

    The name of the follower index.

Query parameters

  • master_timeout string

    The period to wait for a connection to the master node. If the master node is not available before the timeout expires, the request fails and returns an error. It can also be set to -1 to indicate that the request should never timeout.

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

POST /{index}/_ccr/unfollow
POST /follower_index/_ccr/unfollow
resp = client.ccr.unfollow(
    index="follower_index",
)
const response = await client.ccr.unfollow({
  index: "follower_index",
});
response = client.ccr.unfollow(
  index: "follower_index"
)
$resp = $client->ccr()->unfollow([
    "index" => "follower_index",
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/follower_index/_ccr/unfollow"
client.ccr().unfollow(u -> u
    .index("follower_index")
);
Response examples (200)
A successful response from `POST /follower_index/_ccr/unfollow`.
{
  "acknowledged" : true
}




Create a data stream Generally available; Added in 7.9.0

PUT /_data_stream/{name}

You must have a matching index template with data stream enabled.

Required authorization

  • Index privileges: create_index

Path parameters

  • name string Required

    Name of the data stream, which must meet the following criteria: Lowercase only; Cannot include \, /, *, ?, ", <, >, |, ,, #, :, or a space character; Cannot start with -, _, +, or .ds-; Cannot be . or ..; Cannot be longer than 255 bytes. Multi-byte characters count towards this limit faster.

Query parameters

  • master_timeout string

    Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

  • timeout string

    Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

PUT /_data_stream/{name}
PUT _data_stream/logs-foo-bar
resp = client.indices.create_data_stream(
    name="logs-foo-bar",
)
const response = await client.indices.createDataStream({
  name: "logs-foo-bar",
});
response = client.indices.create_data_stream(
  name: "logs-foo-bar"
)
$resp = $client->indices()->createDataStream([
    "name" => "logs-foo-bar",
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_data_stream/logs-foo-bar"
client.indices().createDataStream(c -> c
    .name("logs-foo-bar")
);









































































Get a document's source Generally available

GET /{index}/_source/{id}

Get the source of a document. For example:

GET my-index-000001/_source/1

You can use the source filtering parameters to control which parts of the _source are returned:

GET my-index-000001/_source/1/?_source_includes=*.id&_source_excludes=entities

Required authorization

  • Index privileges: read
External documentation

Path parameters

  • index string Required

    The name of the index that contains the document.

  • id string Required

    A unique document identifier.

Query parameters

  • preference string

    The node or shard the operation should be performed on. By default, the operation is randomized between the shard replicas.

  • realtime boolean

    If true, the request is real-time as opposed to near-real-time.

  • refresh boolean

    If true, the request refreshes the relevant shards before retrieving the document. Setting it to true should be done after careful thought and verification that this does not cause a heavy load on the system (and slow down indexing).

  • routing string

    A custom value used to route operations to a specific shard.

  • _source boolean | string | array[string]

    Indicates whether to return the _source field (true or false) or lists the fields to return.

  • _source_excludes string | array[string]

    A comma-separated list of source fields to exclude in the response.

  • _source_includes string | array[string]

    A comma-separated list of source fields to include in the response.

  • version number

    The version number for concurrency control. It must match the current version of the document for the request to succeed.

  • version_type string

    The version type.

    Supported values include:

    • internal: Use internal versioning that starts at 1 and increments with each update or delete.
    • external: Only index the document if the specified version is strictly higher than the version of the stored document or if there is no existing document.
    • external_gte: Only index the document if the specified version is equal or higher than the version of the stored document or if there is no existing document. NOTE: The external_gte version type is meant for special use cases and should be used with care. If used incorrectly, it can result in loss of data.
    • force: This option is deprecated because it can cause primary and replica shards to diverge.

    Values are internal, external, external_gte, or force.

Responses

  • 200 application/json
GET /{index}/_source/{id}
GET my-index-000001/_source/1
resp = client.get_source(
    index="my-index-000001",
    id="1",
)
const response = await client.getSource({
  index: "my-index-000001",
  id: 1,
});
response = client.get_source(
  index: "my-index-000001",
  id: "1"
)
$resp = $client->getSource([
    "index" => "my-index-000001",
    "id" => "1",
]);
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/my-index-000001/_source/1"
client.getSource(g -> g
    .id("1")
    .index("my-index-000001")
);

Check for a document source Generally available; Added in 5.4.0

HEAD /{index}/_source/{id}

Check whether a document source exists in an index. For example:

HEAD my-index-000001/_source/1

A document's source is not available if it is disabled in the mapping.

Required authorization

  • Index privileges: read
External documentation

Path parameters

  • index string Required

    A comma-separated list of data streams, indices, and aliases. It supports wildcards (*).

  • id string Required

    A unique identifier for the document.

Query parameters

  • preference string

    The node or shard the operation should be performed on. By default, the operation is randomized between the shard replicas.

  • realtime boolean

    If true, the request is real-time as opposed to near-real-time.

  • refresh boolean

    If true, the request refreshes the relevant shards before retrieving the document. Setting it to true should be done after careful thought and verification that this does not cause a heavy load on the system (and slow down indexing).

  • routing string

    A custom value used to route operations to a specific shard.

  • _source boolean | string | array[string]

    Indicates whether to return the _source field (true or false) or lists the fields to return.

  • _source_excludes string | array[string]

    A comma-separated list of source fields to exclude in the response.

  • _source_includes string | array[string]

    A comma-separated list of source fields to include in the response.

  • version number

    The version number for concurrency control. It must match the current version of the document for the request to succeed.

  • version_type string

    The version type.

    Supported values include:

    • internal: Use internal versioning that starts at 1 and increments with each update or delete.
    • external: Only index the document if the specified version is strictly higher than the version of the stored document or if there is no existing document.
    • external_gte: Only index the document if the specified version is equal or higher than the version of the stored document or if there is no existing document. NOTE: The external_gte version type is meant for special use cases and should be used with care. If used incorrectly, it can result in loss of data.
    • force: This option is deprecated because it can cause primary and replica shards to diverge.

    Values are internal, external, external_gte, or force.

Responses

  • 200 application/json
HEAD /{index}/_source/{id}
HEAD my-index-000001/_source/1
resp = client.exists_source(
    index="my-index-000001",
    id="1",
)
const response = await client.existsSource({
  index: "my-index-000001",
  id: 1,
});
response = client.exists_source(
  index: "my-index-000001",
  id: "1"
)
$resp = $client->existsSource([
    "index" => "my-index-000001",
    "id" => "1",
]);
curl --head -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/my-index-000001/_source/1"
client.existsSource(e -> e
    .id("1")
    .index("my-index-000001")
);
















Get term vector information Generally available

POST /{index}/_termvectors/{id}

All methods and paths for this operation:

GET /{index}/_termvectors

POST /{index}/_termvectors
GET /{index}/_termvectors/{id}
POST /{index}/_termvectors/{id}

Get information and statistics about terms in the fields of a particular document.

You can retrieve term vectors for documents stored in the index or for artificial documents passed in the body of the request. You can specify the fields you are interested in through the fields parameter or by adding the fields to the request body. For example:

GET /my-index-000001/_termvectors/1?fields=message

Fields can be specified using wildcards, similar to the multi match query.

Term vectors are real-time by default, not near real-time. This can be changed by setting realtime parameter to false.

You can request three types of values: term information, term statistics, and field statistics. By default, all term information and field statistics are returned for all fields but term statistics are excluded.

Term information

  • term frequency in the field (always returned)
  • term positions (positions: true)
  • start and end offsets (offsets: true)
  • term payloads (payloads: true), as base64 encoded bytes

If the requested information wasn't stored in the index, it will be computed on the fly if possible. Additionally, term vectors could be computed for documents not even existing in the index, but instead provided by the user.


Start and end offsets assume UTF-16 encoding is being used. If you want to use these offsets in order to get the original text that produced this token, you should make sure that the string you are taking a sub-string of is also encoded using UTF-16.

Behaviour

The term and field statistics are not accurate. Deleted documents are not taken into account. The information is only retrieved for the shard the requested document resides in. The term and field statistics are therefore only useful as relative measures whereas the absolute numbers have no meaning in this context. By default, when requesting term vectors of artificial documents, a shard to get the statistics from is randomly selected. Use routing only to hit a particular shard.

Required authorization

  • Index privileges: read

Path parameters

  • index string Required

    The name of the index that contains the document.

  • id string Required

    A unique identifier for the document.

Query parameters

  • fields string | array[string]

    A comma-separated list or wildcard expressions of fields to include in the statistics. It is used as the default list unless a specific field list is provided in the completion_fields or fielddata_fields parameters.

  • field_statistics boolean

    If true, the response includes:

    • The document count (how many documents contain this field).
    • The sum of document frequencies (the sum of document frequencies for all terms in this field).
    • The sum of total term frequencies (the sum of total term frequencies of each term in this field).
  • offsets boolean

    If true, the response includes term offsets.

  • payloads boolean

    If true, the response includes term payloads.

  • positions boolean

    If true, the response includes term positions.

  • preference string

    The node or shard the operation should be performed on. It is random by default.

  • realtime boolean

    If true, the request is real-time as opposed to near-real-time.

  • routing string

    A custom value that is used to route operations to a specific shard.

  • term_statistics boolean

    If true, the response includes:

    • The total term frequency (how often a term occurs in all documents).
    • The document frequency (the number of documents containing the current term).

    By default these values are not returned since term statistics can have a serious performance impact.

  • version number

    If true, returns the document version as part of a hit.

  • version_type string

    The version type.

    Supported values include:

    • internal: Use internal versioning that starts at 1 and increments with each update or delete.
    • external: Only index the document if the specified version is strictly higher than the version of the stored document or if there is no existing document.
    • external_gte: Only index the document if the specified version is equal or higher than the version of the stored document or if there is no existing document. NOTE: The external_gte version type is meant for special use cases and should be used with care. If used incorrectly, it can result in loss of data.
    • force: This option is deprecated because it can cause primary and replica shards to diverge.

    Values are internal, external, external_gte, or force.

application/json

Body

  • doc object

    An artificial document (a document not present in the index) for which you want to retrieve term vectors.

  • filter object

    Filter terms based on their tf-idf scores. This could be useful in order find out a good characteristic vector of a document. This feature works in a similar manner to the second phase of the More Like This Query.

    Hide filter attributes Show filter attributes object
    • max_doc_freq number

      Ignore words which occur in more than this many docs. Defaults to unbounded.

    • max_num_terms number

      The maximum number of terms that must be returned per field.

      Default value is 25.

    • max_term_freq number

      Ignore words with more than this frequency in the source doc. It defaults to unbounded.

    • max_word_length number

      The maximum word length above which words will be ignored. Defaults to unbounded.

      Default value is 0.

    • min_doc_freq number

      Ignore terms which do not occur in at least this many docs.

      Default value is 1.

    • min_term_freq number

      Ignore words with less than this frequency in the source doc.

      Default value is 1.

    • min_word_length number

      The minimum word length below which words will be ignored.

      Default value is 0.

  • per_field_analyzer object

    Override the default per-field analyzer. This is useful in order to generate term vectors in any fashion, especially when using artificial documents. When providing an analyzer for a field that already stores term vectors, the term vectors will be regenerated.

    Hide per_field_analyzer attribute Show per_field_analyzer attribute object
    • * string Additional properties
  • fields array[string]

    A list of fields to include in the statistics. It is used as the default list unless a specific field list is provided in the completion_fields or fielddata_fields parameters.

  • field_statistics boolean

    If true, the response includes:

    • The document count (how many documents contain this field).
    • The sum of document frequencies (the sum of document frequencies for all terms in this field).
    • The sum of total term frequencies (the sum of total term frequencies of each term in this field).

    Default value is true.

  • offsets boolean

    If true, the response includes term offsets.

    Default value is true.

  • payloads boolean

    If true, the response includes term payloads.

    Default value is true.

  • positions boolean

    If true, the response includes term positions.

    Default value is true.

  • term_statistics boolean

    If true, the response includes:

    • The total term frequency (how often a term occurs in all documents).
    • The document frequency (the number of documents containing the current term).

    By default these values are not returned since term statistics can have a serious performance impact.

    Default value is false.

  • routing string

    A custom value that is used to route operations to a specific shard.

  • version number

    If true, returns the document version as part of a hit.

  • version_type string

    The version type.

    Supported values include:

    • internal: Use internal versioning that starts at 1 and increments with each update or delete.
    • external: Only index the document if the specified version is strictly higher than the version of the stored document or if there is no existing document.
    • external_gte: Only index the document if the specified version is equal or higher than the version of the stored document or if there is no existing document. NOTE: The external_gte version type is meant for special use cases and should be used with care. If used incorrectly, it can result in loss of data.
    • force: This option is deprecated because it can cause primary and replica shards to diverge.

    Values are internal, external, external_gte, or force.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • found boolean Required
    • _id string
    • _index string Required
    • term_vectors object
      Hide term_vectors attribute Show term_vectors attribute object
      • * object Additional properties
        Hide * attributes Show * attributes object
        • field_statistics object
          Hide field_statistics attributes Show field_statistics attributes object
          • doc_count number Required
          • sum_doc_freq number Required
          • sum_ttf number Required
        • terms object Required
          Hide terms attribute Show terms attribute object
          • * object Additional properties
            Hide * attributes Show * attributes object
            • doc_freq number
            • score number
            • term_freq number Required
            • tokens array[object]
            • ttf number
    • took number Required
    • _version number Required
POST /{index}/_termvectors/{id}
GET /my-index-000001/_termvectors/1
{
  "fields" : ["text"],
  "offsets" : true,
  "payloads" : true,
  "positions" : true,
  "term_statistics" : true,
  "field_statistics" : true
}
resp = client.termvectors(
    index="my-index-000001",
    id="1",
    fields=[
        "text"
    ],
    offsets=True,
    payloads=True,
    positions=True,
    term_statistics=True,
    field_statistics=True,
)
const response = await client.termvectors({
  index: "my-index-000001",
  id: 1,
  fields: ["text"],
  offsets: true,
  payloads: true,
  positions: true,
  term_statistics: true,
  field_statistics: true,
});
response = client.termvectors(
  index: "my-index-000001",
  id: "1",
  body: {
    "fields": [
      "text"
    ],
    "offsets": true,
    "payloads": true,
    "positions": true,
    "term_statistics": true,
    "field_statistics": true
  }
)
$resp = $client->termvectors([
    "index" => "my-index-000001",
    "id" => "1",
    "body" => [
        "fields" => array(
            "text",
        ),
        "offsets" => true,
        "payloads" => true,
        "positions" => true,
        "term_statistics" => true,
        "field_statistics" => true,
    ],
]);
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"fields":["text"],"offsets":true,"payloads":true,"positions":true,"term_statistics":true,"field_statistics":true}' "$ELASTICSEARCH_URL/my-index-000001/_termvectors/1"
client.termvectors(t -> t
    .fieldStatistics(true)
    .fields("text")
    .id("1")
    .index("my-index-000001")
    .offsets(true)
    .payloads(true)
    .positions(true)
    .termStatistics(true)
);
Request examples
Run `GET /my-index-000001/_termvectors/1` to return all information and statistics for field `text` in document 1.
{
  "fields" : ["text"],
  "offsets" : true,
  "payloads" : true,
  "positions" : true,
  "term_statistics" : true,
  "field_statistics" : true
}
Run `GET /my-index-000001/_termvectors/1` to set per-field analyzers. A different analyzer than the one at the field may be provided by using the `per_field_analyzer` parameter.
{
  "doc" : {
    "fullname" : "John Doe",
    "text" : "test test test"
  },
  "fields": ["fullname"],
  "per_field_analyzer" : {
    "fullname": "keyword"
  }
}
Run `GET /imdb/_termvectors` to filter the terms returned based on their tf-idf scores. It returns the three most "interesting" keywords from the artificial document having the given "plot" field value. Notice that the keyword "Tony" or any stop words are not part of the response, as their tf-idf must be too low.
{
  "doc": {
    "plot": "When wealthy industrialist Tony Stark is forced to build an armored suit after a life-threatening incident, he ultimately decides to use its technology to fight against evil."
  },
  "term_statistics": true,
  "field_statistics": true,
  "positions": false,
  "offsets": false,
  "filter": {
    "max_num_terms": 3,
    "min_term_freq": 1,
    "min_doc_freq": 1
  }
}
Run `GET /my-index-000001/_termvectors/1`. Term vectors which are not explicitly stored in the index are automatically computed on the fly. This request returns all information and statistics for the fields in document 1, even though the terms haven't been explicitly stored in the index. Note that for the field text, the terms are not regenerated.
{
  "fields" : ["text", "some_field_without_term_vectors"],
  "offsets" : true,
  "positions" : true,
  "term_statistics" : true,
  "field_statistics" : true
}
Run `GET /my-index-000001/_termvectors`. Term vectors can be generated for artificial documents, that is for documents not present in the index. If dynamic mapping is turned on (default), the document fields not in the original mapping will be dynamically created.
{
  "doc" : {
    "fullname" : "John Doe",
    "text" : "test test test"
  }
}
Response examples (200)
A successful response from `GET /my-index-000001/_termvectors/1`.
{
  "_index": "my-index-000001",
  "_id": "1",
  "_version": 1,
  "found": true,
  "took": 6,
  "term_vectors": {
    "text": {
      "field_statistics": {
        "sum_doc_freq": 4,
        "doc_count": 2,
        "sum_ttf": 6
      },
      "terms": {
        "test": {
          "doc_freq": 2,
          "ttf": 4,
          "term_freq": 3,
          "tokens": [
            {
              "position": 0,
              "start_offset": 0,
              "end_offset": 4,
              "payload": "d29yZA=="
            },
            {
              "position": 1,
              "start_offset": 5,
              "end_offset": 9,
              "payload": "d29yZA=="
            },
            {
              "position": 2,
              "start_offset": 10,
              "end_offset": 14,
              "payload": "d29yZA=="
            }
          ]
        }
      }
    }
  }
}
A successful response from `GET /my-index-000001/_termvectors` with `per_field_analyzer` in the request body.
{
  "_index": "my-index-000001",
  "_version": 0,
  "found": true,
  "took": 6,
  "term_vectors": {
    "fullname": {
      "field_statistics": {
          "sum_doc_freq": 2,
          "doc_count": 4,
          "sum_ttf": 4
      },
      "terms": {
          "John Doe": {
            "term_freq": 1,
            "tokens": [
                {
                  "position": 0,
                  "start_offset": 0,
                  "end_offset": 8
                }
            ]
          }
      }
    }
  }
}
A successful response from `GET /my-index-000001/_termvectors` with a `filter` in the request body.
{
  "_index": "imdb",
  "_version": 0,
  "found": true,
  "term_vectors": {
      "plot": {
        "field_statistics": {
            "sum_doc_freq": 3384269,
            "doc_count": 176214,
            "sum_ttf": 3753460
        },
        "terms": {
            "armored": {
              "doc_freq": 27,
              "ttf": 27,
              "term_freq": 1,
              "score": 9.74725
            },
            "industrialist": {
              "doc_freq": 88,
              "ttf": 88,
              "term_freq": 1,
              "score": 8.590818
            },
            "stark": {
              "doc_freq": 44,
              "ttf": 47,
              "term_freq": 1,
              "score": 9.272792
            }
        }
      }
  }
}

























Run an enrich policy Generally available; Added in 7.5.0

PUT /_enrich/policy/{name}/_execute

Create the enrich index for an existing enrich policy.

Path parameters

  • name string Required

    Enrich policy to execute.

Query parameters

  • master_timeout string

    Period to wait for a connection to the master node.

    Values are -1 or 0.

  • wait_for_completion boolean

    If true, the request blocks other enrich policy execution requests until complete.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • status object
      Hide status attributes Show status attributes object
      • phase string Required

        Values are SCHEDULED, RUNNING, COMPLETE, FAILED, or CANCELLED.

      • step string
    • task string | number

PUT /_enrich/policy/{name}/_execute
PUT /_enrich/policy/my-policy/_execute?wait_for_completion=false
resp = client.enrich.execute_policy(
    name="my-policy",
    wait_for_completion=False,
)
const response = await client.enrich.executePolicy({
  name: "my-policy",
  wait_for_completion: "false",
});
response = client.enrich.execute_policy(
  name: "my-policy",
  wait_for_completion: "false"
)
$resp = $client->enrich()->executePolicy([
    "name" => "my-policy",
    "wait_for_completion" => "false",
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_enrich/policy/my-policy/_execute?wait_for_completion=false"
client.enrich().executePolicy(e -> e
    .name("my-policy")
    .waitForCompletion(false)
);









Delete an async EQL search Generally available; Added in 7.9.0

DELETE /_eql/search/{id}

Delete an async EQL search or a stored synchronous EQL search. The API also deletes results for the search.

Path parameters

  • id string Required

    Identifier for the search to delete. A search ID is provided in the EQL search API's response for an async search. A search ID is also provided if the request’s keep_on_completion parameter is true.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

DELETE /_eql/search/{id}
DELETE /_eql/search/FmNJRUZ1YWZCU3dHY1BIOUhaenVSRkEaaXFlZ3h4c1RTWFNocDdnY2FSaERnUTozNDE=
resp = client.eql.delete(
    id="FmNJRUZ1YWZCU3dHY1BIOUhaenVSRkEaaXFlZ3h4c1RTWFNocDdnY2FSaERnUTozNDE=",
)
const response = await client.eql.delete({
  id: "FmNJRUZ1YWZCU3dHY1BIOUhaenVSRkEaaXFlZ3h4c1RTWFNocDdnY2FSaERnUTozNDE=",
});
response = client.eql.delete(
  id: "FmNJRUZ1YWZCU3dHY1BIOUhaenVSRkEaaXFlZ3h4c1RTWFNocDdnY2FSaERnUTozNDE="
)
$resp = $client->eql()->delete([
    "id" => "FmNJRUZ1YWZCU3dHY1BIOUhaenVSRkEaaXFlZ3h4c1RTWFNocDdnY2FSaERnUTozNDE=",
]);
curl -X DELETE -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_eql/search/FmNJRUZ1YWZCU3dHY1BIOUhaenVSRkEaaXFlZ3h4c1RTWFNocDdnY2FSaERnUTozNDE="
client.eql().delete(d -> d
    .id("FmNJRUZ1YWZCU3dHY1BIOUhaenVSRkEaaXFlZ3h4c1RTWFNocDdnY2FSaERnUTozNDE=")
);








ES|QL

The Elasticsearch Query Language (ES|QL) provides a powerful way to filter, transform, and analyze data stored in Elasticsearch, and in the future in other runtimes.

Learn more about ES|QL




















































Create or update a component template Generally available; Added in 7.8.0

POST /_component_template/{name}

All methods and paths for this operation:

PUT /_component_template/{name}

POST /_component_template/{name}

Component templates are building blocks for constructing index templates that specify index mappings, settings, and aliases.

An index template can be composed of multiple component templates. To use a component template, specify it in an index template’s composed_of list. Component templates are only applied to new data streams and indices as part of a matching index template.

Settings and mappings specified directly in the index template or the create index request override any settings or mappings specified in a component template.

Component templates are only used during index creation. For data streams, this includes data stream creation and the creation of a stream’s backing indices. Changes to component templates do not affect existing indices, including a stream’s backing indices.

You can use C-style /* *\/ block comments in component templates. You can include comments anywhere in the request body except before the opening curly bracket.

Applying component templates

You cannot directly apply a component template to a data stream or index. To be applied, a component template must be included in an index template's composed_of list.

Required authorization

  • Cluster privileges: manage_index_templates

Path parameters

  • name string Required

    Name of the component template to create. Elasticsearch includes the following built-in component templates: logs-mappings; logs-settings; metrics-mappings; metrics-settings;synthetics-mapping; synthetics-settings. Elastic Agent uses these templates to configure backing indices for its data streams. If you use Elastic Agent and want to overwrite one of these templates, set the version for your replacement template higher than the current version. If you don’t use Elastic Agent and want to disable all built-in component and index templates, set stack.templates.enabled to false using the cluster update settings API.

Query parameters

  • create boolean

    If true, this request cannot replace or update existing component templates.

  • cause string

    User defined reason for create the component template.

  • master_timeout string

    Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

application/json

Body Required

  • template object Required

    The template to be applied which includes mappings, settings, or aliases configuration.

    Hide template attributes Show template attributes object
    • aliases object
      Hide aliases attribute Show aliases attribute object
      • * object Additional properties
        Hide * attributes Show * attributes object
        • filter object

          Query used to limit documents the alias can access.

          External documentation
        • index_routing string

          Value used to route indexing operations to a specific shard. If specified, this overwrites the routing value for indexing operations.

        • is_hidden boolean

          If true, the alias is hidden. All indices for the alias must have the same is_hidden value.

          Default value is false.

        • is_write_index boolean

          If true, the index is the write index for the alias.

          Default value is false.

        • routing string

          Value used to route indexing and search operations to a specific shard.

        • search_routing string

          Value used to route search operations to a specific shard. If specified, this overwrites the routing value for search operations.

    • mappings object
      Hide mappings attributes Show mappings attributes object
      • all_field object
        Hide all_field attributes Show all_field attributes object
        • analyzer string Required
        • enabled boolean Required
        • omit_norms boolean Required
        • search_analyzer string Required
        • similarity string Required
        • store boolean Required
        • store_term_vector_offsets boolean Required
        • store_term_vector_payloads boolean Required
        • store_term_vector_positions boolean Required
        • store_term_vectors boolean Required
      • date_detection boolean
      • dynamic string

        Values are strict, runtime, true, or false.

      • dynamic_date_formats array[string]
      • dynamic_templates array[object]
      • _field_names object
        Hide _field_names attribute Show _field_names attribute object
        • enabled boolean Required
      • index_field object
        Hide index_field attribute Show index_field attribute object
        • enabled boolean Required
      • _meta object
        Hide _meta attribute Show _meta attribute object
        • * object Additional properties
      • numeric_detection boolean
      • properties object
      • _routing object
        Hide _routing attribute Show _routing attribute object
        • required boolean Required
      • _size object
        Hide _size attribute Show _size attribute object
        • enabled boolean Required
      • _source object
        Hide _source attributes Show _source attributes object
        • compress boolean
        • compress_threshold string
        • enabled boolean
        • excludes array[string]
        • includes array[string]
      • runtime object
        Hide runtime attribute Show runtime attribute object
        • * object Additional properties
          Hide * attributes Show * attributes object
          • fields object

            For type composite

          • fetch_fields array[object]

            For type lookup

          • format string

            A custom format for date type runtime fields.

      • enabled boolean
      • subobjects string

        Values are true or false.

      • _data_stream_timestamp object
        Hide _data_stream_timestamp attribute Show _data_stream_timestamp attribute object
        • enabled boolean Required
    • settings object
      Index settings
    • defaults object

      Default settings, included when the request's include_default is true.

      Index settings
    • data_stream string
    • lifecycle object

      Data stream lifecycle applicable if this is a data stream.

      Hide lifecycle attributes Show lifecycle attributes object
      • data_retention string

        If defined, every document added to this data stream will be stored at least for this time frame. Any time after this duration the document could be deleted. When empty, every document in this data stream will be stored indefinitely.

      • downsampling object

        The downsampling configuration to execute for the managed backing index after rollover.

        Hide downsampling attribute Show downsampling attribute object
        • rounds array[object] Required

          The list of downsampling rounds to execute as part of this downsampling configuration

      • enabled boolean

        If defined, it turns data stream lifecycle on/off (true/false) for this data stream. A data stream lifecycle that's disabled (enabled: false) will have no effect on the data stream.

        Default value is true.

  • version number

    Version number used to manage component templates externally. This number isn't automatically generated or incremented by Elasticsearch. To unset a version, replace the template without specifying a version.

  • _meta object

    Optional user metadata about the component template. It may have any contents. This map is not automatically generated by Elasticsearch. This information is stored in the cluster state, so keeping it short is preferable. To unset _meta, replace the template without specifying this information.

    Hide _meta attribute Show _meta attribute object
    • * object Additional properties
  • deprecated boolean

    Marks this index template as deprecated. When creating or updating a non-deprecated index template that uses deprecated components, Elasticsearch will emit a deprecation warning.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

POST /_component_template/{name}
PUT _component_template/template_1
{
  "template": {
    "settings": {
      "number_of_shards": 1
    },
    "mappings": {
      "_source": {
        "enabled": false
      },
      "properties": {
        "host_name": {
          "type": "keyword"
        },
        "created_at": {
          "type": "date",
          "format": "EEE MMM dd HH:mm:ss Z yyyy"
        }
      }
    }
  }
}
resp = client.cluster.put_component_template(
    name="template_1",
    template={
        "settings": {
            "number_of_shards": 1
        },
        "mappings": {
            "_source": {
                "enabled": False
            },
            "properties": {
                "host_name": {
                    "type": "keyword"
                },
                "created_at": {
                    "type": "date",
                    "format": "EEE MMM dd HH:mm:ss Z yyyy"
                }
            }
        }
    },
)
const response = await client.cluster.putComponentTemplate({
  name: "template_1",
  template: {
    settings: {
      number_of_shards: 1,
    },
    mappings: {
      _source: {
        enabled: false,
      },
      properties: {
        host_name: {
          type: "keyword",
        },
        created_at: {
          type: "date",
          format: "EEE MMM dd HH:mm:ss Z yyyy",
        },
      },
    },
  },
});
response = client.cluster.put_component_template(
  name: "template_1",
  body: {
    "template": {
      "settings": {
        "number_of_shards": 1
      },
      "mappings": {
        "_source": {
          "enabled": false
        },
        "properties": {
          "host_name": {
            "type": "keyword"
          },
          "created_at": {
            "type": "date",
            "format": "EEE MMM dd HH:mm:ss Z yyyy"
          }
        }
      }
    }
  }
)
$resp = $client->cluster()->putComponentTemplate([
    "name" => "template_1",
    "body" => [
        "template" => [
            "settings" => [
                "number_of_shards" => 1,
            ],
            "mappings" => [
                "_source" => [
                    "enabled" => false,
                ],
                "properties" => [
                    "host_name" => [
                        "type" => "keyword",
                    ],
                    "created_at" => [
                        "type" => "date",
                        "format" => "EEE MMM dd HH:mm:ss Z yyyy",
                    ],
                ],
            ],
        ],
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"template":{"settings":{"number_of_shards":1},"mappings":{"_source":{"enabled":false},"properties":{"host_name":{"type":"keyword"},"created_at":{"type":"date","format":"EEE MMM dd HH:mm:ss Z yyyy"}}}}}' "$ELASTICSEARCH_URL/_component_template/template_1"
Request examples
{
  "template": {
    "settings": {
      "number_of_shards": 1
    },
    "mappings": {
      "_source": {
        "enabled": false
      },
      "properties": {
        "host_name": {
          "type": "keyword"
        },
        "created_at": {
          "type": "date",
          "format": "EEE MMM dd HH:mm:ss Z yyyy"
        }
      }
    }
  }
}
You can include index aliases in a component template. During index creation, the `{index}` placeholder in the alias name will be replaced with the actual index name that the template gets applied to.
{
  "template": null,
  "settings": {
    "number_of_shards": 1
  },
  "aliases": {
    "alias1": {},
    "alias2": {
      "filter": {
        "term": {
          "user.id": "kimchy"
        }
      },
      "routing": "shard-1"
    },
    "{index}-alias": {}
  }
}
















































Delete indices Generally available

DELETE /{index}

Deleting an index deletes its documents, shards, and metadata. It does not delete related Kibana components, such as data views, visualizations, or dashboards.

You cannot delete the current write index of a data stream. To delete the index, you must roll over the data stream so a new write index is created. You can then use the delete index API to delete the previous write index.

Required authorization

  • Index privileges: delete_index

Path parameters

  • index string | array[string] Required

    Comma-separated list of indices to delete. You cannot specify index aliases. By default, this parameter does not support wildcards (*) or _all. To use wildcards or _all, set the action.destructive_requires_name cluster setting to false.

Query parameters

  • allow_no_indices boolean

    If false, the request returns an error if any wildcard expression, index alias, or _all value targets only missing or closed indices. This behavior applies even if the request targets other open indices.

  • expand_wildcards string | array[string]

    Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as open,hidden.

    Supported values include:

    • all: Match any data stream or index, including hidden ones.
    • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
    • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
    • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
    • none: Wildcard expressions are not accepted.

    Values are all, open, closed, hidden, or none.

  • ignore_unavailable boolean

    If false, the request returns an error if it targets a missing or closed index.

  • master_timeout string

    Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

  • timeout string

    Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

    • _shards object
      Hide _shards attributes Show _shards attributes object
      • failed number Required

        The number of shards the operation or search attempted to run on but failed.

      • successful number Required

        The number of shards the operation or search succeeded on.

      • total number Required

        The number of shards the operation or search will run on overall.

      • failures array[object]
        Hide failures attributes Show failures attributes object
        • index
        • node string
        • reason
        • shard number
        • status string
        • primary boolean
      • skipped number
DELETE /{index}
DELETE /books
resp = client.indices.delete(
    index="books",
)
const response = await client.indices.delete({
  index: "books",
});
response = client.indices.delete(
  index: "books"
)
$resp = $client->indices()->delete([
    "index" => "books",
]);
curl -X DELETE -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/books"
client.indices().delete(d -> d
    .index("books")
);








































































Get mapping definitions Generally available

GET /{index}/_mapping/field/{fields}

All methods and paths for this operation:

GET /_mapping/field/{fields}

GET /{index}/_mapping/field/{fields}

Retrieves mapping definitions for one or more fields. For data streams, the API retrieves field mappings for the stream’s backing indices.

This API is useful if you don't need a complete mapping or if an index mapping contains a large number of fields.

Required authorization

  • Index privileges: view_index_metadata

Path parameters

  • index string | array[string] Required

    Comma-separated list of data streams, indices, and aliases used to limit the request. Supports wildcards (*). To target all data streams and indices, omit this parameter or use * or _all.

  • fields string | array[string] Required

    Comma-separated list or wildcard expression of fields used to limit returned information. Supports wildcards (*).

Query parameters

  • allow_no_indices boolean

    If false, the request returns an error if any wildcard expression, index alias, or _all value targets only missing or closed indices. This behavior applies even if the request targets other open indices.

  • expand_wildcards string | array[string]

    Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as open,hidden.

    Supported values include:

    • all: Match any data stream or index, including hidden ones.
    • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
    • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
    • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
    • none: Wildcard expressions are not accepted.

    Values are all, open, closed, hidden, or none.

  • ignore_unavailable boolean

    If false, the request returns an error if it targets a missing or closed index.

  • include_defaults boolean

    If true, return all default settings in the response.

  • local boolean

    If true, the request retrieves information from the local node only.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • * object Additional properties
      Hide * attribute Show * attribute object
      • mappings object Required
        Hide mappings attribute Show mappings attribute object
        • * object Additional properties
          Hide * attributes Show * attributes object
          • full_name string Required
          • mapping object Required
GET /{index}/_mapping/field/{fields}
GET publications/_mapping/field/title
resp = client.indices.get_field_mapping(
    index="publications",
    fields="title",
)
const response = await client.indices.getFieldMapping({
  index: "publications",
  fields: "title",
});
response = client.indices.get_field_mapping(
  index: "publications",
  fields: "title"
)
$resp = $client->indices()->getFieldMapping([
    "index" => "publications",
    "fields" => "title",
]);
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/publications/_mapping/field/title"
client.indices().getFieldMapping(g -> g
    .fields("title")
    .index("publications")
);
Response examples (200)
A sucessful response from `GET publications/_mapping/field/title`, which returns the mapping of a field called `title`.
{
   "publications": {
      "mappings": {
          "title": {
             "full_name": "title",
             "mapping": {
                "title": {
                   "type": "text"
                }
             }
          }
       }
   }
}
A successful response from `GET publications/_mapping/field/author.id,abstract,name`. The get field mapping API also supports wildcard notation.
{
   "publications": {
      "mappings": {
        "author.id": {
           "full_name": "author.id",
           "mapping": {
              "id": {
                 "type": "text"
              }
           }
        },
        "abstract": {
           "full_name": "abstract",
           "mapping": {
              "abstract": {
                 "type": "text"
              }
           }
        }
     }
   }
}
A successful response from `GET publications/_mapping/field/a*`.
{
   "publications": {
      "mappings": {
         "author.name": {
            "full_name": "author.name",
            "mapping": {
               "name": {
                 "type": "text"
               }
            }
         },
         "abstract": {
            "full_name": "abstract",
            "mapping": {
               "abstract": {
                  "type": "text"
               }
            }
         },
         "author.id": {
            "full_name": "author.id",
            "mapping": {
               "id": {
                  "type": "text"
               }
            }
         }
      }
   }
}
































Resolve the cluster Generally available; Added in 8.13.0

GET /_resolve/cluster/{name}

All methods and paths for this operation:

GET /_resolve/cluster

GET /_resolve/cluster/{name}

Resolve the specified index expressions to return information about each cluster, including the local "querying" cluster, if included. If no index expression is provided, the API will return information about all the remote clusters that are configured on the querying cluster.

This endpoint is useful before doing a cross-cluster search in order to determine which remote clusters should be included in a search.

You use the same index expression with this endpoint as you would for cross-cluster search. Index and cluster exclusions are also supported with this endpoint.

For each cluster in the index expression, information is returned about:

  • Whether the querying ("local") cluster is currently connected to each remote cluster specified in the index expression. Note that this endpoint actively attempts to contact the remote clusters, unlike the remote/info endpoint.
  • Whether each remote cluster is configured with skip_unavailable as true or false.
  • Whether there are any indices, aliases, or data streams on that cluster that match the index expression.
  • Whether the search is likely to have errors returned when you do the cross-cluster search (including any authorization errors if you do not have permission to query the index).
  • Cluster version information, including the Elasticsearch server version.

For example, GET /_resolve/cluster/my-index-*,cluster*:my-index-* returns information about the local cluster and all remotely configured clusters that start with the alias cluster*. Each cluster returns information about whether it has any indices, aliases or data streams that match my-index-*.

Note on backwards compatibility

The ability to query without an index expression was added in version 8.18, so when querying remote clusters older than that, the local cluster will send the index expression dummy* to those remote clusters. Thus, if an errors occur, you may see a reference to that index expression even though you didn't request it. If it causes a problem, you can instead include an index expression like *:* to bypass the issue.

You may want to exclude a cluster or index from a search when:

  • A remote cluster is not currently connected and is configured with skip_unavailable=false. Running a cross-cluster search under those conditions will cause the entire search to fail.
  • A cluster has no matching indices, aliases or data streams for the index expression (or your user does not have permissions to search them). For example, suppose your index expression is logs*,remote1:logs* and the remote1 cluster has no indices, aliases or data streams that match logs*. In that case, that cluster will return no results from that cluster if you include it in a cross-cluster search.
  • The index expression (combined with any query parameters you specify) will likely cause an exception to be thrown when you do the search. In these cases, the "error" field in the _resolve/cluster response will be present. (This is also where security/permission errors will be shown.)
  • A remote cluster is an older version that does not support the feature you want to use in your search.

Test availability of remote clusters

The remote/info endpoint is commonly used to test whether the "local" cluster (the cluster being queried) is connected to its remote clusters, but it does not necessarily reflect whether the remote cluster is available or not. The remote cluster may be available, while the local cluster is not currently connected to it.

You can use the _resolve/cluster API to attempt to reconnect to remote clusters. For example with GET _resolve/cluster or GET _resolve/cluster/*:*. The connected field in the response will indicate whether it was successful. If a connection was (re-)established, this will also cause the remote/info endpoint to now indicate a connected status.

Required authorization

  • Index privileges: view_index_metadata

Path parameters

  • name string | array[string] Required

    A comma-separated list of names or index patterns for the indices, aliases, and data streams to resolve. Resources on remote clusters can be specified using the <cluster>:<name> syntax. Index and cluster exclusions (e.g., -cluster1:*) are also supported. If no index expression is specified, information about all remote clusters configured on the local cluster is returned without doing any index matching

Query parameters

  • allow_no_indices boolean

    If false, the request returns an error if any wildcard expression, index alias, or _all value targets only missing or closed indices. This behavior applies even if the request targets other open indices. For example, a request targeting foo*,bar* returns an error if an index starts with foo but no index starts with bar. NOTE: This option is only supported when specifying an index expression. You will get an error if you specify index options to the _resolve/cluster API endpoint that takes no index expression.

  • expand_wildcards string | array[string]

    Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as open,hidden. NOTE: This option is only supported when specifying an index expression. You will get an error if you specify index options to the _resolve/cluster API endpoint that takes no index expression.

    Supported values include:

    • all: Match any data stream or index, including hidden ones.
    • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
    • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
    • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
    • none: Wildcard expressions are not accepted.

    Values are all, open, closed, hidden, or none.

  • ignore_throttled boolean Deprecated

    If true, concrete, expanded, or aliased indices are ignored when frozen. NOTE: This option is only supported when specifying an index expression. You will get an error if you specify index options to the _resolve/cluster API endpoint that takes no index expression.

  • ignore_unavailable boolean

    If false, the request returns an error if it targets a missing or closed index. NOTE: This option is only supported when specifying an index expression. You will get an error if you specify index options to the _resolve/cluster API endpoint that takes no index expression.

  • timeout string

    The maximum time to wait for remote clusters to respond. If a remote cluster does not respond within this timeout period, the API response will show the cluster as not connected and include an error message that the request timed out.

    The default timeout is unset and the query can take as long as the networking layer is configured to wait for remote clusters that are not responding (typically 30 seconds).

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • * object Additional properties

      Provides information about each cluster request relevant to doing a cross-cluster search.

      Hide * attributes Show * attributes object
      • connected boolean Required

        Whether the remote cluster is connected to the local (querying) cluster.

      • skip_unavailable boolean Required

        The skip_unavailable setting for a remote cluster.

      • matching_indices boolean

        Whether the index expression provided in the request matches any indices, aliases or data streams on the cluster.

      • error string

        Provides error messages that are likely to occur if you do a search with this index expression on the specified cluster (for example, lack of security privileges to query an index).

      • version object

        Provides version information about the cluster.

        Hide version attributes Show version attributes object
        • build_flavor string Required
        • minimum_index_compatibility_version string Required
        • minimum_wire_compatibility_version string Required
        • number string Required
GET /_resolve/cluster/{name}
GET /_resolve/cluster/not-present,clust*:my-index*,oldcluster:*?ignore_unavailable=false&timeout=5s
resp = client.indices.resolve_cluster(
    name="not-present,clust*:my-index*,oldcluster:*",
    ignore_unavailable=False,
    timeout="5s",
)
const response = await client.indices.resolveCluster({
  name: "not-present,clust*:my-index*,oldcluster:*",
  ignore_unavailable: "false",
  timeout: "5s",
});
response = client.indices.resolve_cluster(
  name: "not-present,clust*:my-index*,oldcluster:*",
  ignore_unavailable: "false",
  timeout: "5s"
)
$resp = $client->indices()->resolveCluster([
    "name" => "not-present,clust*:my-index*,oldcluster:*",
    "ignore_unavailable" => "false",
    "timeout" => "5s",
]);
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_resolve/cluster/not-present,clust*:my-index*,oldcluster:*?ignore_unavailable=false&timeout=5s"
client.indices().resolveCluster(r -> r
    .ignoreUnavailable(false)
    .name(List.of("not-present","clust*:my-index*","oldcluster:*"))
    .timeout(t -> t
        .offset(5)
    )
);
Response examples (200)
A successful response from `GET /_resolve/cluster/my-index*,clust*:my-index*`. Each cluster has its own response section. The cluster you sent the request to is labelled as "(local)".
{
  "(local)": {
    "connected": true,
    "skip_unavailable": false,
    "matching_indices": true,
    "version": {
      "number": "8.13.0",
      "build_flavor": "default",
      "minimum_wire_compatibility_version": "7.17.0",
      "minimum_index_compatibility_version": "7.0.0"
    }
  },
  "cluster_one": {
    "connected": true,
    "skip_unavailable": true,
    "matching_indices": true,
    "version": {
      "number": "8.13.0",
      "build_flavor": "default",
      "minimum_wire_compatibility_version": "7.17.0",
      "minimum_index_compatibility_version": "7.0.0"
    }
  },
  "cluster_two": {
    "connected": true,
    "skip_unavailable": false,
    "matching_indices": true,
    "version": {
      "number": "8.13.0",
      "build_flavor": "default",
      "minimum_wire_compatibility_version": "7.17.0",
      "minimum_index_compatibility_version": "7.0.0"
    }
  }
}
A successful response from `GET /_resolve/cluster/not-present,clust*:my-index*,oldcluster:*?ignore_unavailable=false&timeout=5s`. This type of request can be used to identify potential problems with your cross-cluster search. Note also that a `timeout` of 5 seconds is sent, which sets the maximum time the query will wait for remote clusters to respond. The local cluster has no index called `not_present`. Searching with `ignore_unavailable=false` would return a "no such index" error. The `cluster_one` remote cluster has no indices that match the pattern `my-index*`. There may be no indices that match the pattern or the index could be closed. The `cluster_two` remote cluster is not connected (the attempt to connect failed). Since this cluster is marked as `skip_unavailable=false`, you should probably exclude this cluster from the search by adding `-cluster_two:*` to the search index expression. For `cluster_three`, the error message indicates that this remote cluster did not respond within the 5-second timeout window specified, so it is also marked as not connected. The `oldcluster` remote cluster shows that it has matching indices, but no version information is included. This indicates that the cluster version predates the introduction of the `_resolve/cluster` API, so you may want to exclude it from your cross-cluster search.
{
  "(local)": {
    "connected": true,
    "skip_unavailable": false,
    "error": "no such index [not_present]"
  },
  "cluster_one": {
    "connected": true,
    "skip_unavailable": true,
    "matching_indices": false,
    "version": {
      "number": "8.13.0",
      "build_flavor": "default",
      "minimum_wire_compatibility_version": "7.17.0",
      "minimum_index_compatibility_version": "7.0.0"
    }
  },
  "cluster_two": {
    "connected": false,
    "skip_unavailable": false
  },
  "cluster_three": {
    "connected": false,
    "skip_unavailable": false,
    "error": "Request timed out before receiving a response from the remote cluster"
  },
  "oldcluster": {
    "connected": true,
    "skip_unavailable": false,
    "matching_indices": true
  }
}





















































Create or update a lifecycle policy Generally available; Added in 6.6.0

PUT /_ilm/policy/{policy}

If the specified policy exists, it is replaced and the policy version is incremented.

NOTE: Only the latest version of the policy is stored, you cannot revert to previous versions.

Required authorization

  • Index privileges: manage
  • Cluster privileges: manage_ilm
External documentation

Path parameters

  • policy string Required

    Identifier for the policy.

Query parameters

  • master_timeout string

    Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

  • timeout string

    Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

application/json

Body

  • policy object
    Hide policy attributes Show policy attributes object
    • phases object Required
      Hide phases attributes Show phases attributes object
      • cold object
      • delete object
      • frozen object
      • hot object
      • warm object
    • _meta object

      Arbitrary metadata that is not automatically generated or used by Elasticsearch.

      Hide _meta attribute Show _meta attribute object
      • * object Additional properties

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

PUT /_ilm/policy/{policy}
PUT _ilm/policy/my_policy
{
  "policy": {
    "_meta": {
      "description": "used for nginx log",
      "project": {
        "name": "myProject",
        "department": "myDepartment"
      }
    },
    "phases": {
      "warm": {
        "min_age": "10d",
        "actions": {
          "forcemerge": {
            "max_num_segments": 1
          }
        }
      },
      "delete": {
        "min_age": "30d",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}
resp = client.ilm.put_lifecycle(
    name="my_policy",
    policy={
        "_meta": {
            "description": "used for nginx log",
            "project": {
                "name": "myProject",
                "department": "myDepartment"
            }
        },
        "phases": {
            "warm": {
                "min_age": "10d",
                "actions": {
                    "forcemerge": {
                        "max_num_segments": 1
                    }
                }
            },
            "delete": {
                "min_age": "30d",
                "actions": {
                    "delete": {}
                }
            }
        }
    },
)
const response = await client.ilm.putLifecycle({
  name: "my_policy",
  policy: {
    _meta: {
      description: "used for nginx log",
      project: {
        name: "myProject",
        department: "myDepartment",
      },
    },
    phases: {
      warm: {
        min_age: "10d",
        actions: {
          forcemerge: {
            max_num_segments: 1,
          },
        },
      },
      delete: {
        min_age: "30d",
        actions: {
          delete: {},
        },
      },
    },
  },
});
response = client.ilm.put_lifecycle(
  policy: "my_policy",
  body: {
    "policy": {
      "_meta": {
        "description": "used for nginx log",
        "project": {
          "name": "myProject",
          "department": "myDepartment"
        }
      },
      "phases": {
        "warm": {
          "min_age": "10d",
          "actions": {
            "forcemerge": {
              "max_num_segments": 1
            }
          }
        },
        "delete": {
          "min_age": "30d",
          "actions": {
            "delete": {}
          }
        }
      }
    }
  }
)
$resp = $client->ilm()->putLifecycle([
    "policy" => "my_policy",
    "body" => [
        "policy" => [
            "_meta" => [
                "description" => "used for nginx log",
                "project" => [
                    "name" => "myProject",
                    "department" => "myDepartment",
                ],
            ],
            "phases" => [
                "warm" => [
                    "min_age" => "10d",
                    "actions" => [
                        "forcemerge" => [
                            "max_num_segments" => 1,
                        ],
                    ],
                ],
                "delete" => [
                    "min_age" => "30d",
                    "actions" => [
                        "delete" => new ArrayObject([]),
                    ],
                ],
            ],
        ],
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"policy":{"_meta":{"description":"used for nginx log","project":{"name":"myProject","department":"myDepartment"}},"phases":{"warm":{"min_age":"10d","actions":{"forcemerge":{"max_num_segments":1}}},"delete":{"min_age":"30d","actions":{"delete":{}}}}}}' "$ELASTICSEARCH_URL/_ilm/policy/my_policy"
client.ilm().putLifecycle(p -> p
    .name("my_policy")
    .policy(po -> po
        .phases(ph -> ph
            .delete(d -> d
                .actions(a -> a
                    .delete(de -> de)
                )
                .minAge(m -> m
                    .time("30d")
                )
            )
            .warm(w -> w
                .actions(a -> a
                    .forcemerge(f -> f
                        .maxNumSegments(1)
                    )
                )
                .minAge(m -> m
                    .time("10d")
                )
            )
        )
        .meta(Map.of("description", JsonData.fromJson("\"used for nginx log\""),"project", JsonData.fromJson("{\"name\":\"myProject\",\"department\":\"myDepartment\"}")))
    )
);
Request example
Run `PUT _ilm/policy/my_policy` to create a new policy with arbitrary metadata.
{
  "policy": {
    "_meta": {
      "description": "used for nginx log",
      "project": {
        "name": "myProject",
        "department": "myDepartment"
      }
    },
    "phases": {
      "warm": {
        "min_age": "10d",
        "actions": {
          "forcemerge": {
            "max_num_segments": 1
          }
        }
      },
      "delete": {
        "min_age": "30d",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}
Response examples (200)
A successful response when creating a new lifecycle policy.
{
  "acknowledged": true
}
































Stop the ILM plugin Generally available; Added in 6.6.0

POST /_ilm/stop

Halt all lifecycle management operations and stop the index lifecycle management plugin. This is useful when you are performing maintenance on the cluster and need to prevent ILM from performing any actions on your indices.

The API returns as soon as the stop request has been acknowledged, but the plugin might continue to run until in-progress operations complete and the plugin can be safely stopped. Use the get ILM status API to check whether ILM is running.

Required authorization

  • Cluster privileges: manage_ilm

Query parameters

  • master_timeout string

    Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

  • timeout string

    Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

POST /_ilm/stop
POST _ilm/stop
resp = client.ilm.stop()
const response = await client.ilm.stop();
response = client.ilm.stop
$resp = $client->ilm()->stop();
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_ilm/stop"
client.ilm().stop(s -> s);
Response examples (200)
A successful response when stopping the ILM plugin.
{
  "acknowledged": true
}





Perform completion inference on the service Generally available; Added in 8.11.0

POST /_inference/completion/{inference_id}

Path parameters

  • inference_id string Required

    The inference Id

Query parameters

  • timeout string

    Specifies the amount of time to wait for the inference request to complete.

    Values are -1 or 0.

application/json

Body

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • completion array[object] Required

      The completion result object

      Hide completion attribute Show completion attribute object
      • result string Required
POST /_inference/completion/{inference_id}
POST _inference/completion/openai_chat_completions
{
  "input": "What is Elastic?"
}
resp = client.inference.completion(
    inference_id="openai_chat_completions",
    input="What is Elastic?",
)
const response = await client.inference.completion({
  inference_id: "openai_chat_completions",
  input: "What is Elastic?",
});
response = client.inference.completion(
  inference_id: "openai_chat_completions",
  body: {
    "input": "What is Elastic?"
  }
)
$resp = $client->inference()->completion([
    "inference_id" => "openai_chat_completions",
    "body" => [
        "input" => "What is Elastic?",
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"input":"What is Elastic?"}' "$ELASTICSEARCH_URL/_inference/completion/openai_chat_completions"
client.inference().completion(c -> c
    .inferenceId("openai_chat_completions")
    .input("What is Elastic?")
);
Request example
Run `POST _inference/completion/openai_chat_completions` to perform a completion on the example question.
{
  "input": "What is Elastic?"
}
Response examples (200)
A successful response from `POST _inference/completion/openai_chat_completions`.
{
  "completion": [
    {
      "result": "Elastic is a company that provides a range of software solutions for search, logging, security, and analytics. Their flagship product is Elasticsearch, an open-source, distributed search engine that allows users to search, analyze, and visualize large volumes of data in real-time. Elastic also offers products such as Kibana, a data visualization tool, and Logstash, a log management and pipeline tool, as well as various other tools and solutions for data analysis and management."
    }
  ]
}




Create an inference endpoint Generally available; Added in 8.11.0

PUT /_inference/{task_type}/{inference_id}

All methods and paths for this operation:

PUT /_inference/{inference_id}

PUT /_inference/{task_type}/{inference_id}

IMPORTANT: The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.

The following integrations are available through the inference API. You can find the available task types next to the integration name:

  • AlibabaCloud AI Search (completion, rerank, sparse_embedding, text_embedding)
  • Amazon Bedrock (completion, text_embedding)
  • Amazon SageMaker (chat_completion, completion, rerank, sparse_embedding, text_embedding)
  • Anthropic (completion)
  • Azure AI Studio (completion, text_embedding)
  • Azure OpenAI (completion, text_embedding)
  • Cohere (completion, rerank, text_embedding)
  • DeepSeek (chat_completion, completion)
  • Elasticsearch (rerank, sparse_embedding, text_embedding - this service is for built-in models and models uploaded through Eland)
  • ELSER (sparse_embedding)
  • Google AI Studio (completion, text_embedding)
  • Google Vertex AI (chat_completion, completion, rerank, text_embedding)
  • Hugging Face (chat_completion, completion, rerank, text_embedding)
  • JinaAI (rerank, text_embedding)
  • Llama (chat_completion, completion, text_embedding)
  • Mistral (chat_completion, completion, text_embedding)
  • OpenAI (chat_completion, completion, text_embedding)
  • VoyageAI (rerank, text_embedding)
  • Watsonx inference integration (text_embedding)

Required authorization

  • Cluster privileges: manage_inference

Path parameters

  • task_type string Required

    The task type. Refer to the integration list in the API description for the available task types.

    Values are sparse_embedding, text_embedding, rerank, completion, or chat_completion.

  • inference_id string Required

    The inference Id

Query parameters

  • timeout string

    Specifies the amount of time to wait for the inference endpoint to be created.

    Values are -1 or 0.

application/json

Body Required

  • chunking_settings object

    Chunking configuration object

    Hide chunking_settings attributes Show chunking_settings attributes object
    • max_chunk_size number

      The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

      Default value is 250.

    • overlap number

      The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

      Default value is 100.

    • sentence_overlap number

      The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

      Default value is 1.

    • strategy string

      The chunking strategy: sentence or word.

      Default value is sentence.

  • service string Required

    The service type

  • service_settings object Required

    Settings specific to the service

  • task_settings object

    Task settings specific to the service and task type

Responses

  • 200 application/json
    Hide response attributes Show response attributes object

    Represents an inference endpoint as returned by the GET API

    • chunking_settings object

      Chunking configuration object

      Hide chunking_settings attributes Show chunking_settings attributes object
      • max_chunk_size number

        The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

        Default value is 250.

      • overlap number

        The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

        Default value is 100.

      • sentence_overlap number

        The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

        Default value is 1.

      • strategy string

        The chunking strategy: sentence or word.

        Default value is sentence.

    • service string Required

      The service type

    • service_settings object Required

      Settings specific to the service

    • task_settings object

      Task settings specific to the service and task type

    • inference_id string Required

      The inference Id

    • task_type string Required

      The task type

      Values are sparse_embedding, text_embedding, rerank, completion, or chat_completion.

PUT /_inference/{task_type}/{inference_id}
PUT _inference/rerank/my-rerank-model
{
 "service": "cohere",
 "service_settings": {
   "model_id": "rerank-english-v3.0",
   "api_key": "{{COHERE_API_KEY}}"
 }
}
resp = client.inference.put(
    task_type="rerank",
    inference_id="my-rerank-model",
    inference_config={
        "service": "cohere",
        "service_settings": {
            "model_id": "rerank-english-v3.0",
            "api_key": "{{COHERE_API_KEY}}"
        }
    },
)
const response = await client.inference.put({
  task_type: "rerank",
  inference_id: "my-rerank-model",
  inference_config: {
    service: "cohere",
    service_settings: {
      model_id: "rerank-english-v3.0",
      api_key: "{{COHERE_API_KEY}}",
    },
  },
});
response = client.inference.put(
  task_type: "rerank",
  inference_id: "my-rerank-model",
  body: {
    "service": "cohere",
    "service_settings": {
      "model_id": "rerank-english-v3.0",
      "api_key": "{{COHERE_API_KEY}}"
    }
  }
)
$resp = $client->inference()->put([
    "task_type" => "rerank",
    "inference_id" => "my-rerank-model",
    "body" => [
        "service" => "cohere",
        "service_settings" => [
            "model_id" => "rerank-english-v3.0",
            "api_key" => "{{COHERE_API_KEY}}",
        ],
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"service":"cohere","service_settings":{"model_id":"rerank-english-v3.0","api_key":"{{COHERE_API_KEY}}"}}' "$ELASTICSEARCH_URL/_inference/rerank/my-rerank-model"
client.inference().put(p -> p
    .inferenceId("my-rerank-model")
    .taskType(TaskType.Rerank)
    .inferenceConfig(i -> i
        .service("cohere")
        .serviceSettings(JsonData.fromJson("{\"model_id\":\"rerank-english-v3.0\",\"api_key\":\"{{COHERE_API_KEY}}\"}"))
    )
);
Request example
An example body for a `PUT _inference/rerank/my-rerank-model` request.
{
 "service": "cohere",
 "service_settings": {
   "model_id": "rerank-english-v3.0",
   "api_key": "{{COHERE_API_KEY}}"
 }
}

Perform inference on the service Generally available; Added in 8.11.0

POST /_inference/{task_type}/{inference_id}

All methods and paths for this operation:

POST /_inference/{inference_id}

POST /_inference/{task_type}/{inference_id}

This API enables you to use machine learning models to perform specific tasks on data that you provide as an input. It returns a response with the results of the tasks. The inference endpoint you use can perform one specific task that has been defined when the endpoint was created with the create inference API.

For details about using this API with a service, such as Amazon Bedrock, Anthropic, or HuggingFace, refer to the service-specific documentation.


The inference APIs enable you to use certain services, such as built-in machine learning models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI, Anthropic, Watsonx.ai, or Hugging Face. For built-in models and models uploaded through Eland, the inference APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the inference APIs to use these models or if you want to use non-NLP models, use the machine learning trained model APIs.

Required authorization

  • Cluster privileges: monitor_inference

Path parameters

  • task_type string Required

    The type of inference task that the model performs.

    Values are sparse_embedding, text_embedding, rerank, completion, or chat_completion.

  • inference_id string Required

    The unique identifier for the inference endpoint.

Query parameters

  • timeout string

    The amount of time to wait for the inference request to complete.

    Values are -1 or 0.

application/json

Body

  • query string

    The query input, which is required only for the rerank task. It is not required for other tasks.

  • input string | array[string] Required

    The text on which you want to perform the inference task. It can be a single string or an array.


    Inference endpoints for the completion task type currently only support a single string as input.

  • input_type string

    Specifies the input data type for the text embedding model. The input_type parameter only applies to Inference Endpoints with the text_embedding task type. Possible values include:

    • SEARCH
    • INGEST
    • CLASSIFICATION
    • CLUSTERING Not all services support all values. Unsupported values will trigger a validation exception. Accepted values depend on the configured inference service, refer to the relevant service-specific documentation for more info.


    The input_type parameter specified on the root level of the request body will take precedence over the input_type parameter specified in task_settings.

  • task_settings object

    Task settings for the individual inference request. These settings are specific to the task type you specified and override the task settings specified when initializing the service.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • text_embedding_bytes array[object]

      The text embedding result object for byte representation

      Hide text_embedding_bytes attribute Show text_embedding_bytes attribute object
      • embedding array[number] Required

        Text Embedding results containing bytes are represented as Dense Vectors of bytes.

    • text_embedding_bits array[object]

      The text embedding result object for byte representation

      Hide text_embedding_bits attribute Show text_embedding_bits attribute object
      • embedding array[number] Required

        Text Embedding results containing bytes are represented as Dense Vectors of bytes.

    • text_embedding array[object]

      The text embedding result object

      Hide text_embedding attribute Show text_embedding attribute object
      • embedding array[number] Required

        Text Embedding results are represented as Dense Vectors of floats.

    • sparse_embedding array[object]
      Hide sparse_embedding attribute Show sparse_embedding attribute object
      • embedding object Required

        Sparse Embedding tokens are represented as a dictionary of string to double.

        Hide embedding attribute Show embedding attribute object
        • * number Additional properties
    • completion array[object]

      The completion result object

      Hide completion attribute Show completion attribute object
      • result string Required
    • rerank array[object]

      The rerank result object representing a single ranked document id: the original index of the document in the request relevance_score: the relevance_score of the document relative to the query text: Optional, the text of the document, if requested

      Hide rerank attributes Show rerank attributes object
      • index number Required
      • relevance_score number Required
      • text string
POST /_inference/{task_type}/{inference_id}
curl \
 --request POST 'http://api.example.com/_inference/{task_type}/{inference_id}' \
 --header "Authorization: $API_KEY" \
 --header "Content-Type: application/json" \
 --data '{"query":"string","input":"string","input_type":"string","task_settings":{}}'








































Create an Elasticsearch inference endpoint Generally available; Added in 8.13.0

PUT /_inference/{task_type}/{elasticsearch_inference_id}

Create an inference endpoint to perform an inference task with the elasticsearch service.


Your Elasticsearch deployment contains preconfigured ELSER and E5 inference endpoints, you only need to create the enpoints using the API if you want to customize the settings.

If you use the ELSER or the E5 model through the elasticsearch service, the API request will automatically download and deploy the model if it isn't downloaded yet.


You might see a 502 bad gateway error in the response when using the Kibana Console. This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the Machine Learning UI. If using the Python client, you can set the timeout parameter to a higher value.

After creating the endpoint, wait for the model deployment to complete before using it. To verify the deployment status, use the get trained model statistics API. Look for "state": "fully_allocated" in the response and ensure that the "allocation_count" matches the "target_allocation_count". Avoid creating multiple endpoints for the same model unless required, as each endpoint consumes significant resources.

Required authorization

  • Cluster privileges: manage_inference

Path parameters

  • task_type string

    The type of the inference task that the model will perform.

    Values are rerank, sparse_embedding, or text_embedding.

  • elasticsearch_inference_id string Required

    The unique identifier of the inference endpoint. The must not match the model_id.

Query parameters

  • timeout string

    Specifies the amount of time to wait for the inference endpoint to be created.

    Values are -1 or 0.

application/json

Body

  • chunking_settings object

    The chunking configuration object.

    Hide chunking_settings attributes Show chunking_settings attributes object
    • max_chunk_size number

      The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

      Default value is 250.

    • overlap number

      The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

      Default value is 100.

    • sentence_overlap number

      The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

      Default value is 1.

    • strategy string

      The chunking strategy: sentence or word.

      Default value is sentence.

  • service string Required

    The type of service supported for the specified task type. In this case, elasticsearch.

    Value is elasticsearch.

  • service_settings object Required

    Settings used to install the inference model. These settings are specific to the elasticsearch service.

    Hide service_settings attributes Show service_settings attributes object
    • adaptive_allocations object

      Adaptive allocations configuration details. If enabled is true, the number of allocations of the model is set based on the current load the process gets. When the load is high, a new model allocation is automatically created, respecting the value of max_number_of_allocations if it's set. When the load is low, a model allocation is automatically removed, respecting the value of min_number_of_allocations if it's set. If enabled is true, do not set the number of allocations manually.

      Hide adaptive_allocations attributes Show adaptive_allocations attributes object
      • enabled boolean

        Turn on adaptive_allocations.

        Default value is false.

      • max_number_of_allocations number

        The maximum number of allocations to scale to. If set, it must be greater than or equal to min_number_of_allocations.

      • min_number_of_allocations number

        The minimum number of allocations to scale to. If set, it must be greater than or equal to 0. If not defined, the deployment scales to 0.

    • deployment_id string

      The deployment identifier for a trained model deployment. When deployment_id is used the model_id is optional.

    • model_id string Required

      The name of the model to use for the inference task. It can be the ID of a built-in model (for example, .multilingual-e5-small for E5) or a text embedding model that was uploaded by using the Eland client.

      External documentation
    • num_allocations number

      The total number of allocations that are assigned to the model across machine learning nodes. Increasing this value generally increases the throughput. If adaptive allocations are enabled, do not set this value because it's automatically set.

    • num_threads number Required

      The number of threads used by each model allocation during inference. This setting generally increases the speed per inference request. The inference process is a compute-bound process; threads_per_allocations must not exceed the number of available allocated processors per node. The value must be a power of 2. The maximum value is 32.

  • task_settings object

    Settings to configure the inference task. These settings are specific to the task type you specified.

    Hide task_settings attribute Show task_settings attribute object
    • return_documents boolean

      For a rerank task, return the document instead of only the index.

      Default value is true.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • chunking_settings object

      Chunking configuration object

      Hide chunking_settings attributes Show chunking_settings attributes object
      • max_chunk_size number

        The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

        Default value is 250.

      • overlap number

        The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

        Default value is 100.

      • sentence_overlap number

        The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

        Default value is 1.

      • strategy string

        The chunking strategy: sentence or word.

        Default value is sentence.

    • service string Required

      The service type

    • service_settings object Required

      Settings specific to the service

    • task_settings object

      Task settings specific to the service and task type

    • inference_id string Required

      The inference Id

    • task_type string Required

      The task type

      Values are sparse_embedding, text_embedding, or rerank.

PUT /_inference/{task_type}/{elasticsearch_inference_id}
PUT _inference/sparse_embedding/my-elser-model
{
    "service": "elasticsearch",
    "service_settings": {
        "adaptive_allocations": { 
        "enabled": true,
        "min_number_of_allocations": 1,
        "max_number_of_allocations": 4
        },
        "num_threads": 1,
        "model_id": ".elser_model_2" 
    }
}
resp = client.inference.put(
    task_type="sparse_embedding",
    inference_id="my-elser-model",
    inference_config={
        "service": "elasticsearch",
        "service_settings": {
            "adaptive_allocations": {
                "enabled": True,
                "min_number_of_allocations": 1,
                "max_number_of_allocations": 4
            },
            "num_threads": 1,
            "model_id": ".elser_model_2"
        }
    },
)
const response = await client.inference.put({
  task_type: "sparse_embedding",
  inference_id: "my-elser-model",
  inference_config: {
    service: "elasticsearch",
    service_settings: {
      adaptive_allocations: {
        enabled: true,
        min_number_of_allocations: 1,
        max_number_of_allocations: 4,
      },
      num_threads: 1,
      model_id: ".elser_model_2",
    },
  },
});
response = client.inference.put(
  task_type: "sparse_embedding",
  inference_id: "my-elser-model",
  body: {
    "service": "elasticsearch",
    "service_settings": {
      "adaptive_allocations": {
        "enabled": true,
        "min_number_of_allocations": 1,
        "max_number_of_allocations": 4
      },
      "num_threads": 1,
      "model_id": ".elser_model_2"
    }
  }
)
$resp = $client->inference()->put([
    "task_type" => "sparse_embedding",
    "inference_id" => "my-elser-model",
    "body" => [
        "service" => "elasticsearch",
        "service_settings" => [
            "adaptive_allocations" => [
                "enabled" => true,
                "min_number_of_allocations" => 1,
                "max_number_of_allocations" => 4,
            ],
            "num_threads" => 1,
            "model_id" => ".elser_model_2",
        ],
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"service":"elasticsearch","service_settings":{"adaptive_allocations":{"enabled":true,"min_number_of_allocations":1,"max_number_of_allocations":4},"num_threads":1,"model_id":".elser_model_2"}}' "$ELASTICSEARCH_URL/_inference/sparse_embedding/my-elser-model"
client.inference().put(p -> p
    .inferenceId("my-elser-model")
    .taskType(TaskType.SparseEmbedding)
    .inferenceConfig(i -> i
        .service("elasticsearch")
        .serviceSettings(JsonData.fromJson("{\"adaptive_allocations\":{\"enabled\":true,\"min_number_of_allocations\":1,\"max_number_of_allocations\":4},\"num_threads\":1,\"model_id\":\".elser_model_2\"}"))
    )
);
Request examples
Run `PUT _inference/sparse_embedding/my-elser-model` to create an inference endpoint that performs a `sparse_embedding` task. The `model_id` must be the ID of one of the built-in ELSER models. The API will automatically download the ELSER model if it isn't already downloaded and then deploy the model.
{
    "service": "elasticsearch",
    "service_settings": {
        "adaptive_allocations": { 
        "enabled": true,
        "min_number_of_allocations": 1,
        "max_number_of_allocations": 4
        },
        "num_threads": 1,
        "model_id": ".elser_model_2" 
    }
}
Run `PUT _inference/rerank/my-elastic-rerank` to create an inference endpoint that performs a rerank task using the built-in Elastic Rerank cross-encoder model. The `model_id` must be `.rerank-v1`, which is the ID of the built-in Elastic Rerank model. The API will automatically download the Elastic Rerank model if it isn't already downloaded and then deploy the model. Once deployed, the model can be used for semantic re-ranking with a `text_similarity_reranker` retriever.
{
    "service": "elasticsearch",
    "service_settings": {
        "model_id": ".rerank-v1", 
        "num_threads": 1,
        "adaptive_allocations": { 
        "enabled": true,
        "min_number_of_allocations": 1,
        "max_number_of_allocations": 4
        }
    }
}
Run `PUT _inference/text_embedding/my-e5-model` to create an inference endpoint that performs a `text_embedding` task. The `model_id` must be the ID of one of the built-in E5 models. The API will automatically download the E5 model if it isn't already downloaded and then deploy the model.
{
    "service": "elasticsearch",
    "service_settings": {
        "num_allocations": 1,
        "num_threads": 1,
        "model_id": ".multilingual-e5-small" 
    }
}
Run `PUT _inference/text_embedding/my-msmarco-minilm-model` to create an inference endpoint that performs a `text_embedding` task with a model that was uploaded by Eland.
{
    "service": "elasticsearch",
    "service_settings": {
        "num_allocations": 1,
        "num_threads": 1,
        "model_id": "msmarco-MiniLM-L12-cos-v5" 
    }
}
Run `PUT _inference/text_embedding/my-e5-model` to create an inference endpoint that performs a `text_embedding` task and to configure adaptive allocations. The API request will automatically download the E5 model if it isn't already downloaded and then deploy the model.
{
    "service": "elasticsearch",
    "service_settings": {
        "adaptive_allocations": {
        "enabled": true,
        "min_number_of_allocations": 3,
        "max_number_of_allocations": 10
        },
        "num_threads": 1,
        "model_id": ".multilingual-e5-small"
    }
}
Run `PUT _inference/sparse_embedding/use_existing_deployment` to use an already existing model deployment when creating an inference endpoint.
{
    "service": "elasticsearch",
    "service_settings": {
        "deployment_id": ".elser_model_2"
    }
}
Response examples (200)
A successful response from `PUT _inference/sparse_embedding/use_existing_deployment`. It contains the model ID and the threads and allocations settings from the model deployment.
{
  "inference_id": "use_existing_deployment",
  "task_type": "sparse_embedding",
  "service": "elasticsearch",
  "service_settings": {
    "num_allocations": 2,
    "num_threads": 1,
    "model_id": ".elser_model_2",
    "deployment_id": ".elser_model_2"
  },
  "chunking_settings": {
    "strategy": "sentence",
    "max_chunk_size": 250,
    "sentence_overlap": 1
  }
}












Create a Hugging Face inference endpoint Generally available; Added in 8.12.0

PUT /_inference/{task_type}/{huggingface_inference_id}

Create an inference endpoint to perform an inference task with the hugging_face service. Supported tasks include: text_embedding, completion, and chat_completion.

To configure the endpoint, first visit the Hugging Face Inference Endpoints page and create a new endpoint. Select a model that supports the task you intend to use.

For Elastic's text_embedding task: The selected model must support the Sentence Embeddings task. On the new endpoint creation page, select the Sentence Embeddings task under the Advanced Configuration section. After the endpoint has initialized, copy the generated endpoint URL. Recommended models for text_embedding task:

  • all-MiniLM-L6-v2
  • all-MiniLM-L12-v2
  • all-mpnet-base-v2
  • e5-base-v2
  • e5-small-v2
  • multilingual-e5-base
  • multilingual-e5-small

For Elastic's chat_completion and completion tasks: The selected model must support the Text Generation task and expose OpenAI API. HuggingFace supports both serverless and dedicated endpoints for Text Generation. When creating dedicated endpoint select the Text Generation task. After the endpoint is initialized (for dedicated) or ready (for serverless), ensure it supports the OpenAI API and includes /v1/chat/completions part in URL. Then, copy the full endpoint URL for use. Recommended models for chat_completion and completion tasks:

  • Mistral-7B-Instruct-v0.2
  • QwQ-32B
  • Phi-3-mini-128k-instruct

For Elastic's rerank task: The selected model must support the sentence-ranking task and expose OpenAI API. HuggingFace supports only dedicated (not serverless) endpoints for Rerank so far. After the endpoint is initialized, copy the full endpoint URL for use. Tested models for rerank task:

  • bge-reranker-base
  • jina-reranker-v1-turbo-en-GGUF

Required authorization

  • Cluster privileges: manage_inference

Path parameters

  • task_type string

    The type of the inference task that the model will perform.

    Values are chat_completion, completion, rerank, or text_embedding.

  • huggingface_inference_id string Required

    The unique identifier of the inference endpoint.

Query parameters

  • timeout string

    Specifies the amount of time to wait for the inference endpoint to be created.

    Values are -1 or 0.

application/json

Body

  • chunking_settings object

    The chunking configuration object.

    Hide chunking_settings attributes Show chunking_settings attributes object
    • max_chunk_size number

      The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

      Default value is 250.

    • overlap number

      The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

      Default value is 100.

    • sentence_overlap number

      The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

      Default value is 1.

    • strategy string

      The chunking strategy: sentence or word.

      Default value is sentence.

  • service string Required

    The type of service supported for the specified task type. In this case, hugging_face.

    Value is hugging_face.

  • service_settings object Required

    Settings used to install the inference model. These settings are specific to the hugging_face service.

    Hide service_settings attributes Show service_settings attributes object
    • api_key string Required

      A valid access token for your HuggingFace account. You can create or find your access tokens on the HuggingFace settings page.

      IMPORTANT: You need to provide the API key only once, during the inference model creation. The get inference endpoint API does not retrieve your API key. After creating the inference model, you cannot change the associated API key. If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.

      External documentation
    • rate_limit object

      This setting helps to minimize the number of rate limit errors returned from Hugging Face. By default, the hugging_face service sets the number of requests allowed per minute to 3000 for all supported tasks. Hugging Face does not publish a universal rate limit — actual limits may vary. It is recommended to adjust this value based on the capacity and limits of your specific deployment environment.

      Hide rate_limit attribute Show rate_limit attribute object
      • requests_per_minute number

        The number of requests allowed per minute.

    • url string Required

      The URL endpoint to use for the requests. For completion and chat_completion tasks, the deployed model must be compatible with the Hugging Face Chat Completion interface (see the linked external documentation for details). The endpoint URL for the request must include /v1/chat/completions. If the model supports the OpenAI Chat Completion schema, a toggle should appear in the interface. Enabling this toggle doesn't change any model behavior, it reveals the full endpoint URL needed (which should include /v1/chat/completions) when configuring the inference endpoint in Elasticsearch. If the model doesn't support this schema, the toggle may not be shown.

      External documentation
    • model_id string

      The name of the HuggingFace model to use for the inference task. For completion and chat_completion tasks, this field is optional but may be required for certain models — particularly when using serverless inference endpoints. For the text_embedding task, this field should not be included. Otherwise, the request will fail.

  • task_settings object

    Settings to configure the inference task. These settings are specific to the task type you specified.

    Hide task_settings attributes Show task_settings attributes object
    • return_documents boolean

      For a rerank task, return doc text within the results.

    • top_n number

      For a rerank task, the number of most relevant documents to return. It defaults to the number of the documents.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • chunking_settings object

      Chunking configuration object

      Hide chunking_settings attributes Show chunking_settings attributes object
      • max_chunk_size number

        The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

        Default value is 250.

      • overlap number

        The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

        Default value is 100.

      • sentence_overlap number

        The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

        Default value is 1.

      • strategy string

        The chunking strategy: sentence or word.

        Default value is sentence.

    • service string Required

      The service type

    • service_settings object Required

      Settings specific to the service

    • task_settings object

      Task settings specific to the service and task type

    • inference_id string Required

      The inference Id

    • task_type string Required

      The task type

      Value is text_embedding.

PUT /_inference/{task_type}/{huggingface_inference_id}
PUT _inference/text_embedding/hugging-face-embeddings
{
    "service": "hugging_face",
    "service_settings": {
        "api_key": "hugging-face-access-token", 
        "url": "url-endpoint" 
    }
}
resp = client.inference.put(
    task_type="text_embedding",
    inference_id="hugging-face-embeddings",
    inference_config={
        "service": "hugging_face",
        "service_settings": {
            "api_key": "hugging-face-access-token",
            "url": "url-endpoint"
        }
    },
)
const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "hugging-face-embeddings",
  inference_config: {
    service: "hugging_face",
    service_settings: {
      api_key: "hugging-face-access-token",
      url: "url-endpoint",
    },
  },
});
response = client.inference.put(
  task_type: "text_embedding",
  inference_id: "hugging-face-embeddings",
  body: {
    "service": "hugging_face",
    "service_settings": {
      "api_key": "hugging-face-access-token",
      "url": "url-endpoint"
    }
  }
)
$resp = $client->inference()->put([
    "task_type" => "text_embedding",
    "inference_id" => "hugging-face-embeddings",
    "body" => [
        "service" => "hugging_face",
        "service_settings" => [
            "api_key" => "hugging-face-access-token",
            "url" => "url-endpoint",
        ],
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"service":"hugging_face","service_settings":{"api_key":"hugging-face-access-token","url":"url-endpoint"}}' "$ELASTICSEARCH_URL/_inference/text_embedding/hugging-face-embeddings"
client.inference().put(p -> p
    .inferenceId("hugging-face-embeddings")
    .taskType(TaskType.TextEmbedding)
    .inferenceConfig(i -> i
        .service("hugging_face")
        .serviceSettings(JsonData.fromJson("{\"api_key\":\"hugging-face-access-token\",\"url\":\"url-endpoint\"}"))
    )
);
Request examples
Run `PUT _inference/text_embedding/hugging-face-embeddings` to create an inference endpoint that performs a `text_embedding` task type.
{
    "service": "hugging_face",
    "service_settings": {
        "api_key": "hugging-face-access-token", 
        "url": "url-endpoint" 
    }
}
Run `PUT _inference/rerank/hugging-face-rerank` to create an inference endpoint that performs a `rerank` task type.
{
    "service": "hugging_face",
    "service_settings": {
        "api_key": "hugging-face-access-token", 
        "url": "url-endpoint" 
    },
    "task_settings": {
        "return_documents": true,
        "top_n": 3
    }
}

Create an JinaAI inference endpoint Generally available; Added in 8.18.0

PUT /_inference/{task_type}/{jinaai_inference_id}

Create an inference endpoint to perform an inference task with the jinaai service.

To review the available rerank models, refer to https://jina.ai/reranker. To review the available text_embedding models, refer to the https://jina.ai/embeddings/.

Required authorization

  • Cluster privileges: manage_inference

Path parameters

  • task_type string

    The type of the inference task that the model will perform.

    Values are rerank or text_embedding.

  • jinaai_inference_id string Required

    The unique identifier of the inference endpoint.

Query parameters

  • timeout string

    Specifies the amount of time to wait for the inference endpoint to be created.

    Values are -1 or 0.

application/json

Body

  • chunking_settings object

    The chunking configuration object.

    Hide chunking_settings attributes Show chunking_settings attributes object
    • max_chunk_size number

      The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

      Default value is 250.

    • overlap number

      The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

      Default value is 100.

    • sentence_overlap number

      The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

      Default value is 1.

    • strategy string

      The chunking strategy: sentence or word.

      Default value is sentence.

  • service string Required

    The type of service supported for the specified task type. In this case, jinaai.

    Value is jinaai.

  • service_settings object Required

    Settings used to install the inference model. These settings are specific to the jinaai service.

    Hide service_settings attributes Show service_settings attributes object
    • api_key string Required

      A valid API key of your JinaAI account.

      IMPORTANT: You need to provide the API key only once, during the inference model creation. The get inference endpoint API does not retrieve your API key. After creating the inference model, you cannot change the associated API key. If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.

      External documentation
    • model_id string

      The name of the model to use for the inference task. For a rerank task, it is required. For a text_embedding task, it is optional.

    • rate_limit object

      This setting helps to minimize the number of rate limit errors returned from JinaAI. By default, the jinaai service sets the number of requests allowed per minute to 2000 for all task types.

      Hide rate_limit attribute Show rate_limit attribute object
      • requests_per_minute number

        The number of requests allowed per minute.

    • similarity string

      For a text_embedding task, the similarity measure. One of cosine, dot_product, l2_norm. The default values varies with the embedding type. For example, a float embedding type uses a dot_product similarity measure by default.

      Values are cosine, dot_product, or l2_norm.

  • task_settings object

    Settings to configure the inference task. These settings are specific to the task type you specified.

    Hide task_settings attributes Show task_settings attributes object
    • return_documents boolean

      For a rerank task, return the doc text within the results.

    • task string

      For a text_embedding task, the task passed to the model. Valid values are:

      • classification: Use it for embeddings passed through a text classifier.
      • clustering: Use it for the embeddings run through a clustering algorithm.
      • ingest: Use it for storing document embeddings in a vector database.
      • search: Use it for storing embeddings of search queries run against a vector database to find relevant documents.

      Values are classification, clustering, ingest, or search.

    • top_n number

      For a rerank task, the number of most relevant documents to return. It defaults to the number of the documents. If this inference endpoint is used in a text_similarity_reranker retriever query and top_n is set, it must be greater than or equal to rank_window_size in the query.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • chunking_settings object

      Chunking configuration object

      Hide chunking_settings attributes Show chunking_settings attributes object
      • max_chunk_size number

        The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

        Default value is 250.

      • overlap number

        The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

        Default value is 100.

      • sentence_overlap number

        The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

        Default value is 1.

      • strategy string

        The chunking strategy: sentence or word.

        Default value is sentence.

    • service string Required

      The service type

    • service_settings object Required

      Settings specific to the service

    • task_settings object

      Task settings specific to the service and task type

    • inference_id string Required

      The inference Id

    • task_type string Required

      The task type

      Values are text_embedding or rerank.

PUT /_inference/{task_type}/{jinaai_inference_id}
PUT _inference/text_embedding/jinaai-embeddings
{
    "service": "jinaai",
    "service_settings": {
        "model_id": "jina-embeddings-v3",
        "api_key": "JinaAi-Api-key"
    }
}
resp = client.inference.put(
    task_type="text_embedding",
    inference_id="jinaai-embeddings",
    inference_config={
        "service": "jinaai",
        "service_settings": {
            "model_id": "jina-embeddings-v3",
            "api_key": "JinaAi-Api-key"
        }
    },
)
const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "jinaai-embeddings",
  inference_config: {
    service: "jinaai",
    service_settings: {
      model_id: "jina-embeddings-v3",
      api_key: "JinaAi-Api-key",
    },
  },
});
response = client.inference.put(
  task_type: "text_embedding",
  inference_id: "jinaai-embeddings",
  body: {
    "service": "jinaai",
    "service_settings": {
      "model_id": "jina-embeddings-v3",
      "api_key": "JinaAi-Api-key"
    }
  }
)
$resp = $client->inference()->put([
    "task_type" => "text_embedding",
    "inference_id" => "jinaai-embeddings",
    "body" => [
        "service" => "jinaai",
        "service_settings" => [
            "model_id" => "jina-embeddings-v3",
            "api_key" => "JinaAi-Api-key",
        ],
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"service":"jinaai","service_settings":{"model_id":"jina-embeddings-v3","api_key":"JinaAi-Api-key"}}' "$ELASTICSEARCH_URL/_inference/text_embedding/jinaai-embeddings"
client.inference().put(p -> p
    .inferenceId("jinaai-embeddings")
    .taskType(TaskType.TextEmbedding)
    .inferenceConfig(i -> i
        .service("jinaai")
        .serviceSettings(JsonData.fromJson("{\"model_id\":\"jina-embeddings-v3\",\"api_key\":\"JinaAi-Api-key\"}"))
    )
);
Request examples
Run `PUT _inference/text_embedding/jinaai-embeddings` to create an inference endpoint for text embedding tasks using the JinaAI service.
{
    "service": "jinaai",
    "service_settings": {
        "model_id": "jina-embeddings-v3",
        "api_key": "JinaAi-Api-key"
    }
}
Run `PUT _inference/rerank/jinaai-rerank` to create an inference endpoint for rerank tasks using the JinaAI service.
{
    "service": "jinaai",
    "service_settings": {
        "api_key": "JinaAI-Api-key",
        "model_id": "jina-reranker-v2-base-multilingual"
    },
    "task_settings": {
        "top_n": 10,
        "return_documents": true
    }
}






























































Delete IP geolocation database configurations Generally available; Added in 8.15.0

DELETE /_ingest/ip_location/database/{id}

Required authorization

  • Cluster privileges: manage

Path parameters

  • id string | array[string] Required

    A comma-separated list of IP location database configurations.

Query parameters

  • master_timeout string

    The period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error. A value of -1 indicates that the request should never time out.

    Values are -1 or 0.

  • timeout string

    The period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error. A value of -1 indicates that the request should never time out.

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

DELETE /_ingest/ip_location/database/{id}
DELETE /_ingest/ip_location/database/my-database-id
resp = client.ingest.delete_ip_location_database(
    id="my-database-id",
)
const response = await client.ingest.deleteIpLocationDatabase({
  id: "my-database-id",
});
response = client.ingest.delete_ip_location_database(
  id: "my-database-id"
)
$resp = $client->ingest()->deleteIpLocationDatabase([
    "id" => "my-database-id",
]);
curl -X DELETE -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_ingest/ip_location/database/my-database-id"
client.ingest().deleteIpLocationDatabase(d -> d
    .id("my-database-id")
);

























































Logstash

Logstash APIs enable you to manage pipelines that are used by Logstash Central Management.

Learn more about centralized pipeline management






















































Get datafeeds configuration info Generally available; Added in 5.5.0

GET /_ml/datafeeds/{datafeed_id}

All methods and paths for this operation:

GET /_ml/datafeeds

GET /_ml/datafeeds/{datafeed_id}

You can get information for multiple datafeeds in a single API request by using a comma-separated list of datafeeds or a wildcard expression. You can get information for all datafeeds by using _all, by specifying * as the <feed_id>, or by omitting the <feed_id>. This API returns a maximum of 10,000 datafeeds.

Required authorization

  • Cluster privileges: monitor_ml

Path parameters

  • datafeed_id string | array[string] Required

    Identifier for the datafeed. It can be a datafeed identifier or a wildcard expression. If you do not specify one of these options, the API returns information about all datafeeds.

Query parameters

  • allow_no_match boolean

    Specifies what to do when the request:

    1. Contains wildcard expressions and there are no datafeeds that match.
    2. Contains the _all string or no identifiers and there are no matches.
    3. Contains wildcard expressions and there are only partial matches.

    The default value is true, which returns an empty datafeeds array when there are no matches and the subset of results when there are partial matches. If this parameter is false, the request returns a 404 status code when there are no matches or only partial matches.

  • exclude_generated boolean

    Indicates if certain fields should be removed from the configuration on retrieval. This allows the configuration to be in an acceptable format to be retrieved and then added to another cluster.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • count number Required
    • datafeeds array[object] Required
      Hide datafeeds attributes Show datafeeds attributes object
      • aggregations object
      • authorization object

        The security privileges that the datafeed uses to run its queries. If Elastic Stack security features were disabled at the time of the most recent update to the datafeed, this property is omitted.

        Hide authorization attributes Show authorization attributes object
        • api_key object

          If an API key was used for the most recent update to the datafeed, its name and identifier are listed in the response.

        • roles array[string]

          If a user ID was used for the most recent update to the datafeed, its roles at the time of the update are listed in the response.

        • service_account string

          If a service account was used for the most recent update to the datafeed, the account name is listed in the response.

      • chunking_config object
        Hide chunking_config attributes Show chunking_config attributes object
        • mode string Required

          If the mode is auto, the chunk size is dynamically calculated; this is the recommended value when the datafeed does not use aggregations. If the mode is manual, chunking is applied according to the specified time_span; use this mode when the datafeed uses aggregations. If the mode is off, no chunking is applied.

          Values are auto, manual, or off.

        • time_span string

          The time span that each search will be querying. This setting is applicable only when the mode is set to manual.

      • datafeed_id string Required
      • frequency string

        A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

      • indices array[string] Required
      • indexes array[string]
      • job_id string Required
      • max_empty_searches number
      • query_delay string

        A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

      • script_fields object
        Hide script_fields attribute Show script_fields attribute object
        • * object Additional properties
          Hide * attributes Show * attributes object
          • script object Required
          • ignore_failure boolean
      • scroll_size number
      • delayed_data_check_config object Required
        Hide delayed_data_check_config attributes Show delayed_data_check_config attributes object
        • check_window string

          The window of time that is searched for late data. This window of time ends with the latest finalized bucket. It defaults to null, which causes an appropriate check_window to be calculated when the real-time datafeed runs. In particular, the default check_window span calculation is based on the maximum of 2h or 8 * bucket_span.

        • enabled boolean Required

          Specifies whether the datafeed periodically checks for delayed data.

      • runtime_mappings object
        Hide runtime_mappings attribute Show runtime_mappings attribute object
        • * object Additional properties
          Hide * attributes Show * attributes object
          • fields object

            For type composite

          • fetch_fields array[object]

            For type lookup

          • format string

            A custom format for date type runtime fields.

      • indices_options object

        Controls how to deal with unavailable concrete indices (closed or missing), how wildcard expressions are expanded to actual indices (all, closed or open indices) and how to deal with wildcard expressions that resolve to no indices.

        Hide indices_options attributes Show indices_options attributes object
        • allow_no_indices boolean

          If false, the request returns an error if any wildcard expression, index alias, or _all value targets only missing or closed indices. This behavior applies even if the request targets other open indices. For example, a request targeting foo*,bar* returns an error if an index starts with foo but no index starts with bar.

        • expand_wildcards string | array[string]

          Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as open,hidden.

          Supported values include:

          • all: Match any data stream or index, including hidden ones.
          • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
          • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
          • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
          • none: Wildcard expressions are not accepted.
        • ignore_unavailable boolean

          If true, missing or closed indices are not included in the response.

          Default value is false.

        • ignore_throttled boolean

          If true, concrete, expanded or aliased indices are ignored when frozen.

          Default value is true.

      • query object Required

        The Elasticsearch query domain-specific language (DSL). This value corresponds to the query object in an Elasticsearch search POST body. All the options that are supported by Elasticsearch can be used, as this object is passed verbatim to Elasticsearch. By default, this property has the following value: {"match_all": {"boost": 1}}.

        Query DSL
GET /_ml/datafeeds/{datafeed_id}
GET _ml/datafeeds/datafeed-high_sum_total_sales
resp = client.ml.get_datafeeds(
    datafeed_id="datafeed-high_sum_total_sales",
)
const response = await client.ml.getDatafeeds({
  datafeed_id: "datafeed-high_sum_total_sales",
});
response = client.ml.get_datafeeds(
  datafeed_id: "datafeed-high_sum_total_sales"
)
$resp = $client->ml()->getDatafeeds([
    "datafeed_id" => "datafeed-high_sum_total_sales",
]);
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_ml/datafeeds/datafeed-high_sum_total_sales"
client.ml().getDatafeeds(g -> g
    .datafeedId("datafeed-high_sum_total_sales")
);

Create a datafeed Generally available; Added in 5.4.0

PUT /_ml/datafeeds/{datafeed_id}

Datafeeds retrieve data from Elasticsearch for analysis by an anomaly detection job. You can associate only one datafeed with each anomaly detection job. The datafeed contains a query that runs at a defined interval (frequency). If you are concerned about delayed data, you can add a delay (query_delay') at each interval. By default, the datafeed uses the following query:{"match_all": {"boost": 1}}`.

When Elasticsearch security features are enabled, your datafeed remembers which roles the user who created it had at the time of creation and runs the query using those same roles. If you provide secondary authorization headers, those credentials are used instead. You must use Kibana, this API, or the create anomaly detection jobs API to create a datafeed. Do not add a datafeed directly to the .ml-config index. Do not give users write privileges on the .ml-config index.

Required authorization

  • Index privileges: read
  • Cluster privileges: manage_ml

Path parameters

  • datafeed_id string Required

    A numerical character string that uniquely identifies the datafeed. This identifier can contain lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores. It must start and end with alphanumeric characters.

Query parameters

  • allow_no_indices boolean

    If true, wildcard indices expressions that resolve into no concrete indices are ignored. This includes the _all string or when no indices are specified.

  • expand_wildcards string | array[string]

    Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values.

    Supported values include:

    • all: Match any data stream or index, including hidden ones.
    • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
    • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
    • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
    • none: Wildcard expressions are not accepted.

    Values are all, open, closed, hidden, or none.

  • ignore_throttled boolean Deprecated

    If true, concrete, expanded, or aliased indices are ignored when frozen.

  • ignore_unavailable boolean

    If true, unavailable indices (missing or closed) are ignored.

application/json

Body Required

  • aggregations object

    If set, the datafeed performs aggregation searches. Support for aggregations is limited and should be used only with low cardinality data.

  • chunking_config object

    Datafeeds might be required to search over long time periods, for several months or years. This search is split into time chunks in order to ensure the load on Elasticsearch is managed. Chunking configuration controls how the size of these time chunks are calculated; it is an advanced configuration option.

    Hide chunking_config attributes Show chunking_config attributes object
    • mode string Required

      If the mode is auto, the chunk size is dynamically calculated; this is the recommended value when the datafeed does not use aggregations. If the mode is manual, chunking is applied according to the specified time_span; use this mode when the datafeed uses aggregations. If the mode is off, no chunking is applied.

      Values are auto, manual, or off.

    • time_span string

      The time span that each search will be querying. This setting is applicable only when the mode is set to manual.

  • delayed_data_check_config object

    Specifies whether the datafeed checks for missing data and the size of the window. The datafeed can optionally search over indices that have already been read in an effort to determine whether any data has subsequently been added to the index. If missing data is found, it is a good indication that the query_delay is set too low and the data is being indexed after the datafeed has passed that moment in time. This check runs only on real-time datafeeds.

    Hide delayed_data_check_config attributes Show delayed_data_check_config attributes object
    • check_window string

      The window of time that is searched for late data. This window of time ends with the latest finalized bucket. It defaults to null, which causes an appropriate check_window to be calculated when the real-time datafeed runs. In particular, the default check_window span calculation is based on the maximum of 2h or 8 * bucket_span.

    • enabled boolean Required

      Specifies whether the datafeed periodically checks for delayed data.

  • frequency string

    The interval at which scheduled queries are made while the datafeed runs in real time. The default value is either the bucket span for short bucket spans, or, for longer bucket spans, a sensible fraction of the bucket span. When frequency is shorter than the bucket span, interim results for the last (partial) bucket are written then eventually overwritten by the full bucket results. If the datafeed uses aggregations, this value must be divisible by the interval of the date histogram aggregation.

  • indices string | array[string]

    An array of index names. Wildcards are supported. If any of the indices are in remote clusters, the master nodes and the machine learning nodes must have the remote_cluster_client role.

  • indices_options object

    Specifies index expansion options that are used during search

    Hide indices_options attributes Show indices_options attributes object
    • allow_no_indices boolean

      If false, the request returns an error if any wildcard expression, index alias, or _all value targets only missing or closed indices. This behavior applies even if the request targets other open indices. For example, a request targeting foo*,bar* returns an error if an index starts with foo but no index starts with bar.

    • expand_wildcards string | array[string]

      Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as open,hidden.

      Supported values include:

      • all: Match any data stream or index, including hidden ones.
      • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
      • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
      • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
      • none: Wildcard expressions are not accepted.
    • ignore_unavailable boolean

      If true, missing or closed indices are not included in the response.

      Default value is false.

    • ignore_throttled boolean

      If true, concrete, expanded or aliased indices are ignored when frozen.

      Default value is true.

  • job_id string

    Identifier for the anomaly detection job.

  • max_empty_searches number

    If a real-time datafeed has never seen any data (including during any initial training period), it automatically stops and closes the associated job after this many real-time searches return no documents. In other words, it stops after frequency times max_empty_searches of real-time operation. If not set, a datafeed with no end time that sees no data remains started until it is explicitly stopped. By default, it is not set.

  • query object

    The Elasticsearch query domain-specific language (DSL). This value corresponds to the query object in an Elasticsearch search POST body. All the options that are supported by Elasticsearch can be used, as this object is passed verbatim to Elasticsearch.

    External documentation
  • query_delay string

    The number of seconds behind real time that data is queried. For example, if data from 10:04 a.m. might not be searchable in Elasticsearch until 10:06 a.m., set this property to 120 seconds. The default value is randomly selected between 60s and 120s. This randomness improves the query performance when there are multiple jobs running on the same node.

  • runtime_mappings object

    Specifies runtime fields for the datafeed search.

    Hide runtime_mappings attribute Show runtime_mappings attribute object
    • * object Additional properties
      Hide * attributes Show * attributes object
      • fields object

        For type composite

        Hide fields attribute Show fields attribute object
        • * object Additional properties
          Hide * attribute Show * attribute object
          • type string Required

            Values are boolean, composite, date, double, geo_point, geo_shape, ip, keyword, long, or lookup.

      • fetch_fields array[object]

        For type lookup

        Hide fetch_fields attributes Show fetch_fields attributes object
        • field string Required

          Path to field or array of paths. Some API's support wildcards in the path to select multiple fields.

        • format string
      • format string

        A custom format for date type runtime fields.

      • input_field string

        For type lookup

      • target_field string

        For type lookup

      • target_index string

        For type lookup

      • script object

        Painless script executed at query time.

        Hide script attributes Show script attributes object
        • source string

          The script source.

        • id string

          The id for a stored script.

        • params object

          Specifies any named parameters that are passed into the script as variables. Use parameters instead of hard-coded values to decrease compile time.

          Hide params attribute Show params attribute object
          • * object Additional properties
        • lang
        • options object
          Hide options attribute Show options attribute object
          • * string Additional properties
      • type string Required

        Field type, which can be: boolean, composite, date, double, geo_point, ip,keyword, long, or lookup.

        Values are boolean, composite, date, double, geo_point, geo_shape, ip, keyword, long, or lookup.

  • script_fields object

    Specifies scripts that evaluate custom expressions and returns script fields to the datafeed. The detector configuration objects in a job can contain functions that use these script fields.

    Hide script_fields attribute Show script_fields attribute object
    • * object Additional properties
      Hide * attributes Show * attributes object
      • script object Required
        Hide script attributes Show script attributes object
        • source string

          The script source.

        • id string

          The id for a stored script.

        • params object

          Specifies any named parameters that are passed into the script as variables. Use parameters instead of hard-coded values to decrease compile time.

          Hide params attribute Show params attribute object
          • * object Additional properties
        • lang string

          Specifies the language the script is written in.

          Supported values include:

          • painless: Painless scripting language, purpose-built for Elasticsearch.
          • expression: Lucene’s expressions language, compiles a JavaScript expression to bytecode.
          • mustache: Mustache templated, used for templates.
          • java: Expert Java API
          Any of:

          Specifies the language the script is written in.

          Supported values include:

          • painless: Painless scripting language, purpose-built for Elasticsearch.
          • expression: Lucene’s expressions language, compiles a JavaScript expression to bytecode.
          • mustache: Mustache templated, used for templates.
          • java: Expert Java API

          Values are painless, expression, mustache, or java.

        • options object
          Hide options attribute Show options attribute object
          • * string Additional properties
      • ignore_failure boolean
  • scroll_size number

    The size parameter that is used in Elasticsearch searches when the datafeed does not use aggregations. The maximum value is the value of index.max_result_window, which is 10,000 by default.

    Default value is 1000.

  • headers object

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • aggregations object
    • authorization object
      Hide authorization attributes Show authorization attributes object
      • api_key object

        If an API key was used for the most recent update to the datafeed, its name and identifier are listed in the response.

        Hide api_key attributes Show api_key attributes object
        • id string Required

          The identifier for the API key.

        • name string Required

          The name of the API key.

      • roles array[string]

        If a user ID was used for the most recent update to the datafeed, its roles at the time of the update are listed in the response.

      • service_account string

        If a service account was used for the most recent update to the datafeed, the account name is listed in the response.

    • chunking_config object Required
      Hide chunking_config attributes Show chunking_config attributes object
      • mode string Required

        If the mode is auto, the chunk size is dynamically calculated; this is the recommended value when the datafeed does not use aggregations. If the mode is manual, chunking is applied according to the specified time_span; use this mode when the datafeed uses aggregations. If the mode is off, no chunking is applied.

        Values are auto, manual, or off.

      • time_span string

        The time span that each search will be querying. This setting is applicable only when the mode is set to manual.

    • delayed_data_check_config object
      Hide delayed_data_check_config attributes Show delayed_data_check_config attributes object
      • check_window string

        The window of time that is searched for late data. This window of time ends with the latest finalized bucket. It defaults to null, which causes an appropriate check_window to be calculated when the real-time datafeed runs. In particular, the default check_window span calculation is based on the maximum of 2h or 8 * bucket_span.

      • enabled boolean Required

        Specifies whether the datafeed periodically checks for delayed data.

    • datafeed_id string Required
    • frequency string

      A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

    • indices array[string] Required
    • job_id string Required
    • indices_options object

      Controls how to deal with unavailable concrete indices (closed or missing), how wildcard expressions are expanded to actual indices (all, closed or open indices) and how to deal with wildcard expressions that resolve to no indices.

      Hide indices_options attributes Show indices_options attributes object
      • allow_no_indices boolean

        If false, the request returns an error if any wildcard expression, index alias, or _all value targets only missing or closed indices. This behavior applies even if the request targets other open indices. For example, a request targeting foo*,bar* returns an error if an index starts with foo but no index starts with bar.

      • expand_wildcards string | array[string]

        Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as open,hidden.

        Supported values include:

        • all: Match any data stream or index, including hidden ones.
        • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
        • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
        • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
        • none: Wildcard expressions are not accepted.
      • ignore_unavailable boolean

        If true, missing or closed indices are not included in the response.

        Default value is false.

      • ignore_throttled boolean

        If true, concrete, expanded or aliased indices are ignored when frozen.

        Default value is true.

    • max_empty_searches number
    • query object Required

      An Elasticsearch Query DSL (Domain Specific Language) object that defines a query.

      External documentation
    • query_delay string Required

      A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

    • runtime_mappings object
      Hide runtime_mappings attribute Show runtime_mappings attribute object
      • * object Additional properties
        Hide * attributes Show * attributes object
        • fields object

          For type composite

          Hide fields attribute Show fields attribute object
          • * object Additional properties
        • fetch_fields array[object]

          For type lookup

          Hide fetch_fields attributes Show fetch_fields attributes object
          • field
          • format string
        • format string

          A custom format for date type runtime fields.

        • input_field string

          For type lookup

        • target_field string

          For type lookup

        • target_index string

          For type lookup

        • script object

          Painless script executed at query time.

          Hide script attributes Show script attributes object
          • source string

            The script source.

          • params object

            Specifies any named parameters that are passed into the script as variables. Use parameters instead of hard-coded values to decrease compile time.

          • options object
        • type string Required

          Field type, which can be: boolean, composite, date, double, geo_point, ip,keyword, long, or lookup.

          Values are boolean, composite, date, double, geo_point, geo_shape, ip, keyword, long, or lookup.

    • script_fields object
      Hide script_fields attribute Show script_fields attribute object
      • * object Additional properties
        Hide * attributes Show * attributes object
        • script object Required
          Hide script attributes Show script attributes object
          • source string

            The script source.

          • id string

            The id for a stored script.

          • params object

            Specifies any named parameters that are passed into the script as variables. Use parameters instead of hard-coded values to decrease compile time.

            Hide params attribute Show params attribute object
            • * object Additional properties
          • lang
          • options object
            Hide options attribute Show options attribute object
            • * string Additional properties
        • ignore_failure boolean
    • scroll_size number Required
PUT /_ml/datafeeds/{datafeed_id}
PUT _ml/datafeeds/datafeed-test-job?pretty
{
  "indices": [
    "kibana_sample_data_logs"
  ],
  "query": {
    "bool": {
      "must": [
        {
          "match_all": {}
        }
      ]
    }
  },
  "job_id": "test-job"
}
resp = client.ml.put_datafeed(
    datafeed_id="datafeed-test-job",
    pretty=True,
    indices=[
        "kibana_sample_data_logs"
    ],
    query={
        "bool": {
            "must": [
                {
                    "match_all": {}
                }
            ]
        }
    },
    job_id="test-job",
)
const response = await client.ml.putDatafeed({
  datafeed_id: "datafeed-test-job",
  pretty: "true",
  indices: ["kibana_sample_data_logs"],
  query: {
    bool: {
      must: [
        {
          match_all: {},
        },
      ],
    },
  },
  job_id: "test-job",
});
response = client.ml.put_datafeed(
  datafeed_id: "datafeed-test-job",
  pretty: "true",
  body: {
    "indices": [
      "kibana_sample_data_logs"
    ],
    "query": {
      "bool": {
        "must": [
          {
            "match_all": {}
          }
        ]
      }
    },
    "job_id": "test-job"
  }
)
$resp = $client->ml()->putDatafeed([
    "datafeed_id" => "datafeed-test-job",
    "pretty" => "true",
    "body" => [
        "indices" => array(
            "kibana_sample_data_logs",
        ),
        "query" => [
            "bool" => [
                "must" => array(
                    [
                        "match_all" => new ArrayObject([]),
                    ],
                ),
            ],
        ],
        "job_id" => "test-job",
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"indices":["kibana_sample_data_logs"],"query":{"bool":{"must":[{"match_all":{}}]}},"job_id":"test-job"}' "$ELASTICSEARCH_URL/_ml/datafeeds/datafeed-test-job?pretty"
Request example
An example body for a `PUT _ml/datafeeds/datafeed-test-job?pretty` request.
{
  "indices": [
    "kibana_sample_data_logs"
  ],
  "query": {
    "bool": {
      "must": [
        {
          "match_all": {}
        }
      ]
    }
  },
  "job_id": "test-job"
}




























Create an anomaly detection job Generally available; Added in 5.4.0

PUT /_ml/anomaly_detectors/{job_id}

If you include a datafeed_config, you must have read index privileges on the source index. If you include a datafeed_config but do not provide a query, the datafeed uses {"match_all": {"boost": 1}}.

Required authorization

  • Index privileges: read
  • Cluster privileges: manage_ml

Path parameters

  • job_id string Required

    The identifier for the anomaly detection job. This identifier can contain lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores. It must start and end with alphanumeric characters.

Query parameters

  • allow_no_indices boolean

    If true, wildcard indices expressions that resolve into no concrete indices are ignored. This includes the _all string or when no indices are specified.

  • expand_wildcards string | array[string]

    Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values.

    Supported values include:

    • all: Match any data stream or index, including hidden ones.
    • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
    • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
    • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
    • none: Wildcard expressions are not accepted.

    Values are all, open, closed, hidden, or none.

  • ignore_throttled boolean Deprecated

    If true, concrete, expanded or aliased indices are ignored when frozen.

  • ignore_unavailable boolean

    If true, unavailable indices (missing or closed) are ignored.

application/json

Body Required

  • allow_lazy_open boolean

    Advanced configuration option. Specifies whether this job can open when there is insufficient machine learning node capacity for it to be immediately assigned to a node. By default, if a machine learning node with capacity to run the job cannot immediately be found, the open anomaly detection jobs API returns an error. However, this is also subject to the cluster-wide xpack.ml.max_lazy_ml_nodes setting. If this option is set to true, the open anomaly detection jobs API does not return an error and the job waits in the opening state until sufficient machine learning node capacity is available.

    Default value is false.

  • analysis_config object Required

    Specifies how to analyze the data. After you create a job, you cannot change the analysis configuration; all the properties are informational.

    Hide analysis_config attributes Show analysis_config attributes object
    • bucket_span string

      The size of the interval that the analysis is aggregated into, typically between 5m and 1h. This value should be either a whole number of days or equate to a whole number of buckets in one day. If the anomaly detection job uses a datafeed with aggregations, this value must also be divisible by the interval of the date histogram aggregation.

    • categorization_analyzer string | object

      If categorization_field_name is specified, you can also define the analyzer that is used to interpret the categorization field. This property cannot be used at the same time as categorization_filters. The categorization analyzer specifies how the categorization_field is interpreted by the categorization process. The categorization_analyzer field can be specified either as a string or as an object. If it is a string, it must refer to a built-in analyzer or one added by another plugin.

      One of:

      If categorization_field_name is specified, you can also define the analyzer that is used to interpret the categorization field. This property cannot be used at the same time as categorization_filters. The categorization analyzer specifies how the categorization_field is interpreted by the categorization process. The categorization_analyzer field can be specified either as a string or as an object. If it is a string, it must refer to a built-in analyzer or one added by another plugin.

    • categorization_field_name string

      If this property is specified, the values of the specified field will be categorized. The resulting categories must be used in a detector by setting by_field_name, over_field_name, or partition_field_name to the keyword mlcategory.

    • categorization_filters array[string]

      If categorization_field_name is specified, you can also define optional filters. This property expects an array of regular expressions. The expressions are used to filter out matching sequences from the categorization field values. You can use this functionality to fine tune the categorization by excluding sequences from consideration when categories are defined. For example, you can exclude SQL statements that appear in your log files. This property cannot be used at the same time as categorization_analyzer. If you only want to define simple regular expression filters that are applied prior to tokenization, setting this property is the easiest method. If you also want to customize the tokenizer or post-tokenization filtering, use the categorization_analyzer property instead and include the filters as pattern_replace character filters. The effect is exactly the same.

    • detectors array[object] Required

      Detector configuration objects specify which data fields a job analyzes. They also specify which analytical functions are used. You can specify multiple detectors for a job. If the detectors array does not contain at least one detector, no analysis can occur and an error is returned.

      Hide detectors attributes Show detectors attributes object
      • by_field_name string

        The field used to split the data. In particular, this property is used for analyzing the splits with respect to their own history. It is used for finding unusual values in the context of the split.

      • custom_rules array[object]

        Custom rules enable you to customize the way detectors operate. For example, a rule may dictate conditions under which results should be skipped. Kibana refers to custom rules as job rules.

        Hide custom_rules attributes Show custom_rules attributes object
        • actions array[string]

          The set of actions to be triggered when the rule applies. If more than one action is specified the effects of all actions are combined.

          Supported values include:

          • skip_result: The result will not be created. Unless you also specify skip_model_update, the model will be updated as usual with the corresponding series value.
          • skip_model_update: The value for that series will not be used to update the model. Unless you also specify skip_result, the results will be created as usual. This action is suitable when certain values are expected to be consistently anomalous and they affect the model in a way that negatively impacts the rest of the results.

          Values are skip_result or skip_model_update. Default value is ["skip_result"].

        • conditions array[object]

          An array of numeric conditions when the rule applies. A rule must either have a non-empty scope or at least one condition. Multiple conditions are combined together with a logical AND.

        • scope object

          A scope of series where the rule applies. A rule must either have a non-empty scope or at least one condition. By default, the scope includes all series. Scoping is allowed for any of the fields that are also specified in by_field_name, over_field_name, or partition_field_name.

      • detector_description string

        A description of the detector.

      • detector_index number

        A unique identifier for the detector. This identifier is based on the order of the detectors in the analysis_config, starting at zero. If you specify a value for this property, it is ignored.

      • exclude_frequent string

        If set, frequent entities are excluded from influencing the anomaly results. Entities can be considered frequent over time or frequent in a population. If you are working with both over and by fields, you can set exclude_frequent to all for both fields, or to by or over for those specific fields.

        Values are all, none, by, or over.

      • field_name string

        The field that the detector uses in the function. If you use an event rate function such as count or rare, do not specify this field. The field_name cannot contain double quotes or backslashes.

      • function string

        The analysis function that is used. For example, count, rare, mean, min, max, or sum.

      • over_field_name string

        The field used to split the data. In particular, this property is used for analyzing the splits with respect to the history of all splits. It is used for finding unusual values in the population of all splits.

      • partition_field_name string

        The field used to segment the analysis. When you use this property, you have completely independent baselines for each value of this field.

      • use_null boolean

        Defines whether a new series is used as the null series when there is no value for the by or partition fields.

        Default value is false.

    • influencers array[string]

      A comma separated list of influencer field names. Typically these can be the by, over, or partition fields that are used in the detector configuration. You might also want to use a field name that is not specifically named in a detector, but is available as part of the input data. When you use multiple detectors, the use of influencers is recommended as it aggregates results for each influencer entity.

    • latency string

      The size of the window in which to expect data that is out of time order. If you specify a non-zero value, it must be greater than or equal to one second. NOTE: Latency is applicable only when you send data by using the post data API.

    • model_prune_window string

      Advanced configuration option. Affects the pruning of models that have not been updated for the given time duration. The value must be set to a multiple of the bucket_span. If set too low, important information may be removed from the model. For jobs created in 8.1 and later, the default value is the greater of 30d or 20 times bucket_span.

    • multivariate_by_fields boolean

      This functionality is reserved for internal use. It is not supported for use in customer environments and is not subject to the support SLA of official GA features. If set to true, the analysis will automatically find correlations between metrics for a given by field value and report anomalies when those correlations cease to hold. For example, suppose CPU and memory usage on host A is usually highly correlated with the same metrics on host B. Perhaps this correlation occurs because they are running a load-balanced application. If you enable this property, anomalies will be reported when, for example, CPU usage on host A is high and the value of CPU usage on host B is low. That is to say, you’ll see an anomaly when the CPU of host A is unusual given the CPU of host B. To use the multivariate_by_fields property, you must also specify by_field_name in your detector.

    • per_partition_categorization object

      Settings related to how categorization interacts with partition fields.

      Hide per_partition_categorization attributes Show per_partition_categorization attributes object
      • enabled boolean

        To enable this setting, you must also set the partition_field_name property to the same value in every detector that uses the keyword mlcategory. Otherwise, job creation fails.

      • stop_on_warn boolean

        This setting can be set to true only if per-partition categorization is enabled. If true, both categorization and subsequent anomaly detection stops for partitions where the categorization status changes to warn. This setting makes it viable to have a job where it is expected that categorization works well for some partitions but not others; you do not pay the cost of bad categorization forever in the partitions where it works badly.

    • summary_count_field_name string

      If this property is specified, the data that is fed to the job is expected to be pre-summarized. This property value is the name of the field that contains the count of raw data points that have been summarized. The same summary_count_field_name applies to all detectors in the job. NOTE: The summary_count_field_name property cannot be used with the metric function.

  • analysis_limits object

    Limits can be applied for the resources required to hold the mathematical models in memory. These limits are approximate and can be set per job. They do not control the memory used by other processes, for example the Elasticsearch Java processes.

    Hide analysis_limits attributes Show analysis_limits attributes object
    • categorization_examples_limit number

      The maximum number of examples stored per category in memory and in the results data store. If you increase this value, more examples are available, however it requires that you have more storage available. If you set this value to 0, no examples are stored. NOTE: The categorization_examples_limit applies only to analysis that uses categorization.

      Default value is 4.

    • model_memory_limit number | string

      The approximate maximum amount of memory resources that are required for analytical processing. Once this limit is approached, data pruning becomes more aggressive. Upon exceeding this limit, new entities are not modeled. If the xpack.ml.max_model_memory_limit setting has a value greater than 0 and less than 1024mb, that value is used instead of the default. The default value is relatively small to ensure that high resource usage is a conscious decision. If you have jobs that are expected to analyze high cardinality fields, you will likely need to use a higher value. If you specify a number instead of a string, the units are assumed to be MiB. Specifying a string is recommended for clarity. If you specify a byte size unit of b or kb and the number does not equate to a discrete number of megabytes, it is rounded down to the closest MiB. The minimum valid value is 1 MiB. If you specify a value less than 1 MiB, an error occurs. If you specify a value for the xpack.ml.max_model_memory_limit setting, an error occurs when you try to create jobs that have model_memory_limit values greater than that setting value.

      One of:

      The approximate maximum amount of memory resources that are required for analytical processing. Once this limit is approached, data pruning becomes more aggressive. Upon exceeding this limit, new entities are not modeled. If the xpack.ml.max_model_memory_limit setting has a value greater than 0 and less than 1024mb, that value is used instead of the default. The default value is relatively small to ensure that high resource usage is a conscious decision. If you have jobs that are expected to analyze high cardinality fields, you will likely need to use a higher value. If you specify a number instead of a string, the units are assumed to be MiB. Specifying a string is recommended for clarity. If you specify a byte size unit of b or kb and the number does not equate to a discrete number of megabytes, it is rounded down to the closest MiB. The minimum valid value is 1 MiB. If you specify a value less than 1 MiB, an error occurs. If you specify a value for the xpack.ml.max_model_memory_limit setting, an error occurs when you try to create jobs that have model_memory_limit values greater than that setting value.

  • background_persist_interval string

    Advanced configuration option. The time between each periodic persistence of the model. The default value is a randomized value between 3 to 4 hours, which avoids all jobs persisting at exactly the same time. The smallest allowed value is 1 hour. For very large models (several GB), persistence could take 10-20 minutes, so do not set the background_persist_interval value too low.

  • custom_settings object

    Advanced configuration option. Contains custom meta data about the job.

  • daily_model_snapshot_retention_after_days number

    Advanced configuration option, which affects the automatic removal of old model snapshots for this job. It specifies a period of time (in days) after which only the first snapshot per day is retained. This period is relative to the timestamp of the most recent snapshot for this job. Valid values range from 0 to model_snapshot_retention_days.

    Default value is 1.

  • data_description object Required

    Defines the format of the input data when you send data to the job by using the post data API. Note that when configure a datafeed, these properties are automatically set. When data is received via the post data API, it is not stored in Elasticsearch. Only the results for anomaly detection are retained.

    Hide data_description attributes Show data_description attributes object
    • format string

      Only JSON format is supported at this time.

    • time_field string

      The name of the field that contains the timestamp.

    • time_format string

      The time format, which can be epoch, epoch_ms, or a custom pattern. The value epoch refers to UNIX or Epoch time (the number of seconds since 1 Jan 1970). The value epoch_ms indicates that time is measured in milliseconds since the epoch. The epoch and epoch_ms time formats accept either integer or real values. Custom patterns must conform to the Java DateTimeFormatter class. When you use date-time formatting patterns, it is recommended that you provide the full date, time and time zone. For example: yyyy-MM-dd'T'HH:mm:ssX. If the pattern that you specify is not sufficient to produce a complete timestamp, job creation fails.

      Default value is epoch.

    • field_delimiter string
  • datafeed_config object

    Defines a datafeed for the anomaly detection job. If Elasticsearch security features are enabled, your datafeed remembers which roles the user who created it had at the time of creation and runs the query using those same roles. If you provide secondary authorization headers, those credentials are used instead.

    Hide datafeed_config attributes Show datafeed_config attributes object
    • aggregations object

      If set, the datafeed performs aggregation searches. Support for aggregations is limited and should be used only with low cardinality data.

    • chunking_config object

      Datafeeds might be required to search over long time periods, for several months or years. This search is split into time chunks in order to ensure the load on Elasticsearch is managed. Chunking configuration controls how the size of these time chunks are calculated and is an advanced configuration option.

      Hide chunking_config attributes Show chunking_config attributes object
      • mode string Required

        If the mode is auto, the chunk size is dynamically calculated; this is the recommended value when the datafeed does not use aggregations. If the mode is manual, chunking is applied according to the specified time_span; use this mode when the datafeed uses aggregations. If the mode is off, no chunking is applied.

        Values are auto, manual, or off.

      • time_span string

        The time span that each search will be querying. This setting is applicable only when the mode is set to manual.

    • datafeed_id string

      A numerical character string that uniquely identifies the datafeed. This identifier can contain lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores. It must start and end with alphanumeric characters. The default value is the job identifier.

    • delayed_data_check_config object

      Specifies whether the datafeed checks for missing data and the size of the window. The datafeed can optionally search over indices that have already been read in an effort to determine whether any data has subsequently been added to the index. If missing data is found, it is a good indication that the query_delay option is set too low and the data is being indexed after the datafeed has passed that moment in time. This check runs only on real-time datafeeds.

      Hide delayed_data_check_config attributes Show delayed_data_check_config attributes object
      • check_window string

        The window of time that is searched for late data. This window of time ends with the latest finalized bucket. It defaults to null, which causes an appropriate check_window to be calculated when the real-time datafeed runs. In particular, the default check_window span calculation is based on the maximum of 2h or 8 * bucket_span.

      • enabled boolean Required

        Specifies whether the datafeed periodically checks for delayed data.

    • frequency string

      The interval at which scheduled queries are made while the datafeed runs in real time. The default value is either the bucket span for short bucket spans, or, for longer bucket spans, a sensible fraction of the bucket span. For example: 150s. When frequency is shorter than the bucket span, interim results for the last (partial) bucket are written then eventually overwritten by the full bucket results. If the datafeed uses aggregations, this value must be divisible by the interval of the date histogram aggregation.

    • indices string | array[string]

      An array of index names. Wildcards are supported. If any indices are in remote clusters, the machine learning nodes must have the remote_cluster_client role.

    • indices_options object

      Specifies index expansion options that are used during search.

      Hide indices_options attributes Show indices_options attributes object
      • allow_no_indices boolean

        If false, the request returns an error if any wildcard expression, index alias, or _all value targets only missing or closed indices. This behavior applies even if the request targets other open indices. For example, a request targeting foo*,bar* returns an error if an index starts with foo but no index starts with bar.

      • expand_wildcards string | array[string]

        Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as open,hidden.

        Supported values include:

        • all: Match any data stream or index, including hidden ones.
        • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
        • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
        • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
        • none: Wildcard expressions are not accepted.
      • ignore_unavailable boolean

        If true, missing or closed indices are not included in the response.

        Default value is false.

      • ignore_throttled boolean

        If true, concrete, expanded or aliased indices are ignored when frozen.

        Default value is true.

    • job_id string
    • max_empty_searches number

      If a real-time datafeed has never seen any data (including during any initial training period) then it will automatically stop itself and close its associated job after this many real-time searches that return no documents. In other words, it will stop after frequency times max_empty_searches of real-time operation. If not set then a datafeed with no end time that sees no data will remain started until it is explicitly stopped.

    • query object

      The Elasticsearch query domain-specific language (DSL). This value corresponds to the query object in an Elasticsearch search POST body. All the options that are supported by Elasticsearch can be used, as this object is passed verbatim to Elasticsearch.

      External documentation
    • query_delay string

      The number of seconds behind real time that data is queried. For example, if data from 10:04 a.m. might not be searchable in Elasticsearch until 10:06 a.m., set this property to 120 seconds. The default value is randomly selected between 60s and 120s. This randomness improves the query performance when there are multiple jobs running on the same node.

    • runtime_mappings object

      Specifies runtime fields for the datafeed search.

      Hide runtime_mappings attribute Show runtime_mappings attribute object
      • * object Additional properties
        Hide * attributes Show * attributes object
        • fields object

          For type composite

          Hide fields attribute Show fields attribute object
          • * object Additional properties
        • fetch_fields array[object]

          For type lookup

        • format string

          A custom format for date type runtime fields.

        • input_field string

          For type lookup

        • target_field string

          For type lookup

        • target_index string

          For type lookup

        • script object

          Painless script executed at query time.

        • type string Required

          Field type, which can be: boolean, composite, date, double, geo_point, ip,keyword, long, or lookup.

          Values are boolean, composite, date, double, geo_point, geo_shape, ip, keyword, long, or lookup.

    • script_fields object

      Specifies scripts that evaluate custom expressions and returns script fields to the datafeed. The detector configuration objects in a job can contain functions that use these script fields.

      Hide script_fields attribute Show script_fields attribute object
      • * object Additional properties
        Hide * attributes Show * attributes object
        • script object Required
          Hide script attributes Show script attributes object
          • source string

            The script source.

          • params object

            Specifies any named parameters that are passed into the script as variables. Use parameters instead of hard-coded values to decrease compile time.

          • options object
        • ignore_failure boolean
    • scroll_size number

      The size parameter that is used in Elasticsearch searches when the datafeed does not use aggregations. The maximum value is the value of index.max_result_window, which is 10,000 by default.

      Default value is 1000.

  • description string

    A description of the job.

  • job_id string

    The identifier for the anomaly detection job. This identifier can contain lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores. It must start and end with alphanumeric characters.

  • groups array[string]

    A list of job groups. A job can belong to no groups or many.

  • model_plot_config object

    This advanced configuration option stores model information along with the results. It provides a more detailed view into anomaly detection. If you enable model plot it can add considerable overhead to the performance of the system; it is not feasible for jobs with many entities. Model plot provides a simplified and indicative view of the model and its bounds. It does not display complex features such as multivariate correlations or multimodal data. As such, anomalies may occasionally be reported which cannot be seen in the model plot. Model plot config can be configured when the job is created or updated later. It must be disabled if performance issues are experienced.

    Hide model_plot_config attributes Show model_plot_config attributes object
    • annotations_enabled boolean Generally available; Added in 7.9.0

      If true, enables calculation and storage of the model change annotations for each entity that is being analyzed.

      Default value is true.

    • enabled boolean

      If true, enables calculation and storage of the model bounds for each entity that is being analyzed.

      Default value is false.

    • terms string

      Limits data collection to this comma separated list of partition or by field values. If terms are not specified or it is an empty string, no filtering is applied. Wildcards are not supported. Only the specified terms can be viewed when using the Single Metric Viewer.

  • model_snapshot_retention_days number

    Advanced configuration option, which affects the automatic removal of old model snapshots for this job. It specifies the maximum period of time (in days) that snapshots are retained. This period is relative to the timestamp of the most recent snapshot for this job. By default, snapshots ten days older than the newest snapshot are deleted.

    Default value is 10.

  • renormalization_window_days number

    Advanced configuration option. The period over which adjustments to the score are applied, as new data is seen. The default value is the longer of 30 days or 100 bucket spans.

  • results_index_name string

    A text string that affects the name of the machine learning results index. By default, the job generates an index named .ml-anomalies-shared.

  • results_retention_days number

    Advanced configuration option. The period of time (in days) that results are retained. Age is calculated relative to the timestamp of the latest bucket result. If this property has a non-null value, once per day at 00:30 (server time), results that are the specified number of days older than the latest bucket result are deleted from Elasticsearch. The default value is null, which means all results are retained. Annotations generated by the system also count as results for retention purposes; they are deleted after the same number of days as results. Annotations added by users are retained forever.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • allow_lazy_open boolean Required
    • analysis_config object Required
      Hide analysis_config attributes Show analysis_config attributes object
      • bucket_span string Required

        The size of the interval that the analysis is aggregated into, typically between 5m and 1h.

      • categorization_analyzer string | object

        If categorization_field_name is specified, you can also define the analyzer that is used to interpret the categorization field. This property cannot be used at the same time as categorization_filters. The categorization analyzer specifies how the categorization_field is interpreted by the categorization process.

        One of:

        If categorization_field_name is specified, you can also define the analyzer that is used to interpret the categorization field. This property cannot be used at the same time as categorization_filters. The categorization analyzer specifies how the categorization_field is interpreted by the categorization process.

      • categorization_field_name string

        If this property is specified, the values of the specified field will be categorized. The resulting categories must be used in a detector by setting by_field_name, over_field_name, or partition_field_name to the keyword mlcategory.

      • categorization_filters array[string]

        If categorization_field_name is specified, you can also define optional filters. This property expects an array of regular expressions. The expressions are used to filter out matching sequences from the categorization field values.

      • detectors array[object] Required

        An array of detector configuration objects. Detector configuration objects specify which data fields a job analyzes. They also specify which analytical functions are used. You can specify multiple detectors for a job.

        Hide detectors attributes Show detectors attributes object
        • by_field_name string

          The field used to split the data. In particular, this property is used for analyzing the splits with respect to their own history. It is used for finding unusual values in the context of the split.

        • custom_rules array[object]

          An array of custom rule objects, which enable you to customize the way detectors operate. For example, a rule may dictate to the detector conditions under which results should be skipped. Kibana refers to custom rules as job rules.

        • detector_description string

          A description of the detector.

        • detector_index number

          A unique identifier for the detector. This identifier is based on the order of the detectors in the analysis_config, starting at zero.

        • exclude_frequent string

          Contains one of the following values: all, none, by, or over. If set, frequent entities are excluded from influencing the anomaly results. Entities can be considered frequent over time or frequent in a population. If you are working with both over and by fields, then you can set exclude_frequent to all for both fields, or to by or over for those specific fields.

          Values are all, none, by, or over.

        • field_name string

          The field that the detector uses in the function. If you use an event rate function such as count or rare, do not specify this field.

        • function string Required

          The analysis function that is used. For example, count, rare, mean, min, max, and sum.

        • over_field_name string

          The field used to split the data. In particular, this property is used for analyzing the splits with respect to the history of all splits. It is used for finding unusual values in the population of all splits.

        • partition_field_name string

          The field used to segment the analysis. When you use this property, you have completely independent baselines for each value of this field.

        • use_null boolean

          Defines whether a new series is used as the null series when there is no value for the by or partition fields.

          Default value is false.

      • influencers array[string] Required

        A comma separated list of influencer field names. Typically these can be the by, over, or partition fields that are used in the detector configuration. You might also want to use a field name that is not specifically named in a detector, but is available as part of the input data. When you use multiple detectors, the use of influencers is recommended as it aggregates results for each influencer entity.

      • model_prune_window string

        Advanced configuration option. Affects the pruning of models that have not been updated for the given time duration. The value must be set to a multiple of the bucket_span. If set too low, important information may be removed from the model. Typically, set to 30d or longer. If not set, model pruning only occurs if the model memory status reaches the soft limit or the hard limit. For jobs created in 8.1 and later, the default value is the greater of 30d or 20 times bucket_span.

      • latency string

        The size of the window in which to expect data that is out of time order. Defaults to no latency. If you specify a non-zero value, it must be greater than or equal to one second.

      • multivariate_by_fields boolean

        This functionality is reserved for internal use. It is not supported for use in customer environments and is not subject to the support SLA of official GA features. If set to true, the analysis will automatically find correlations between metrics for a given by field value and report anomalies when those correlations cease to hold.

      • per_partition_categorization object

        Settings related to how categorization interacts with partition fields.

        Hide per_partition_categorization attributes Show per_partition_categorization attributes object
        • enabled boolean

          To enable this setting, you must also set the partition_field_name property to the same value in every detector that uses the keyword mlcategory. Otherwise, job creation fails.

        • stop_on_warn boolean

          This setting can be set to true only if per-partition categorization is enabled. If true, both categorization and subsequent anomaly detection stops for partitions where the categorization status changes to warn. This setting makes it viable to have a job where it is expected that categorization works well for some partitions but not others; you do not pay the cost of bad categorization forever in the partitions where it works badly.

      • summary_count_field_name string

        If this property is specified, the data that is fed to the job is expected to be pre-summarized. This property value is the name of the field that contains the count of raw data points that have been summarized. The same summary_count_field_name applies to all detectors in the job.

    • analysis_limits object Required
      Hide analysis_limits attributes Show analysis_limits attributes object
      • categorization_examples_limit number

        The maximum number of examples stored per category in memory and in the results data store. If you increase this value, more examples are available, however it requires that you have more storage available. If you set this value to 0, no examples are stored. NOTE: The categorization_examples_limit applies only to analysis that uses categorization.

        Default value is 4.

      • model_memory_limit number | string

        The approximate maximum amount of memory resources that are required for analytical processing. Once this limit is approached, data pruning becomes more aggressive. Upon exceeding this limit, new entities are not modeled. If the xpack.ml.max_model_memory_limit setting has a value greater than 0 and less than 1024mb, that value is used instead of the default. The default value is relatively small to ensure that high resource usage is a conscious decision. If you have jobs that are expected to analyze high cardinality fields, you will likely need to use a higher value. If you specify a number instead of a string, the units are assumed to be MiB. Specifying a string is recommended for clarity. If you specify a byte size unit of b or kb and the number does not equate to a discrete number of megabytes, it is rounded down to the closest MiB. The minimum valid value is 1 MiB. If you specify a value less than 1 MiB, an error occurs. If you specify a value for the xpack.ml.max_model_memory_limit setting, an error occurs when you try to create jobs that have model_memory_limit values greater than that setting value.

        One of:

        The approximate maximum amount of memory resources that are required for analytical processing. Once this limit is approached, data pruning becomes more aggressive. Upon exceeding this limit, new entities are not modeled. If the xpack.ml.max_model_memory_limit setting has a value greater than 0 and less than 1024mb, that value is used instead of the default. The default value is relatively small to ensure that high resource usage is a conscious decision. If you have jobs that are expected to analyze high cardinality fields, you will likely need to use a higher value. If you specify a number instead of a string, the units are assumed to be MiB. Specifying a string is recommended for clarity. If you specify a byte size unit of b or kb and the number does not equate to a discrete number of megabytes, it is rounded down to the closest MiB. The minimum valid value is 1 MiB. If you specify a value less than 1 MiB, an error occurs. If you specify a value for the xpack.ml.max_model_memory_limit setting, an error occurs when you try to create jobs that have model_memory_limit values greater than that setting value.

    • background_persist_interval string

      A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

    • create_time string | number

      One of:
    • custom_settings object

      Custom metadata about the job

    • daily_model_snapshot_retention_after_days number Required
    • data_description object Required
      Hide data_description attributes Show data_description attributes object
      • format string

        Only JSON format is supported at this time.

      • time_field string

        The name of the field that contains the timestamp.

      • time_format string

        The time format, which can be epoch, epoch_ms, or a custom pattern. The value epoch refers to UNIX or Epoch time (the number of seconds since 1 Jan 1970). The value epoch_ms indicates that time is measured in milliseconds since the epoch. The epoch and epoch_ms time formats accept either integer or real values. Custom patterns must conform to the Java DateTimeFormatter class. When you use date-time formatting patterns, it is recommended that you provide the full date, time and time zone. For example: yyyy-MM-dd'T'HH:mm:ssX. If the pattern that you specify is not sufficient to produce a complete timestamp, job creation fails.

        Default value is epoch.

      • field_delimiter string
    • datafeed_config object
      Hide datafeed_config attributes Show datafeed_config attributes object
      • aggregations object
      • authorization object

        The security privileges that the datafeed uses to run its queries. If Elastic Stack security features were disabled at the time of the most recent update to the datafeed, this property is omitted.

        Hide authorization attributes Show authorization attributes object
        • api_key object

          If an API key was used for the most recent update to the datafeed, its name and identifier are listed in the response.

        • roles array[string]

          If a user ID was used for the most recent update to the datafeed, its roles at the time of the update are listed in the response.

        • service_account string

          If a service account was used for the most recent update to the datafeed, the account name is listed in the response.

      • chunking_config object
        Hide chunking_config attributes Show chunking_config attributes object
        • mode string Required

          If the mode is auto, the chunk size is dynamically calculated; this is the recommended value when the datafeed does not use aggregations. If the mode is manual, chunking is applied according to the specified time_span; use this mode when the datafeed uses aggregations. If the mode is off, no chunking is applied.

          Values are auto, manual, or off.

        • time_span string

          The time span that each search will be querying. This setting is applicable only when the mode is set to manual.

      • datafeed_id string Required
      • frequency string

        A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

      • indices array[string] Required
      • indexes array[string]
      • job_id string Required
      • max_empty_searches number
      • query_delay string

        A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

      • script_fields object
        Hide script_fields attribute Show script_fields attribute object
        • * object Additional properties
          Hide * attributes Show * attributes object
          • script object Required
          • ignore_failure boolean
      • scroll_size number
      • delayed_data_check_config object Required
        Hide delayed_data_check_config attributes Show delayed_data_check_config attributes object
        • check_window string

          The window of time that is searched for late data. This window of time ends with the latest finalized bucket. It defaults to null, which causes an appropriate check_window to be calculated when the real-time datafeed runs. In particular, the default check_window span calculation is based on the maximum of 2h or 8 * bucket_span.

        • enabled boolean Required

          Specifies whether the datafeed periodically checks for delayed data.

      • runtime_mappings object
        Hide runtime_mappings attribute Show runtime_mappings attribute object
        • * object Additional properties
          Hide * attributes Show * attributes object
          • fields object

            For type composite

          • fetch_fields array[object]

            For type lookup

          • format string

            A custom format for date type runtime fields.

      • indices_options object

        Controls how to deal with unavailable concrete indices (closed or missing), how wildcard expressions are expanded to actual indices (all, closed or open indices) and how to deal with wildcard expressions that resolve to no indices.

        Hide indices_options attributes Show indices_options attributes object
        • allow_no_indices boolean

          If false, the request returns an error if any wildcard expression, index alias, or _all value targets only missing or closed indices. This behavior applies even if the request targets other open indices. For example, a request targeting foo*,bar* returns an error if an index starts with foo but no index starts with bar.

        • expand_wildcards string | array[string]

          Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as open,hidden.

          Supported values include:

          • all: Match any data stream or index, including hidden ones.
          • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
          • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
          • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
          • none: Wildcard expressions are not accepted.
        • ignore_unavailable boolean

          If true, missing or closed indices are not included in the response.

          Default value is false.

        • ignore_throttled boolean

          If true, concrete, expanded or aliased indices are ignored when frozen.

          Default value is true.

      • query object Required

        The Elasticsearch query domain-specific language (DSL). This value corresponds to the query object in an Elasticsearch search POST body. All the options that are supported by Elasticsearch can be used, as this object is passed verbatim to Elasticsearch. By default, this property has the following value: {"match_all": {"boost": 1}}.

        Query DSL
    • description string
    • groups array[string]
    • job_id string Required
    • job_type string Required
    • job_version string Required
    • model_plot_config object
      Hide model_plot_config attributes Show model_plot_config attributes object
      • annotations_enabled boolean Generally available; Added in 7.9.0

        If true, enables calculation and storage of the model change annotations for each entity that is being analyzed.

        Default value is true.

      • enabled boolean

        If true, enables calculation and storage of the model bounds for each entity that is being analyzed.

        Default value is false.

      • terms string

        Limits data collection to this comma separated list of partition or by field values. If terms are not specified or it is an empty string, no filtering is applied. Wildcards are not supported. Only the specified terms can be viewed when using the Single Metric Viewer.

    • model_snapshot_id string
    • model_snapshot_retention_days number Required
    • renormalization_window_days number
    • results_index_name string Required
    • results_retention_days number
PUT /_ml/anomaly_detectors/{job_id}
PUT /_ml/anomaly_detectors/job-01
{
  "analysis_config": {
    "bucket_span": "15m",
    "detectors": [
      {
        "detector_description": "Sum of bytes",
        "function": "sum",
        "field_name": "bytes"
      }
    ]
  },
  "data_description": {
    "time_field": "timestamp",
    "time_format": "epoch_ms"
  },
  "analysis_limits": {
    "model_memory_limit": "11MB"
  },
  "model_plot_config": {
    "enabled": true,
    "annotations_enabled": true
  },
  "results_index_name": "test-job1",
  "datafeed_config": {
    "indices": [
      "kibana_sample_data_logs"
    ],
    "query": {
      "bool": {
        "must": [
          {
            "match_all": {}
          }
        ]
      }
    },
    "runtime_mappings": {
      "hour_of_day": {
        "type": "long",
        "script": {
          "source": "emit(doc['timestamp'].value.getHour());"
        }
      }
    },
    "datafeed_id": "datafeed-test-job1"
  }
}
resp = client.ml.put_job(
    job_id="job-01",
    analysis_config={
        "bucket_span": "15m",
        "detectors": [
            {
                "detector_description": "Sum of bytes",
                "function": "sum",
                "field_name": "bytes"
            }
        ]
    },
    data_description={
        "time_field": "timestamp",
        "time_format": "epoch_ms"
    },
    analysis_limits={
        "model_memory_limit": "11MB"
    },
    model_plot_config={
        "enabled": True,
        "annotations_enabled": True
    },
    results_index_name="test-job1",
    datafeed_config={
        "indices": [
            "kibana_sample_data_logs"
        ],
        "query": {
            "bool": {
                "must": [
                    {
                        "match_all": {}
                    }
                ]
            }
        },
        "runtime_mappings": {
            "hour_of_day": {
                "type": "long",
                "script": {
                    "source": "emit(doc['timestamp'].value.getHour());"
                }
            }
        },
        "datafeed_id": "datafeed-test-job1"
    },
)
const response = await client.ml.putJob({
  job_id: "job-01",
  analysis_config: {
    bucket_span: "15m",
    detectors: [
      {
        detector_description: "Sum of bytes",
        function: "sum",
        field_name: "bytes",
      },
    ],
  },
  data_description: {
    time_field: "timestamp",
    time_format: "epoch_ms",
  },
  analysis_limits: {
    model_memory_limit: "11MB",
  },
  model_plot_config: {
    enabled: true,
    annotations_enabled: true,
  },
  results_index_name: "test-job1",
  datafeed_config: {
    indices: ["kibana_sample_data_logs"],
    query: {
      bool: {
        must: [
          {
            match_all: {},
          },
        ],
      },
    },
    runtime_mappings: {
      hour_of_day: {
        type: "long",
        script: {
          source: "emit(doc['timestamp'].value.getHour());",
        },
      },
    },
    datafeed_id: "datafeed-test-job1",
  },
});
response = client.ml.put_job(
  job_id: "job-01",
  body: {
    "analysis_config": {
      "bucket_span": "15m",
      "detectors": [
        {
          "detector_description": "Sum of bytes",
          "function": "sum",
          "field_name": "bytes"
        }
      ]
    },
    "data_description": {
      "time_field": "timestamp",
      "time_format": "epoch_ms"
    },
    "analysis_limits": {
      "model_memory_limit": "11MB"
    },
    "model_plot_config": {
      "enabled": true,
      "annotations_enabled": true
    },
    "results_index_name": "test-job1",
    "datafeed_config": {
      "indices": [
        "kibana_sample_data_logs"
      ],
      "query": {
        "bool": {
          "must": [
            {
              "match_all": {}
            }
          ]
        }
      },
      "runtime_mappings": {
        "hour_of_day": {
          "type": "long",
          "script": {
            "source": "emit(doc['timestamp'].value.getHour());"
          }
        }
      },
      "datafeed_id": "datafeed-test-job1"
    }
  }
)
$resp = $client->ml()->putJob([
    "job_id" => "job-01",
    "body" => [
        "analysis_config" => [
            "bucket_span" => "15m",
            "detectors" => array(
                [
                    "detector_description" => "Sum of bytes",
                    "function" => "sum",
                    "field_name" => "bytes",
                ],
            ),
        ],
        "data_description" => [
            "time_field" => "timestamp",
            "time_format" => "epoch_ms",
        ],
        "analysis_limits" => [
            "model_memory_limit" => "11MB",
        ],
        "model_plot_config" => [
            "enabled" => true,
            "annotations_enabled" => true,
        ],
        "results_index_name" => "test-job1",
        "datafeed_config" => [
            "indices" => array(
                "kibana_sample_data_logs",
            ),
            "query" => [
                "bool" => [
                    "must" => array(
                        [
                            "match_all" => new ArrayObject([]),
                        ],
                    ),
                ],
            ],
            "runtime_mappings" => [
                "hour_of_day" => [
                    "type" => "long",
                    "script" => [
                        "source" => "emit(doc['timestamp'].value.getHour());",
                    ],
                ],
            ],
            "datafeed_id" => "datafeed-test-job1",
        ],
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"analysis_config":{"bucket_span":"15m","detectors":[{"detector_description":"Sum of bytes","function":"sum","field_name":"bytes"}]},"data_description":{"time_field":"timestamp","time_format":"epoch_ms"},"analysis_limits":{"model_memory_limit":"11MB"},"model_plot_config":{"enabled":true,"annotations_enabled":true},"results_index_name":"test-job1","datafeed_config":{"indices":["kibana_sample_data_logs"],"query":{"bool":{"must":[{"match_all":{}}]}},"runtime_mappings":{"hour_of_day":{"type":"long","script":{"source":"emit(doc['"'"'timestamp'"'"'].value.getHour());"}}},"datafeed_id":"datafeed-test-job1"}}' "$ELASTICSEARCH_URL/_ml/anomaly_detectors/job-01"
client.ml().putJob(p -> p
    .analysisConfig(a -> a
        .bucketSpan(b -> b
            .time("15m")
        )
        .detectors(d -> d
            .detectorDescription("Sum of bytes")
            .fieldName("bytes")
            .function("sum")
        )
    )
    .analysisLimits(an -> an
        .modelMemoryLimit("11MB")
    )
    .dataDescription(d -> d
        .timeField("timestamp")
        .timeFormat("epoch_ms")
    )
    .datafeedConfig(d -> d
        .datafeedId("datafeed-test-job1")
        .indices("kibana_sample_data_logs")
        .query(q -> q
            .bool(b -> b
                .must(m -> m
                    .matchAll(ma -> ma)
                )
            )
        )
        .runtimeMappings("hour_of_day", r -> r
            .script(s -> s
                .source(so -> so
                    .scriptString("emit(doc['timestamp'].value.getHour());")
                )
            )
            .type(RuntimeFieldType.Long)
        )
    )
    .jobId("job-01")
    .modelPlotConfig(m -> m
        .annotationsEnabled(true)
        .enabled(true)
    )
    .resultsIndexName("test-job1")
);
Request example
A request to create an anomaly detection job and datafeed.
{
  "analysis_config": {
    "bucket_span": "15m",
    "detectors": [
      {
        "detector_description": "Sum of bytes",
        "function": "sum",
        "field_name": "bytes"
      }
    ]
  },
  "data_description": {
    "time_field": "timestamp",
    "time_format": "epoch_ms"
  },
  "analysis_limits": {
    "model_memory_limit": "11MB"
  },
  "model_plot_config": {
    "enabled": true,
    "annotations_enabled": true
  },
  "results_index_name": "test-job1",
  "datafeed_config": {
    "indices": [
      "kibana_sample_data_logs"
    ],
    "query": {
      "bool": {
        "must": [
          {
            "match_all": {}
          }
        ]
      }
    },
    "runtime_mappings": {
      "hour_of_day": {
        "type": "long",
        "script": {
          "source": "emit(doc['timestamp'].value.getHour());"
        }
      }
    },
    "datafeed_id": "datafeed-test-job1"
  }
}
Response examples (200)
A successful response when creating an anomaly detection job and datafeed.
{
  "job_id": "test-job1",
  "job_type": "anomaly_detector",
  "job_version": "8.4.0",
  "create_time": 1656087283340,
  "datafeed_config": {
    "datafeed_id": "datafeed-test-job1",
    "job_id": "test-job1",
    "authorization": {
      "roles": [
        "superuser"
      ]
    },
    "query_delay": "61499ms",
    "chunking_config": {
      "mode": "auto"
    },
    "indices_options": {
      "expand_wildcards": [
        "open"
      ],
      "ignore_unavailable": false,
      "allow_no_indices": true,
      "ignore_throttled": true
    },
    "query": {
      "bool": {
        "must": [
          {
            "match_all": {}
          }
        ]
      }
    },
    "indices": [
      "kibana_sample_data_logs"
    ],
    "scroll_size": 1000,
    "delayed_data_check_config": {
      "enabled": true
    },
    "runtime_mappings": {
      "hour_of_day": {
        "type": "long",
        "script": {
          "source": "emit(doc['timestamp'].value.getHour());"
        }
      }
    }
  },
  "analysis_config": {
    "bucket_span": "15m",
    "detectors": [
      {
        "detector_description": "Sum of bytes",
        "function": "sum",
        "field_name": "bytes",
        "detector_index": 0
      }
    ],
    "influencers": [],
    "model_prune_window": "30d"
  },
  "analysis_limits": {
    "model_memory_limit": "11mb",
    "categorization_examples_limit": 4
  },
  "data_description": {
    "time_field": "timestamp",
    "time_format": "epoch_ms"
  },
  "model_plot_config": {
    "enabled": true,
    "annotations_enabled": true
  },
  "model_snapshot_retention_days": 10,
  "daily_model_snapshot_retention_after_days": 1,
  "results_index_name": "custom-test-job1",
  "allow_lazy_open": false
}




































Get anomaly detection job results for categories Generally available; Added in 5.4.0

POST /_ml/anomaly_detectors/{job_id}/results/categories/{category_id}

All methods and paths for this operation:

GET /_ml/anomaly_detectors/{job_id}/results/categories

POST /_ml/anomaly_detectors/{job_id}/results/categories
GET /_ml/anomaly_detectors/{job_id}/results/categories/{category_id}
POST /_ml/anomaly_detectors/{job_id}/results/categories/{category_id}

Required authorization

  • Cluster privileges: monitor_ml

Path parameters

  • job_id string Required

    Identifier for the anomaly detection job.

  • category_id string

    Identifier for the category, which is unique in the job. If you specify neither the category ID nor the partition_field_value, the API returns information about all categories. If you specify only the partition_field_value, it returns information about all categories for the specified partition.

Query parameters

  • from number

    Skips the specified number of categories.

  • partition_field_value string

    Only return categories for the specified partition.

  • size number

    Specifies the maximum number of categories to obtain.

application/json

Body

  • page object

    Configures pagination. This parameter has the from and size properties.

    Hide page attributes Show page attributes object
    • from number

      Skips the specified number of items.

      Default value is 0.

    • size number

      Specifies the maximum number of items to obtain.

      Default value is 10000.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • categories array[object] Required
      Hide categories attributes Show categories attributes object
      • category_id number Required

        A unique identifier for the category. category_id is unique at the job level, even when per-partition categorization is enabled.

      • examples array[string] Required

        A list of examples of actual values that matched the category.

      • grok_pattern string

        [experimental] A Grok pattern that could be used in Logstash or an ingest pipeline to extract fields from messages that match the category. This field is experimental and may be changed or removed in a future release. The Grok patterns that are found are not optimal, but are often a good starting point for manual tweaking.

      • job_id string Required

        Identifier for the anomaly detection job.

      • max_matching_length number Required

        The maximum length of the fields that matched the category. The value is increased by 10% to enable matching for similar fields that have not been analyzed.

      • partition_field_name string

        If per-partition categorization is enabled, this property identifies the field used to segment the categorization. It is not present when per-partition categorization is disabled.

      • partition_field_value string

        If per-partition categorization is enabled, this property identifies the value of the partition_field_name for the category. It is not present when per-partition categorization is disabled.

      • regex string Required

        A regular expression that is used to search for values that match the category.

      • terms string Required

        A space separated list of the common tokens that are matched in values of the category.

      • num_matches number

        The number of messages that have been matched by this category. This is only guaranteed to have the latest accurate count after a job _flush or _close

      • preferred_to_categories array[string]

        A list of category_id entries that this current category encompasses. Any new message that is processed by the categorizer will match against this category and not any of the categories in this list. This is only guaranteed to have the latest accurate list of categories after a job _flush or _close

      • p string
      • result_type string Required
      • mlcategory string Required
    • count number Required
POST /_ml/anomaly_detectors/{job_id}/results/categories/{category_id}
GET _ml/anomaly_detectors/esxi_log/results/categories
{
  "page":{
    "size": 1
  }
}
resp = client.ml.get_categories(
    job_id="esxi_log",
    page={
        "size": 1
    },
)
const response = await client.ml.getCategories({
  job_id: "esxi_log",
  page: {
    size: 1,
  },
});
response = client.ml.get_categories(
  job_id: "esxi_log",
  body: {
    "page": {
      "size": 1
    }
  }
)
$resp = $client->ml()->getCategories([
    "job_id" => "esxi_log",
    "body" => [
        "page" => [
            "size" => 1,
        ],
    ],
]);
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"page":{"size":1}}' "$ELASTICSEARCH_URL/_ml/anomaly_detectors/esxi_log/results/categories"
client.ml().getCategories(g -> g
    .jobId("esxi_log")
    .page(p -> p
        .size(1)
    )
);
Request example
An example body for a `GET _ml/anomaly_detectors/esxi_log/results/categories` request.
{
  "page":{
    "size": 1
  }
}








































Revert to a snapshot Generally available; Added in 5.4.0

POST /_ml/anomaly_detectors/{job_id}/model_snapshots/{snapshot_id}/_revert

The machine learning features react quickly to anomalous input, learning new behaviors in data. Highly anomalous input increases the variance in the models whilst the system learns whether this is a new step-change in behavior or a one-off event. In the case where this anomalous input is known to be a one-off, then it might be appropriate to reset the model state to a time before this event. For example, you might consider reverting to a saved snapshot after Black Friday or a critical system failure.

Required authorization

  • Cluster privileges: manage_ml

Path parameters

  • job_id string Required

    Identifier for the anomaly detection job.

  • snapshot_id string Required

    You can specify empty as the . Reverting to the empty snapshot means the anomaly detection job starts learning a new model from scratch when it is started.

Query parameters

  • delete_intervening_results boolean

    If true, deletes the results in the time period between the latest results and the time of the reverted snapshot. It also resets the model to accept records for this time period. If you choose not to delete intervening results when reverting a snapshot, the job will not accept input data that is older than the current time. If you want to resend data, then delete the intervening results.

application/json

Body

  • delete_intervening_results boolean

    Refer to the description for the delete_intervening_results query parameter.

    Default value is false.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • model object Required
      Hide model attributes Show model attributes object
      • description string

        An optional description of the job.

      • job_id string Required

        A numerical character string that uniquely identifies the job that the snapshot was created for.

      • latest_record_time_stamp number

        The timestamp of the latest processed record.

      • latest_result_time_stamp number

        The timestamp of the latest bucket result.

      • min_version string Required

        The minimum version required to be able to restore the model snapshot.

      • model_size_stats object

        Summary information describing the model.

        Hide model_size_stats attributes Show model_size_stats attributes object
        • bucket_allocation_failures_count number Required
        • job_id string Required
        • log_time
        • memory_status string Required

          Values are ok, soft_limit, or hard_limit.

        • model_bytes
        • model_bytes_exceeded
        • model_bytes_memory_limit
        • output_memory_allocator_bytes
        • peak_model_bytes
        • assignment_memory_basis string
        • result_type string Required
        • total_by_field_count number Required
        • total_over_field_count number Required
        • total_partition_field_count number Required
        • categorization_status string Required

          Values are ok or warn.

        • categorized_doc_count number Required
        • dead_category_count number Required
        • failed_category_count number Required
        • frequent_category_count number Required
        • rare_category_count number Required
        • total_category_count number Required
        • timestamp number
      • retain boolean Required

        If true, this snapshot will not be deleted during automatic cleanup of snapshots older than model_snapshot_retention_days. However, this snapshot will be deleted when the job is deleted. The default value is false.

      • snapshot_doc_count number Required

        For internal use only.

      • snapshot_id string Required

        A numerical character string that uniquely identifies the model snapshot.

      • timestamp number Required

        The creation timestamp for the snapshot.

POST /_ml/anomaly_detectors/{job_id}/model_snapshots/{snapshot_id}/_revert
POST _ml/anomaly_detectors/low_request_rate/model_snapshots/1637092688/_revert
{
  "delete_intervening_results": true
}
resp = client.ml.revert_model_snapshot(
    job_id="low_request_rate",
    snapshot_id="1637092688",
    delete_intervening_results=True,
)
const response = await client.ml.revertModelSnapshot({
  job_id: "low_request_rate",
  snapshot_id: 1637092688,
  delete_intervening_results: true,
});
response = client.ml.revert_model_snapshot(
  job_id: "low_request_rate",
  snapshot_id: "1637092688",
  body: {
    "delete_intervening_results": true
  }
)
$resp = $client->ml()->revertModelSnapshot([
    "job_id" => "low_request_rate",
    "snapshot_id" => "1637092688",
    "body" => [
        "delete_intervening_results" => true,
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"delete_intervening_results":true}' "$ELASTICSEARCH_URL/_ml/anomaly_detectors/low_request_rate/model_snapshots/1637092688/_revert"
client.ml().revertModelSnapshot(r -> r
    .deleteInterveningResults(true)
    .jobId("low_request_rate")
    .snapshotId("1637092688")
);
Request example
An example body for a `POST _ml/anomaly_detectors/low_request_rate/model_snapshots/1637092688/_revert` request.
{
  "delete_intervening_results": true
}

Start datafeeds Generally available; Added in 5.5.0

POST /_ml/datafeeds/{datafeed_id}/_start

A datafeed must be started in order to retrieve data from Elasticsearch. A datafeed can be started and stopped multiple times throughout its lifecycle.

Before you can start a datafeed, the anomaly detection job must be open. Otherwise, an error occurs.

If you restart a stopped datafeed, it continues processing input data from the next millisecond after it was stopped. If new data was indexed for that exact millisecond between stopping and starting, it will be ignored.

When Elasticsearch security features are enabled, your datafeed remembers which roles the last user to create or update it had at the time of creation or update and runs the query using those same roles. If you provided secondary authorization headers when you created or updated the datafeed, those credentials are used instead.

Required authorization

  • Cluster privileges: manage_ml

Path parameters

  • datafeed_id string Required

    A numerical character string that uniquely identifies the datafeed. This identifier can contain lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores. It must start and end with alphanumeric characters.

Query parameters

  • end string | number

    The time that the datafeed should end, which can be specified by using one of the following formats:

    • ISO 8601 format with milliseconds, for example 2017-01-22T06:00:00.000Z
    • ISO 8601 format without milliseconds, for example 2017-01-22T06:00:00+00:00
    • Milliseconds since the epoch, for example 1485061200000

    Date-time arguments using either of the ISO 8601 formats must have a time zone designator, where Z is accepted as an abbreviation for UTC time. When a URL is expected (for example, in browsers), the + used in time zone designators must be encoded as %2B. The end time value is exclusive. If you do not specify an end time, the datafeed runs continuously.

  • start string | number

    The time that the datafeed should begin, which can be specified by using the same formats as the end parameter. This value is inclusive. If you do not specify a start time and the datafeed is associated with a new anomaly detection job, the analysis starts from the earliest time for which data is available. If you restart a stopped datafeed and specify a start value that is earlier than the timestamp of the latest processed record, the datafeed continues from 1 millisecond after the timestamp of the latest processed record.

  • timeout string

    Specifies the amount of time to wait until a datafeed starts.

    Values are -1 or 0.

application/json

Body

  • end string | number

    Refer to the description for the end query parameter.

    One of:

    Refer to the description for the end query parameter.

  • start string | number

    Refer to the description for the start query parameter.

    One of:

    Refer to the description for the start query parameter.

  • timeout string

    Refer to the description for the timeout query parameter.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • node string | array[string] Required

      The ID of the node that the job was started on. In serverless this will be the "serverless". If the job is allowed to open lazily and has not yet been assigned to a node, this value is an empty string.

    • started boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

POST /_ml/datafeeds/{datafeed_id}/_start
POST _ml/datafeeds/datafeed-low_request_rate/_start
{
  "start": "2019-04-07T18:22:16Z"
}
resp = client.ml.start_datafeed(
    datafeed_id="datafeed-low_request_rate",
    start="2019-04-07T18:22:16Z",
)
const response = await client.ml.startDatafeed({
  datafeed_id: "datafeed-low_request_rate",
  start: "2019-04-07T18:22:16Z",
});
response = client.ml.start_datafeed(
  datafeed_id: "datafeed-low_request_rate",
  body: {
    "start": "2019-04-07T18:22:16Z"
  }
)
$resp = $client->ml()->startDatafeed([
    "datafeed_id" => "datafeed-low_request_rate",
    "body" => [
        "start" => "2019-04-07T18:22:16Z",
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"start":"2019-04-07T18:22:16Z"}' "$ELASTICSEARCH_URL/_ml/datafeeds/datafeed-low_request_rate/_start"
client.ml().startDatafeed(s -> s
    .datafeedId("datafeed-low_request_rate")
    .start(DateTime.of("2019-04-07T18:22:16Z"))
);
Request example
An example body for a `POST _ml/datafeeds/datafeed-low_request_rate/_start` request.
{
  "start": "2019-04-07T18:22:16Z"
}












Update an anomaly detection job Generally available; Added in 5.5.0

POST /_ml/anomaly_detectors/{job_id}/_update

Updates certain properties of an anomaly detection job.

Required authorization

  • Cluster privileges: manage_ml

Path parameters

  • job_id string Required

    Identifier for the job.

application/json

Body Required

  • allow_lazy_open boolean

    Advanced configuration option. Specifies whether this job can open when there is insufficient machine learning node capacity for it to be immediately assigned to a node. If false and a machine learning node with capacity to run the job cannot immediately be found, the open anomaly detection jobs API returns an error. However, this is also subject to the cluster-wide xpack.ml.max_lazy_ml_nodes setting. If this option is set to true, the open anomaly detection jobs API does not return an error and the job waits in the opening state until sufficient machine learning node capacity is available.

    Default value is false.

  • analysis_limits object
    Hide analysis_limits attribute Show analysis_limits attribute object
    • model_memory_limit string Required

      Limits can be applied for the resources required to hold the mathematical models in memory. These limits are approximate and can be set per job. They do not control the memory used by other processes, for example the Elasticsearch Java processes.

  • background_persist_interval string

    Advanced configuration option. The time between each periodic persistence of the model. The default value is a randomized value between 3 to 4 hours, which avoids all jobs persisting at exactly the same time. The smallest allowed value is 1 hour. For very large models (several GB), persistence could take 10-20 minutes, so do not set the value too low. If the job is open when you make the update, you must stop the datafeed, close the job, then reopen the job and restart the datafeed for the changes to take effect.

  • custom_settings object

    Advanced configuration option. Contains custom meta data about the job. For example, it can contain custom URL information as shown in Adding custom URLs to machine learning results.

    Hide custom_settings attribute Show custom_settings attribute object
    • * object Additional properties
  • categorization_filters array[string]
  • description string

    A description of the job.

  • model_plot_config object
    Hide model_plot_config attributes Show model_plot_config attributes object
    • annotations_enabled boolean Generally available; Added in 7.9.0

      If true, enables calculation and storage of the model change annotations for each entity that is being analyzed.

      Default value is true.

    • enabled boolean

      If true, enables calculation and storage of the model bounds for each entity that is being analyzed.

      Default value is false.

    • terms string

      Limits data collection to this comma separated list of partition or by field values. If terms are not specified or it is an empty string, no filtering is applied. Wildcards are not supported. Only the specified terms can be viewed when using the Single Metric Viewer.

  • model_prune_window string

    A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

  • daily_model_snapshot_retention_after_days number

    Advanced configuration option, which affects the automatic removal of old model snapshots for this job. It specifies a period of time (in days) after which only the first snapshot per day is retained. This period is relative to the timestamp of the most recent snapshot for this job. Valid values range from 0 to model_snapshot_retention_days. For jobs created before version 7.8.0, the default value matches model_snapshot_retention_days.

    Default value is 1.

  • model_snapshot_retention_days number

    Advanced configuration option, which affects the automatic removal of old model snapshots for this job. It specifies the maximum period of time (in days) that snapshots are retained. This period is relative to the timestamp of the most recent snapshot for this job.

    Default value is 10.

  • renormalization_window_days number

    Advanced configuration option. The period over which adjustments to the score are applied, as new data is seen.

  • results_retention_days number

    Advanced configuration option. The period of time (in days) that results are retained. Age is calculated relative to the timestamp of the latest bucket result. If this property has a non-null value, once per day at 00:30 (server time), results that are the specified number of days older than the latest bucket result are deleted from Elasticsearch. The default value is null, which means all results are retained.

  • groups array[string]

    A list of job groups. A job can belong to no groups or many.

  • detectors array[object]

    An array of detector update objects.

    Hide detectors attributes Show detectors attributes object
    • detector_index number Required

      A unique identifier for the detector. This identifier is based on the order of the detectors in the analysis_config, starting at zero.

    • description string

      A description of the detector.

    • custom_rules array[object]

      An array of custom rule objects, which enable you to customize the way detectors operate. For example, a rule may dictate to the detector conditions under which results should be skipped. Kibana refers to custom rules as job rules.

      Hide custom_rules attributes Show custom_rules attributes object
      • actions array[string]

        The set of actions to be triggered when the rule applies. If more than one action is specified the effects of all actions are combined.

        Supported values include:

        • skip_result: The result will not be created. Unless you also specify skip_model_update, the model will be updated as usual with the corresponding series value.
        • skip_model_update: The value for that series will not be used to update the model. Unless you also specify skip_result, the results will be created as usual. This action is suitable when certain values are expected to be consistently anomalous and they affect the model in a way that negatively impacts the rest of the results.

        Values are skip_result or skip_model_update. Default value is ["skip_result"].

      • conditions array[object]

        An array of numeric conditions when the rule applies. A rule must either have a non-empty scope or at least one condition. Multiple conditions are combined together with a logical AND.

        Hide conditions attributes Show conditions attributes object
        • applies_to
        • operator
        • value number Required

          The value that is compared against the applies_to field using the operator.

      • scope object

        A scope of series where the rule applies. A rule must either have a non-empty scope or at least one condition. By default, the scope includes all series. Scoping is allowed for any of the fields that are also specified in by_field_name, over_field_name, or partition_field_name.

        Hide scope attribute Show scope attribute object
        • * object Additional properties
          Hide * attributes Show * attributes object
          • filter_id
          • filter_type
  • per_partition_categorization object

    Settings related to how categorization interacts with partition fields.

    Hide per_partition_categorization attributes Show per_partition_categorization attributes object
    • enabled boolean

      To enable this setting, you must also set the partition_field_name property to the same value in every detector that uses the keyword mlcategory. Otherwise, job creation fails.

    • stop_on_warn boolean

      This setting can be set to true only if per-partition categorization is enabled. If true, both categorization and subsequent anomaly detection stops for partitions where the categorization status changes to warn. This setting makes it viable to have a job where it is expected that categorization works well for some partitions but not others; you do not pay the cost of bad categorization forever in the partitions where it works badly.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • allow_lazy_open boolean Required
    • analysis_config object Required
      Hide analysis_config attributes Show analysis_config attributes object
      • bucket_span string Required

        The size of the interval that the analysis is aggregated into, typically between 5m and 1h.

      • categorization_analyzer string | object

        If categorization_field_name is specified, you can also define the analyzer that is used to interpret the categorization field. This property cannot be used at the same time as categorization_filters. The categorization analyzer specifies how the categorization_field is interpreted by the categorization process.

        One of:

        If categorization_field_name is specified, you can also define the analyzer that is used to interpret the categorization field. This property cannot be used at the same time as categorization_filters. The categorization analyzer specifies how the categorization_field is interpreted by the categorization process.

      • categorization_field_name string

        If this property is specified, the values of the specified field will be categorized. The resulting categories must be used in a detector by setting by_field_name, over_field_name, or partition_field_name to the keyword mlcategory.

      • categorization_filters array[string]

        If categorization_field_name is specified, you can also define optional filters. This property expects an array of regular expressions. The expressions are used to filter out matching sequences from the categorization field values.

      • detectors array[object] Required

        An array of detector configuration objects. Detector configuration objects specify which data fields a job analyzes. They also specify which analytical functions are used. You can specify multiple detectors for a job.

        Hide detectors attributes Show detectors attributes object
        • by_field_name string

          The field used to split the data. In particular, this property is used for analyzing the splits with respect to their own history. It is used for finding unusual values in the context of the split.

        • custom_rules array[object]

          An array of custom rule objects, which enable you to customize the way detectors operate. For example, a rule may dictate to the detector conditions under which results should be skipped. Kibana refers to custom rules as job rules.

        • detector_description string

          A description of the detector.

        • detector_index number

          A unique identifier for the detector. This identifier is based on the order of the detectors in the analysis_config, starting at zero.

        • exclude_frequent string

          Contains one of the following values: all, none, by, or over. If set, frequent entities are excluded from influencing the anomaly results. Entities can be considered frequent over time or frequent in a population. If you are working with both over and by fields, then you can set exclude_frequent to all for both fields, or to by or over for those specific fields.

          Values are all, none, by, or over.

        • field_name string

          The field that the detector uses in the function. If you use an event rate function such as count or rare, do not specify this field.

        • function string Required

          The analysis function that is used. For example, count, rare, mean, min, max, and sum.

        • over_field_name string

          The field used to split the data. In particular, this property is used for analyzing the splits with respect to the history of all splits. It is used for finding unusual values in the population of all splits.

        • partition_field_name string

          The field used to segment the analysis. When you use this property, you have completely independent baselines for each value of this field.

        • use_null boolean

          Defines whether a new series is used as the null series when there is no value for the by or partition fields.

          Default value is false.

      • influencers array[string] Required

        A comma separated list of influencer field names. Typically these can be the by, over, or partition fields that are used in the detector configuration. You might also want to use a field name that is not specifically named in a detector, but is available as part of the input data. When you use multiple detectors, the use of influencers is recommended as it aggregates results for each influencer entity.

      • model_prune_window string

        Advanced configuration option. Affects the pruning of models that have not been updated for the given time duration. The value must be set to a multiple of the bucket_span. If set too low, important information may be removed from the model. Typically, set to 30d or longer. If not set, model pruning only occurs if the model memory status reaches the soft limit or the hard limit. For jobs created in 8.1 and later, the default value is the greater of 30d or 20 times bucket_span.

      • latency string

        The size of the window in which to expect data that is out of time order. Defaults to no latency. If you specify a non-zero value, it must be greater than or equal to one second.

      • multivariate_by_fields boolean

        This functionality is reserved for internal use. It is not supported for use in customer environments and is not subject to the support SLA of official GA features. If set to true, the analysis will automatically find correlations between metrics for a given by field value and report anomalies when those correlations cease to hold.

      • per_partition_categorization object

        Settings related to how categorization interacts with partition fields.

        Hide per_partition_categorization attributes Show per_partition_categorization attributes object
        • enabled boolean

          To enable this setting, you must also set the partition_field_name property to the same value in every detector that uses the keyword mlcategory. Otherwise, job creation fails.

        • stop_on_warn boolean

          This setting can be set to true only if per-partition categorization is enabled. If true, both categorization and subsequent anomaly detection stops for partitions where the categorization status changes to warn. This setting makes it viable to have a job where it is expected that categorization works well for some partitions but not others; you do not pay the cost of bad categorization forever in the partitions where it works badly.

      • summary_count_field_name string

        If this property is specified, the data that is fed to the job is expected to be pre-summarized. This property value is the name of the field that contains the count of raw data points that have been summarized. The same summary_count_field_name applies to all detectors in the job.

    • analysis_limits object Required
      Hide analysis_limits attributes Show analysis_limits attributes object
      • categorization_examples_limit number

        The maximum number of examples stored per category in memory and in the results data store. If you increase this value, more examples are available, however it requires that you have more storage available. If you set this value to 0, no examples are stored. NOTE: The categorization_examples_limit applies only to analysis that uses categorization.

        Default value is 4.

      • model_memory_limit number | string

        The approximate maximum amount of memory resources that are required for analytical processing. Once this limit is approached, data pruning becomes more aggressive. Upon exceeding this limit, new entities are not modeled. If the xpack.ml.max_model_memory_limit setting has a value greater than 0 and less than 1024mb, that value is used instead of the default. The default value is relatively small to ensure that high resource usage is a conscious decision. If you have jobs that are expected to analyze high cardinality fields, you will likely need to use a higher value. If you specify a number instead of a string, the units are assumed to be MiB. Specifying a string is recommended for clarity. If you specify a byte size unit of b or kb and the number does not equate to a discrete number of megabytes, it is rounded down to the closest MiB. The minimum valid value is 1 MiB. If you specify a value less than 1 MiB, an error occurs. If you specify a value for the xpack.ml.max_model_memory_limit setting, an error occurs when you try to create jobs that have model_memory_limit values greater than that setting value.

        One of:

        The approximate maximum amount of memory resources that are required for analytical processing. Once this limit is approached, data pruning becomes more aggressive. Upon exceeding this limit, new entities are not modeled. If the xpack.ml.max_model_memory_limit setting has a value greater than 0 and less than 1024mb, that value is used instead of the default. The default value is relatively small to ensure that high resource usage is a conscious decision. If you have jobs that are expected to analyze high cardinality fields, you will likely need to use a higher value. If you specify a number instead of a string, the units are assumed to be MiB. Specifying a string is recommended for clarity. If you specify a byte size unit of b or kb and the number does not equate to a discrete number of megabytes, it is rounded down to the closest MiB. The minimum valid value is 1 MiB. If you specify a value less than 1 MiB, an error occurs. If you specify a value for the xpack.ml.max_model_memory_limit setting, an error occurs when you try to create jobs that have model_memory_limit values greater than that setting value.

    • background_persist_interval string

      A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

    • create_time number

      Time unit for milliseconds

    • finished_time number

      Time unit for milliseconds

    • custom_settings object
      Hide custom_settings attribute Show custom_settings attribute object
      • * string Additional properties
    • daily_model_snapshot_retention_after_days number Required
    • data_description object Required
      Hide data_description attributes Show data_description attributes object
      • format string

        Only JSON format is supported at this time.

      • time_field string

        The name of the field that contains the timestamp.

      • time_format string

        The time format, which can be epoch, epoch_ms, or a custom pattern. The value epoch refers to UNIX or Epoch time (the number of seconds since 1 Jan 1970). The value epoch_ms indicates that time is measured in milliseconds since the epoch. The epoch and epoch_ms time formats accept either integer or real values. Custom patterns must conform to the Java DateTimeFormatter class. When you use date-time formatting patterns, it is recommended that you provide the full date, time and time zone. For example: yyyy-MM-dd'T'HH:mm:ssX. If the pattern that you specify is not sufficient to produce a complete timestamp, job creation fails.

        Default value is epoch.

      • field_delimiter string
    • datafeed_config object
      Hide datafeed_config attributes Show datafeed_config attributes object
      • aggregations object
      • authorization object

        The security privileges that the datafeed uses to run its queries. If Elastic Stack security features were disabled at the time of the most recent update to the datafeed, this property is omitted.

        Hide authorization attributes Show authorization attributes object
        • api_key object

          If an API key was used for the most recent update to the datafeed, its name and identifier are listed in the response.

        • roles array[string]

          If a user ID was used for the most recent update to the datafeed, its roles at the time of the update are listed in the response.

        • service_account string

          If a service account was used for the most recent update to the datafeed, the account name is listed in the response.

      • chunking_config object
        Hide chunking_config attributes Show chunking_config attributes object
        • mode string Required

          If the mode is auto, the chunk size is dynamically calculated; this is the recommended value when the datafeed does not use aggregations. If the mode is manual, chunking is applied according to the specified time_span; use this mode when the datafeed uses aggregations. If the mode is off, no chunking is applied.

          Values are auto, manual, or off.

        • time_span string

          The time span that each search will be querying. This setting is applicable only when the mode is set to manual.

      • datafeed_id string Required
      • frequency string

        A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

      • indices array[string] Required
      • indexes array[string]
      • job_id string Required
      • max_empty_searches number
      • query_delay string

        A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

      • script_fields object
        Hide script_fields attribute Show script_fields attribute object
        • * object Additional properties
          Hide * attributes Show * attributes object
          • script object Required
          • ignore_failure boolean
      • scroll_size number
      • delayed_data_check_config object Required
        Hide delayed_data_check_config attributes Show delayed_data_check_config attributes object
        • check_window string

          The window of time that is searched for late data. This window of time ends with the latest finalized bucket. It defaults to null, which causes an appropriate check_window to be calculated when the real-time datafeed runs. In particular, the default check_window span calculation is based on the maximum of 2h or 8 * bucket_span.

        • enabled boolean Required

          Specifies whether the datafeed periodically checks for delayed data.

      • runtime_mappings object
        Hide runtime_mappings attribute Show runtime_mappings attribute object
        • * object Additional properties
          Hide * attributes Show * attributes object
          • fields object

            For type composite

          • fetch_fields array[object]

            For type lookup

          • format string

            A custom format for date type runtime fields.

      • indices_options object

        Controls how to deal with unavailable concrete indices (closed or missing), how wildcard expressions are expanded to actual indices (all, closed or open indices) and how to deal with wildcard expressions that resolve to no indices.

        Hide indices_options attributes Show indices_options attributes object
        • allow_no_indices boolean

          If false, the request returns an error if any wildcard expression, index alias, or _all value targets only missing or closed indices. This behavior applies even if the request targets other open indices. For example, a request targeting foo*,bar* returns an error if an index starts with foo but no index starts with bar.

        • expand_wildcards string | array[string]

          Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as open,hidden.

          Supported values include:

          • all: Match any data stream or index, including hidden ones.
          • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
          • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
          • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
          • none: Wildcard expressions are not accepted.
        • ignore_unavailable boolean

          If true, missing or closed indices are not included in the response.

          Default value is false.

        • ignore_throttled boolean

          If true, concrete, expanded or aliased indices are ignored when frozen.

          Default value is true.

      • query object Required

        The Elasticsearch query domain-specific language (DSL). This value corresponds to the query object in an Elasticsearch search POST body. All the options that are supported by Elasticsearch can be used, as this object is passed verbatim to Elasticsearch. By default, this property has the following value: {"match_all": {"boost": 1}}.

        Query DSL
    • description string
    • groups array[string]
    • job_id string Required
    • job_type string Required
    • job_version string Required
    • model_plot_config object
      Hide model_plot_config attributes Show model_plot_config attributes object
      • annotations_enabled boolean Generally available; Added in 7.9.0

        If true, enables calculation and storage of the model change annotations for each entity that is being analyzed.

        Default value is true.

      • enabled boolean

        If true, enables calculation and storage of the model bounds for each entity that is being analyzed.

        Default value is false.

      • terms string

        Limits data collection to this comma separated list of partition or by field values. If terms are not specified or it is an empty string, no filtering is applied. Wildcards are not supported. Only the specified terms can be viewed when using the Single Metric Viewer.

    • model_snapshot_id string
    • model_snapshot_retention_days number Required
    • renormalization_window_days number
    • results_index_name string Required
    • results_retention_days number
POST /_ml/anomaly_detectors/{job_id}/_update
POST _ml/anomaly_detectors/low_request_rate/_update
{
  "description":"An updated job",
  "detectors": {
    "detector_index": 0,
    "description": "An updated detector description"
  },
  "groups": ["kibana_sample_data","kibana_sample_web_logs"],
  "model_plot_config": {
    "enabled": true
  },
  "renormalization_window_days": 30,
  "background_persist_interval": "2h",
  "model_snapshot_retention_days": 7,
  "results_retention_days": 60
}
resp = client.ml.update_job(
    job_id="low_request_rate",
    description="An updated job",
    detectors={
        "detector_index": 0,
        "description": "An updated detector description"
    },
    groups=[
        "kibana_sample_data",
        "kibana_sample_web_logs"
    ],
    model_plot_config={
        "enabled": True
    },
    renormalization_window_days=30,
    background_persist_interval="2h",
    model_snapshot_retention_days=7,
    results_retention_days=60,
)
const response = await client.ml.updateJob({
  job_id: "low_request_rate",
  description: "An updated job",
  detectors: {
    detector_index: 0,
    description: "An updated detector description",
  },
  groups: ["kibana_sample_data", "kibana_sample_web_logs"],
  model_plot_config: {
    enabled: true,
  },
  renormalization_window_days: 30,
  background_persist_interval: "2h",
  model_snapshot_retention_days: 7,
  results_retention_days: 60,
});
response = client.ml.update_job(
  job_id: "low_request_rate",
  body: {
    "description": "An updated job",
    "detectors": {
      "detector_index": 0,
      "description": "An updated detector description"
    },
    "groups": [
      "kibana_sample_data",
      "kibana_sample_web_logs"
    ],
    "model_plot_config": {
      "enabled": true
    },
    "renormalization_window_days": 30,
    "background_persist_interval": "2h",
    "model_snapshot_retention_days": 7,
    "results_retention_days": 60
  }
)
$resp = $client->ml()->updateJob([
    "job_id" => "low_request_rate",
    "body" => [
        "description" => "An updated job",
        "detectors" => [
            "detector_index" => 0,
            "description" => "An updated detector description",
        ],
        "groups" => array(
            "kibana_sample_data",
            "kibana_sample_web_logs",
        ),
        "model_plot_config" => [
            "enabled" => true,
        ],
        "renormalization_window_days" => 30,
        "background_persist_interval" => "2h",
        "model_snapshot_retention_days" => 7,
        "results_retention_days" => 60,
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"description":"An updated job","detectors":{"detector_index":0,"description":"An updated detector description"},"groups":["kibana_sample_data","kibana_sample_web_logs"],"model_plot_config":{"enabled":true},"renormalization_window_days":30,"background_persist_interval":"2h","model_snapshot_retention_days":7,"results_retention_days":60}' "$ELASTICSEARCH_URL/_ml/anomaly_detectors/low_request_rate/_update"
client.ml().updateJob(u -> u
    .backgroundPersistInterval(b -> b
        .time("2h")
    )
    .description("An updated job")
    .detectors(d -> d
        .detectorIndex(0)
        .description("An updated detector description")
    )
    .groups(List.of("kibana_sample_data","kibana_sample_web_logs"))
    .jobId("low_request_rate")
    .modelPlotConfig(m -> m
        .enabled(true)
    )
    .modelSnapshotRetentionDays(7L)
    .renormalizationWindowDays(30L)
    .resultsRetentionDays(60L)
);
Request example
An example body for a `POST _ml/anomaly_detectors/low_request_rate/_update` request.
{
  "description":"An updated job",
  "detectors": {
    "detector_index": 0,
    "description": "An updated detector description"
  },
  "groups": ["kibana_sample_data","kibana_sample_web_logs"],
  "model_plot_config": {
    "enabled": true
  },
  "renormalization_window_days": 30,
  "background_persist_interval": "2h",
  "model_snapshot_retention_days": 7,
  "results_retention_days": 60
}




Upgrade a snapshot Generally available; Added in 5.4.0

POST /_ml/anomaly_detectors/{job_id}/model_snapshots/{snapshot_id}/_upgrade

Upgrade an anomaly detection model snapshot to the latest major version. Over time, older snapshot formats are deprecated and removed. Anomaly detection jobs support only snapshots that are from the current or previous major version. This API provides a means to upgrade a snapshot to the current major version. This aids in preparing the cluster for an upgrade to the next major version. Only one snapshot per anomaly detection job can be upgraded at a time and the upgraded snapshot cannot be the current snapshot of the anomaly detection job.

Required authorization

  • Cluster privileges: manage_ml

Path parameters

  • job_id string Required

    Identifier for the anomaly detection job.

  • snapshot_id string Required

    A numerical character string that uniquely identifies the model snapshot.

Query parameters

  • wait_for_completion boolean

    When true, the API won’t respond until the upgrade is complete. Otherwise, it responds as soon as the upgrade task is assigned to a node.

  • timeout string

    Controls the time to wait for the request to complete.

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • node string Required

      The ID of the node that the upgrade task was started on if it is still running. In serverless this will be the "serverless".

    • completed boolean Required

      When true, this means the task is complete. When false, it is still running.

POST /_ml/anomaly_detectors/{job_id}/model_snapshots/{snapshot_id}/_upgrade
POST _ml/anomaly_detectors/low_request_rate/model_snapshots/1828371/_upgrade?timeout=45m&wait_for_completion=true
resp = client.ml.upgrade_job_snapshot(
    job_id="low_request_rate",
    snapshot_id="1828371",
    timeout="45m",
    wait_for_completion=True,
)
const response = await client.ml.upgradeJobSnapshot({
  job_id: "low_request_rate",
  snapshot_id: 1828371,
  timeout: "45m",
  wait_for_completion: "true",
});
response = client.ml.upgrade_job_snapshot(
  job_id: "low_request_rate",
  snapshot_id: "1828371",
  timeout: "45m",
  wait_for_completion: "true"
)
$resp = $client->ml()->upgradeJobSnapshot([
    "job_id" => "low_request_rate",
    "snapshot_id" => "1828371",
    "timeout" => "45m",
    "wait_for_completion" => "true",
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_ml/anomaly_detectors/low_request_rate/model_snapshots/1828371/_upgrade?timeout=45m&wait_for_completion=true"
client.ml().upgradeJobSnapshot(u -> u
    .jobId("low_request_rate")
    .snapshotId("1828371")
    .timeout(t -> t
        .offset(45)
    )
    .waitForCompletion(true)
);

















Explain data frame analytics config Generally available; Added in 7.3.0

POST /_ml/data_frame/analytics/{id}/_explain

All methods and paths for this operation:

GET /_ml/data_frame/analytics/_explain

POST /_ml/data_frame/analytics/_explain
GET /_ml/data_frame/analytics/{id}/_explain
POST /_ml/data_frame/analytics/{id}/_explain

This API provides explanations for a data frame analytics config that either exists already or one that has not been created yet. The following explanations are provided:

  • which fields are included or not in the analysis and why,
  • how much memory is estimated to be required. The estimate can be used when deciding the appropriate value for model_memory_limit setting later on. If you have object fields or fields that are excluded via source filtering, they are not included in the explanation.

Required authorization

  • Cluster privileges: monitor_ml

Path parameters

  • id string Required

    Identifier for the data frame analytics job. This identifier can contain lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores. It must start and end with alphanumeric characters.

application/json

Body

  • source object

    The configuration of how to source the analysis data. It requires an index. Optionally, query and _source may be specified.

    Hide source attributes Show source attributes object
    • index string | array[string] Required

      Index or indices on which to perform the analysis. It can be a single index or index pattern as well as an array of indices or patterns. NOTE: If your source indices contain documents with the same IDs, only the document that is indexed last appears in the destination index.

    • runtime_mappings object

      Definitions of runtime fields that will become part of the mapping of the destination index.

      Hide runtime_mappings attribute Show runtime_mappings attribute object
      • * object Additional properties
        Hide * attributes Show * attributes object
        • fields object

          For type composite

          Hide fields attribute Show fields attribute object
          • * object Additional properties
        • fetch_fields array[object]

          For type lookup

        • format string

          A custom format for date type runtime fields.

        • input_field string

          For type lookup

        • target_field string

          For type lookup

        • target_index string

          For type lookup

        • script object

          Painless script executed at query time.

        • type string Required

          Field type, which can be: boolean, composite, date, double, geo_point, ip,keyword, long, or lookup.

          Values are boolean, composite, date, double, geo_point, geo_shape, ip, keyword, long, or lookup.

    • _source object

      Specify includes and/or `excludes patterns to select which fields will be present in the destination. Fields that are excluded cannot be included in the analysis.

      Hide _source attributes Show _source attributes object
      • includes array[string] Required

        An array of strings that defines the fields that will be excluded from the analysis. You do not need to add fields with unsupported data types to excludes, these fields are excluded from the analysis automatically.

      • excludes array[string] Required

        An array of strings that defines the fields that will be included in the analysis.

    • query object

      The Elasticsearch query domain-specific language (DSL). This value corresponds to the query object in an Elasticsearch search POST body. All the options that are supported by Elasticsearch can be used, as this object is passed verbatim to Elasticsearch. By default, this property has the following value: {"match_all": {}}.

      Query DSL
  • dest object

    The destination configuration, consisting of index and optionally results_field (ml by default).

    Hide dest attributes Show dest attributes object
    • index string Required

      Defines the destination index to store the results of the data frame analytics job.

    • results_field string

      Defines the name of the field in which to store the results of the analysis. Defaults to ml.

  • analysis object

    The analysis configuration, which contains the information necessary to perform one of the following types of analysis: classification, outlier detection, or regression.

    Hide analysis attributes Show analysis attributes object
    • classification object

      The configuration information necessary to perform classification.

      Hide classification attributes Show classification attributes object
      • alpha number

        Advanced configuration option. Machine learning uses loss guided tree growing, which means that the decision trees grow where the regularized loss decreases most quickly. This parameter affects loss calculations by acting as a multiplier of the tree depth. Higher alpha values result in shallower trees and faster training times. By default, this value is calculated during hyperparameter optimization. It must be greater than or equal to zero.

      • dependent_variable string Required

        Defines which field of the document is to be predicted. It must match one of the fields in the index being used to train. If this field is missing from a document, then that document will not be used for training, but a prediction with the trained model will be generated for it. It is also known as continuous target variable. For classification analysis, the data type of the field must be numeric (integer, short, long, byte), categorical (ip or keyword), or boolean. There must be no more than 30 different values in this field. For regression analysis, the data type of the field must be numeric.

      • downsample_factor number

        Advanced configuration option. Controls the fraction of data that is used to compute the derivatives of the loss function for tree training. A small value results in the use of a small fraction of the data. If this value is set to be less than 1, accuracy typically improves. However, too small a value may result in poor convergence for the ensemble and so require more trees. By default, this value is calculated during hyperparameter optimization. It must be greater than zero and less than or equal to 1.

      • early_stopping_enabled boolean

        Advanced configuration option. Specifies whether the training process should finish if it is not finding any better performing models. If disabled, the training process can take significantly longer and the chance of finding a better performing model is unremarkable.

        Default value is true.

      • eta number

        Advanced configuration option. The shrinkage applied to the weights. Smaller values result in larger forests which have a better generalization error. However, larger forests cause slower training. By default, this value is calculated during hyperparameter optimization. It must be a value between 0.001 and 1.

      • eta_growth_rate_per_tree number

        Advanced configuration option. Specifies the rate at which eta increases for each new tree that is added to the forest. For example, a rate of 1.05 increases eta by 5% for each extra tree. By default, this value is calculated during hyperparameter optimization. It must be between 0.5 and 2.

      • feature_bag_fraction number

        Advanced configuration option. Defines the fraction of features that will be used when selecting a random bag for each candidate split. By default, this value is calculated during hyperparameter optimization.

      • feature_processors array[object]

        Advanced configuration option. A collection of feature preprocessors that modify one or more included fields. The analysis uses the resulting one or more features instead of the original document field. However, these features are ephemeral; they are not stored in the destination index. Multiple feature_processors entries can refer to the same document fields. Automatic categorical feature encoding still occurs for the fields that are unprocessed by a custom processor or that have categorical values. Use this property only if you want to override the automatic feature encoding of the specified fields.

      • gamma number

        Advanced configuration option. Regularization parameter to prevent overfitting on the training data set. Multiplies a linear penalty associated with the size of individual trees in the forest. A high gamma value causes training to prefer small trees. A small gamma value results in larger individual trees and slower training. By default, this value is calculated during hyperparameter optimization. It must be a nonnegative value.

      • lambda number

        Advanced configuration option. Regularization parameter to prevent overfitting on the training data set. Multiplies an L2 regularization term which applies to leaf weights of the individual trees in the forest. A high lambda value causes training to favor small leaf weights. This behavior makes the prediction function smoother at the expense of potentially not being able to capture relevant relationships between the features and the dependent variable. A small lambda value results in large individual trees and slower training. By default, this value is calculated during hyperparameter optimization. It must be a nonnegative value.

      • max_optimization_rounds_per_hyperparameter number

        Advanced configuration option. A multiplier responsible for determining the maximum number of hyperparameter optimization steps in the Bayesian optimization procedure. The maximum number of steps is determined based on the number of undefined hyperparameters times the maximum optimization rounds per hyperparameter. By default, this value is calculated during hyperparameter optimization.

      • max_trees number

        Advanced configuration option. Defines the maximum number of decision trees in the forest. The maximum value is 2000. By default, this value is calculated during hyperparameter optimization.

      • num_top_feature_importance_values number

        Advanced configuration option. Specifies the maximum number of feature importance values per document to return. By default, no feature importance calculation occurs.

        Default value is 0.

      • prediction_field_name string

        Defines the name of the prediction field in the results. Defaults to <dependent_variable>_prediction.

      • randomize_seed number

        Defines the seed for the random generator that is used to pick training data. By default, it is randomly generated. Set it to a specific value to use the same training data each time you start a job (assuming other related parameters such as source and analyzed_fields are the same).

      • soft_tree_depth_limit number

        Advanced configuration option. Machine learning uses loss guided tree growing, which means that the decision trees grow where the regularized loss decreases most quickly. This soft limit combines with the soft_tree_depth_tolerance to penalize trees that exceed the specified depth; the regularized loss increases quickly beyond this depth. By default, this value is calculated during hyperparameter optimization. It must be greater than or equal to 0.

      • soft_tree_depth_tolerance number

        Advanced configuration option. This option controls how quickly the regularized loss increases when the tree depth exceeds soft_tree_depth_limit. By default, this value is calculated during hyperparameter optimization. It must be greater than or equal to 0.01.

      • training_percent
      • class_assignment_objective string
      • num_top_classes number

        Defines the number of categories for which the predicted probabilities are reported. It must be non-negative or -1. If it is -1 or greater than the total number of categories, probabilities are reported for all categories; if you have a large number of categories, there could be a significant effect on the size of your destination index. NOTE: To use the AUC ROC evaluation method, num_top_classes must be set to -1 or a value greater than or equal to the total number of categories.

        Default value is 2.

    • outlier_detection object

      The configuration information necessary to perform outlier detection. NOTE: Advanced parameters are for fine-tuning classification analysis. They are set automatically by hyperparameter optimization to give the minimum validation error. It is highly recommended to use the default values unless you fully understand the function of these parameters.

      Hide outlier_detection attributes Show outlier_detection attributes object
      • compute_feature_influence boolean

        Specifies whether the feature influence calculation is enabled.

        Default value is true.

      • feature_influence_threshold number

        The minimum outlier score that a document needs to have in order to calculate its feature influence score. Value range: 0-1.

        Default value is 0.1.

      • method string

        The method that outlier detection uses. Available methods are lof, ldof, distance_kth_nn, distance_knn, and ensemble. The default value is ensemble, which means that outlier detection uses an ensemble of different methods and normalises and combines their individual outlier scores to obtain the overall outlier score.

        Default value is ensemble.

      • n_neighbors number

        Defines the value for how many nearest neighbors each method of outlier detection uses to calculate its outlier score. When the value is not set, different values are used for different ensemble members. This default behavior helps improve the diversity in the ensemble; only override it if you are confident that the value you choose is appropriate for the data set.

      • outlier_fraction number

        The proportion of the data set that is assumed to be outlying prior to outlier detection. For example, 0.05 means it is assumed that 5% of values are real outliers and 95% are inliers.

      • standardization_enabled boolean

        If true, the following operation is performed on the columns before computing outlier scores: (x_i - mean(x_i)) / sd(x_i).

        Default value is true.

    • regression object

      The configuration information necessary to perform regression. NOTE: Advanced parameters are for fine-tuning regression analysis. They are set automatically by hyperparameter optimization to give the minimum validation error. It is highly recommended to use the default values unless you fully understand the function of these parameters.

      Hide regression attributes Show regression attributes object
      • alpha number

        Advanced configuration option. Machine learning uses loss guided tree growing, which means that the decision trees grow where the regularized loss decreases most quickly. This parameter affects loss calculations by acting as a multiplier of the tree depth. Higher alpha values result in shallower trees and faster training times. By default, this value is calculated during hyperparameter optimization. It must be greater than or equal to zero.

      • dependent_variable string Required

        Defines which field of the document is to be predicted. It must match one of the fields in the index being used to train. If this field is missing from a document, then that document will not be used for training, but a prediction with the trained model will be generated for it. It is also known as continuous target variable. For classification analysis, the data type of the field must be numeric (integer, short, long, byte), categorical (ip or keyword), or boolean. There must be no more than 30 different values in this field. For regression analysis, the data type of the field must be numeric.

      • downsample_factor number

        Advanced configuration option. Controls the fraction of data that is used to compute the derivatives of the loss function for tree training. A small value results in the use of a small fraction of the data. If this value is set to be less than 1, accuracy typically improves. However, too small a value may result in poor convergence for the ensemble and so require more trees. By default, this value is calculated during hyperparameter optimization. It must be greater than zero and less than or equal to 1.

      • early_stopping_enabled boolean

        Advanced configuration option. Specifies whether the training process should finish if it is not finding any better performing models. If disabled, the training process can take significantly longer and the chance of finding a better performing model is unremarkable.

        Default value is true.

      • eta number

        Advanced configuration option. The shrinkage applied to the weights. Smaller values result in larger forests which have a better generalization error. However, larger forests cause slower training. By default, this value is calculated during hyperparameter optimization. It must be a value between 0.001 and 1.

      • eta_growth_rate_per_tree number

        Advanced configuration option. Specifies the rate at which eta increases for each new tree that is added to the forest. For example, a rate of 1.05 increases eta by 5% for each extra tree. By default, this value is calculated during hyperparameter optimization. It must be between 0.5 and 2.

      • feature_bag_fraction number

        Advanced configuration option. Defines the fraction of features that will be used when selecting a random bag for each candidate split. By default, this value is calculated during hyperparameter optimization.

      • feature_processors array[object]

        Advanced configuration option. A collection of feature preprocessors that modify one or more included fields. The analysis uses the resulting one or more features instead of the original document field. However, these features are ephemeral; they are not stored in the destination index. Multiple feature_processors entries can refer to the same document fields. Automatic categorical feature encoding still occurs for the fields that are unprocessed by a custom processor or that have categorical values. Use this property only if you want to override the automatic feature encoding of the specified fields.

      • gamma number

        Advanced configuration option. Regularization parameter to prevent overfitting on the training data set. Multiplies a linear penalty associated with the size of individual trees in the forest. A high gamma value causes training to prefer small trees. A small gamma value results in larger individual trees and slower training. By default, this value is calculated during hyperparameter optimization. It must be a nonnegative value.

      • lambda number

        Advanced configuration option. Regularization parameter to prevent overfitting on the training data set. Multiplies an L2 regularization term which applies to leaf weights of the individual trees in the forest. A high lambda value causes training to favor small leaf weights. This behavior makes the prediction function smoother at the expense of potentially not being able to capture relevant relationships between the features and the dependent variable. A small lambda value results in large individual trees and slower training. By default, this value is calculated during hyperparameter optimization. It must be a nonnegative value.

      • max_optimization_rounds_per_hyperparameter number

        Advanced configuration option. A multiplier responsible for determining the maximum number of hyperparameter optimization steps in the Bayesian optimization procedure. The maximum number of steps is determined based on the number of undefined hyperparameters times the maximum optimization rounds per hyperparameter. By default, this value is calculated during hyperparameter optimization.

      • max_trees number

        Advanced configuration option. Defines the maximum number of decision trees in the forest. The maximum value is 2000. By default, this value is calculated during hyperparameter optimization.

      • num_top_feature_importance_values number

        Advanced configuration option. Specifies the maximum number of feature importance values per document to return. By default, no feature importance calculation occurs.

        Default value is 0.

      • prediction_field_name string

        Defines the name of the prediction field in the results. Defaults to <dependent_variable>_prediction.

      • randomize_seed number

        Defines the seed for the random generator that is used to pick training data. By default, it is randomly generated. Set it to a specific value to use the same training data each time you start a job (assuming other related parameters such as source and analyzed_fields are the same).

      • soft_tree_depth_limit number

        Advanced configuration option. Machine learning uses loss guided tree growing, which means that the decision trees grow where the regularized loss decreases most quickly. This soft limit combines with the soft_tree_depth_tolerance to penalize trees that exceed the specified depth; the regularized loss increases quickly beyond this depth. By default, this value is calculated during hyperparameter optimization. It must be greater than or equal to 0.

      • soft_tree_depth_tolerance number

        Advanced configuration option. This option controls how quickly the regularized loss increases when the tree depth exceeds soft_tree_depth_limit. By default, this value is calculated during hyperparameter optimization. It must be greater than or equal to 0.01.

      • training_percent
      • loss_function string

        The loss function used during regression. Available options are mse (mean squared error), msle (mean squared logarithmic error), huber (Pseudo-Huber loss).

        Default value is mse.

      • loss_function_parameter number

        A positive number that is used as a parameter to the loss_function.

  • description string

    A description of the job.

  • model_memory_limit string

    The approximate maximum amount of memory resources that are permitted for analytical processing. If your elasticsearch.yml file contains an xpack.ml.max_model_memory_limit setting, an error occurs when you try to create data frame analytics jobs that have model_memory_limit values greater than that setting.

    Default value is 1gb.

  • max_num_threads number

    The maximum number of threads to be used by the analysis. Using more threads may decrease the time necessary to complete the analysis at the cost of using more CPU. Note that the process may use additional threads for operational functionality other than the analysis itself.

    Default value is 1.

  • analyzed_fields object

    Specify includes and/or excludes patterns to select which fields will be included in the analysis. The patterns specified in excludes are applied last, therefore excludes takes precedence. In other words, if the same field is specified in both includes and excludes, then the field will not be included in the analysis.

    Hide analyzed_fields attributes Show analyzed_fields attributes object
    • includes array[string] Required

      An array of strings that defines the fields that will be excluded from the analysis. You do not need to add fields with unsupported data types to excludes, these fields are excluded from the analysis automatically.

    • excludes array[string] Required

      An array of strings that defines the fields that will be included in the analysis.

  • allow_lazy_start boolean

    Specifies whether this job can start when there is insufficient machine learning node capacity for it to be immediately assigned to a node.

    Default value is false.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • field_selection array[object] Required

      An array of objects that explain selection for each field, sorted by the field names.

      Hide field_selection attributes Show field_selection attributes object
      • is_included boolean Required

        Whether the field is selected to be included in the analysis.

      • is_required boolean Required

        Whether the field is required.

      • feature_type string

        The feature type of this field for the analysis. May be categorical or numerical.

      • mapping_types array[string] Required

        The mapping types of the field.

      • name string Required

        The field name.

      • reason string

        The reason a field is not selected to be included in the analysis.

    • memory_estimation object Required

      An array of objects that explain selection for each field, sorted by the field names.

      Hide memory_estimation attributes Show memory_estimation attributes object
      • expected_memory_with_disk string Required

        Estimated memory usage under the assumption that overflowing to disk is allowed during data frame analytics. expected_memory_with_disk is usually smaller than expected_memory_without_disk as using disk allows to limit the main memory needed to perform data frame analytics.

      • expected_memory_without_disk string Required

        Estimated memory usage under the assumption that the whole data frame analytics should happen in memory (i.e. without overflowing to disk).

POST /_ml/data_frame/analytics/{id}/_explain
POST _ml/data_frame/analytics/_explain
{
  "source": {
    "index": "houses_sold_last_10_yrs"
  },
  "analysis": {
    "regression": {
      "dependent_variable": "price"
    }
  }
}
resp = client.ml.explain_data_frame_analytics(
    source={
        "index": "houses_sold_last_10_yrs"
    },
    analysis={
        "regression": {
            "dependent_variable": "price"
        }
    },
)
const response = await client.ml.explainDataFrameAnalytics({
  source: {
    index: "houses_sold_last_10_yrs",
  },
  analysis: {
    regression: {
      dependent_variable: "price",
    },
  },
});
response = client.ml.explain_data_frame_analytics(
  body: {
    "source": {
      "index": "houses_sold_last_10_yrs"
    },
    "analysis": {
      "regression": {
        "dependent_variable": "price"
      }
    }
  }
)
$resp = $client->ml()->explainDataFrameAnalytics([
    "body" => [
        "source" => [
            "index" => "houses_sold_last_10_yrs",
        ],
        "analysis" => [
            "regression" => [
                "dependent_variable" => "price",
            ],
        ],
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"source":{"index":"houses_sold_last_10_yrs"},"analysis":{"regression":{"dependent_variable":"price"}}}' "$ELASTICSEARCH_URL/_ml/data_frame/analytics/_explain"
client.ml().explainDataFrameAnalytics(e -> e
    .analysis(a -> a
        .regression(r -> r
            .dependentVariable("price")
        )
    )
    .source(s -> s
        .index("houses_sold_last_10_yrs")
    )
);
Request example
Run `POST _ml/data_frame/analytics/_explain` to explain a data frame analytics job configuration.
{
  "source": {
    "index": "houses_sold_last_10_yrs"
  },
  "analysis": {
    "regression": {
      "dependent_variable": "price"
    }
  }
}
Response examples (200)
A succesful response for explaining a data frame analytics job configuration.
{
  "field_selection": [
    {
      "field": "number_of_bedrooms",
      "mappings_types": [
        "integer"
      ],
      "is_included": true,
      "is_required": false,
      "feature_type": "numerical"
    },
    {
      "field": "postcode",
      "mappings_types": [
        "text"
      ],
      "is_included": false,
      "is_required": false,
      "reason": "[postcode.keyword] is preferred because it is aggregatable"
    },
    {
      "field": "postcode.keyword",
      "mappings_types": [
        "keyword"
      ],
      "is_included": true,
      "is_required": false,
      "feature_type": "categorical"
    },
    {
      "field": "price",
      "mappings_types": [
        "float"
      ],
      "is_included": true,
      "is_required": true,
      "feature_type": "numerical"
    }
  ],
  "memory_estimation": {
    "expected_memory_without_disk": "128MB",
    "expected_memory_with_disk": "32MB"
  }
}

































Delete an unreferenced trained model Generally available; Added in 7.10.0

DELETE /_ml/trained_models/{model_id}

The request deletes a trained inference model that is not referenced by an ingest pipeline.

Required authorization

  • Cluster privileges: manage_ml

Path parameters

  • model_id string Required

    The unique identifier of the trained model.

Query parameters

  • force boolean

    Forcefully deletes a trained model that is referenced by ingest pipelines or has a started deployment.

  • timeout string

    Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

DELETE /_ml/trained_models/{model_id}
DELETE _ml/trained_models/regression-job-one-1574775307356
resp = client.ml.delete_trained_model(
    model_id="regression-job-one-1574775307356",
)
const response = await client.ml.deleteTrainedModel({
  model_id: "regression-job-one-1574775307356",
});
response = client.ml.delete_trained_model(
  model_id: "regression-job-one-1574775307356"
)
$resp = $client->ml()->deleteTrainedModel([
    "model_id" => "regression-job-one-1574775307356",
]);
curl -X DELETE -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_ml/trained_models/regression-job-one-1574775307356"
client.ml().deleteTrainedModel(d -> d
    .modelId("regression-job-one-1574775307356")
);
Response examples (200)
A successful response when deleting an existing trained inference model.
{
  "acknowledged": true
}
















Create part of a trained model definition Generally available; Added in 8.0.0

PUT /_ml/trained_models/{model_id}/definition/{part}

Required authorization

  • Cluster privileges: manage_ml

Path parameters

  • model_id string Required

    The unique identifier of the trained model.

  • part number Required

    The definition part number. When the definition is loaded for inference the definition parts are streamed in the order of their part number. The first part must be 0 and the final part must be total_parts - 1.

application/json

Body Required

  • definition string Required

    The definition part for the model. Must be a base64 encoded string.

  • total_definition_length number Required

    The total uncompressed definition length in bytes. Not base64 encoded.

  • total_parts number Required

    The total number of parts that will be uploaded. Must be greater than 0.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

PUT /_ml/trained_models/{model_id}/definition/{part}
PUT _ml/trained_models/elastic__distilbert-base-uncased-finetuned-conll03-english/definition/0
{
    "definition": "...",
    "total_definition_length": 265632637,
    "total_parts": 64
}
resp = client.ml.put_trained_model_definition_part(
    model_id="elastic__distilbert-base-uncased-finetuned-conll03-english",
    part="0",
    definition="...",
    total_definition_length=265632637,
    total_parts=64,
)
const response = await client.ml.putTrainedModelDefinitionPart({
  model_id: "elastic__distilbert-base-uncased-finetuned-conll03-english",
  part: 0,
  definition: "...",
  total_definition_length: 265632637,
  total_parts: 64,
});
response = client.ml.put_trained_model_definition_part(
  model_id: "elastic__distilbert-base-uncased-finetuned-conll03-english",
  part: "0",
  body: {
    "definition": "...",
    "total_definition_length": 265632637,
    "total_parts": 64
  }
)
$resp = $client->ml()->putTrainedModelDefinitionPart([
    "model_id" => "elastic__distilbert-base-uncased-finetuned-conll03-english",
    "part" => "0",
    "body" => [
        "definition" => "...",
        "total_definition_length" => 265632637,
        "total_parts" => 64,
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"definition":"...","total_definition_length":265632637,"total_parts":64}' "$ELASTICSEARCH_URL/_ml/trained_models/elastic__distilbert-base-uncased-finetuned-conll03-english/definition/0"
Request example
An example body for a `PUT _ml/trained_models/elastic__distilbert-base-uncased-finetuned-conll03-english/definition/0` request.
{
    "definition": "...",
    "total_definition_length": 265632637,
    "total_parts": 64
}