Check in a connector Technical preview

PUT /_connector/{connector_id}/_check_in

Update the last_seen field in the connector and set it to the current timestamp.

Path parameters

  • connector_id string Required

    The unique identifier of the connector to be checked in

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • result string Required

      Values are created, updated, deleted, not_found, or noop.

PUT /_connector/{connector_id}/_check_in
PUT _connector/my-connector/_check_in
resp = client.connector.check_in(
    connector_id="my-connector",
)
const response = await client.connector.checkIn({
  connector_id: "my-connector",
});
response = client.connector.check_in(
  connector_id: "my-connector"
)
$resp = $client->connector()->checkIn([
    "connector_id" => "my-connector",
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_connector/my-connector/_check_in"
client.connector().checkIn(c -> c
    .connectorId("my-connector")
);
Response examples (200)
{
    "result": "updated"
}




Create or update a connector Beta

PUT /_connector/{connector_id}

All methods and paths for this operation:

PUT /_connector

PUT /_connector/{connector_id}

Path parameters

  • connector_id string Required

    The unique identifier of the connector to be created or updated. ID is auto-generated if not provided.

application/json

Body

  • description string
  • index_name string
  • is_native boolean
  • language string
  • name string
  • service_type string

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • result string Required

      Values are created, updated, deleted, not_found, or noop.

    • id string Required
PUT _connector/my-connector
{
  "index_name": "search-google-drive",
  "name": "My Connector",
  "service_type": "google_drive"
}
resp = client.connector.put(
    connector_id="my-connector",
    index_name="search-google-drive",
    name="My Connector",
    service_type="google_drive",
)
const response = await client.connector.put({
  connector_id: "my-connector",
  index_name: "search-google-drive",
  name: "My Connector",
  service_type: "google_drive",
});
response = client.connector.put(
  connector_id: "my-connector",
  body: {
    "index_name": "search-google-drive",
    "name": "My Connector",
    "service_type": "google_drive"
  }
)
$resp = $client->connector()->put([
    "connector_id" => "my-connector",
    "body" => [
        "index_name" => "search-google-drive",
        "name" => "My Connector",
        "service_type" => "google_drive",
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"index_name":"search-google-drive","name":"My Connector","service_type":"google_drive"}' "$ELASTICSEARCH_URL/_connector/my-connector"
client.connector().put(p -> p
    .connectorId("my-connector")
    .indexName("search-google-drive")
    .name("My Connector")
    .serviceType("google_drive")
);
Request examples
{
  "index_name": "search-google-drive",
  "name": "My Connector",
  "service_type": "google_drive"
}
{
  "index_name": "search-google-drive",
  "name": "My Connector",
  "description": "My Connector to sync data to Elastic index from Google Drive",
  "service_type": "google_drive",
  "language": "english"
}
Response examples (200)
{
  "result": "created",
  "id": "my-connector"
}




































Update the connector API key ID Beta

PUT /_connector/{connector_id}/_api_key_id

Update the api_key_id and api_key_secret_id fields of a connector. You can specify the ID of the API key used for authorization and the ID of the connector secret where the API key is stored. The connector secret ID is required only for Elastic managed (native) connectors. Self-managed connectors (connector clients) do not use this field.

Path parameters

  • connector_id string Required

    The unique identifier of the connector to be updated

application/json

Body Required

  • api_key_id string
  • api_key_secret_id string

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • result string Required

      Values are created, updated, deleted, not_found, or noop.

PUT /_connector/{connector_id}/_api_key_id
PUT _connector/my-connector/_api_key_id
{
    "api_key_id": "my-api-key-id",
    "api_key_secret_id": "my-connector-secret-id"
}
resp = client.connector.update_api_key_id(
    connector_id="my-connector",
    api_key_id="my-api-key-id",
    api_key_secret_id="my-connector-secret-id",
)
const response = await client.connector.updateApiKeyId({
  connector_id: "my-connector",
  api_key_id: "my-api-key-id",
  api_key_secret_id: "my-connector-secret-id",
});
response = client.connector.update_api_key_id(
  connector_id: "my-connector",
  body: {
    "api_key_id": "my-api-key-id",
    "api_key_secret_id": "my-connector-secret-id"
  }
)
$resp = $client->connector()->updateApiKeyId([
    "connector_id" => "my-connector",
    "body" => [
        "api_key_id" => "my-api-key-id",
        "api_key_secret_id" => "my-connector-secret-id",
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"api_key_id":"my-api-key-id","api_key_secret_id":"my-connector-secret-id"}' "$ELASTICSEARCH_URL/_connector/my-connector/_api_key_id"
client.connector().updateApiKeyId(u -> u
    .apiKeyId("my-api-key-id")
    .apiKeySecretId("my-connector-secret-id")
    .connectorId("my-connector")
);
Request example
{
    "api_key_id": "my-api-key-id",
    "api_key_secret_id": "my-connector-secret-id"
}
Response examples (200)
{
  "result": "updated"
}

Update the connector configuration Beta

PUT /_connector/{connector_id}/_configuration

Update the configuration field in the connector document.

Path parameters

  • connector_id string Required

    The unique identifier of the connector to be updated

application/json

Body Required

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • result string Required

      Values are created, updated, deleted, not_found, or noop.

PUT /_connector/{connector_id}/_configuration
PUT _connector/my-spo-connector/_configuration
{
    "values": {
        "tenant_id": "my-tenant-id",
        "tenant_name": "my-sharepoint-site",
        "client_id": "foo",
        "secret_value": "bar",
        "site_collections": "*"
    }
}
resp = client.connector.update_configuration(
    connector_id="my-spo-connector",
    values={
        "tenant_id": "my-tenant-id",
        "tenant_name": "my-sharepoint-site",
        "client_id": "foo",
        "secret_value": "bar",
        "site_collections": "*"
    },
)
const response = await client.connector.updateConfiguration({
  connector_id: "my-spo-connector",
  values: {
    tenant_id: "my-tenant-id",
    tenant_name: "my-sharepoint-site",
    client_id: "foo",
    secret_value: "bar",
    site_collections: "*",
  },
});
response = client.connector.update_configuration(
  connector_id: "my-spo-connector",
  body: {
    "values": {
      "tenant_id": "my-tenant-id",
      "tenant_name": "my-sharepoint-site",
      "client_id": "foo",
      "secret_value": "bar",
      "site_collections": "*"
    }
  }
)
$resp = $client->connector()->updateConfiguration([
    "connector_id" => "my-spo-connector",
    "body" => [
        "values" => [
            "tenant_id" => "my-tenant-id",
            "tenant_name" => "my-sharepoint-site",
            "client_id" => "foo",
            "secret_value" => "bar",
            "site_collections" => "*",
        ],
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"values":{"tenant_id":"my-tenant-id","tenant_name":"my-sharepoint-site","client_id":"foo","secret_value":"bar","site_collections":"*"}}' "$ELASTICSEARCH_URL/_connector/my-spo-connector/_configuration"
client.connector().updateConfiguration(u -> u
    .connectorId("my-spo-connector")
    .values(Map.of("tenant_id", JsonData.fromJson("\"my-tenant-id\""),"tenant_name", JsonData.fromJson("\"my-sharepoint-site\""),"secret_value", JsonData.fromJson("\"bar\""),"client_id", JsonData.fromJson("\"foo\""),"site_collections", JsonData.fromJson("\"*\"")))
);
{
    "values": {
        "tenant_id": "my-tenant-id",
        "tenant_name": "my-sharepoint-site",
        "client_id": "foo",
        "secret_value": "bar",
        "site_collections": "*"
    }
}
{
    "values": {
        "secret_value": "foo-bar"
    }
}
Response examples (200)
{
  "result": "updated"
}

Update the connector error field Technical preview

PUT /_connector/{connector_id}/_error

Set the error field for the connector. If the error provided in the request body is non-null, the connector’s status is updated to error. Otherwise, if the error is reset to null, the connector status is updated to connected.

Path parameters

  • connector_id string Required

    The unique identifier of the connector to be updated

application/json

Body Required

  • error string | null Required

    One of:

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • result string Required

      Values are created, updated, deleted, not_found, or noop.

PUT /_connector/{connector_id}/_error
PUT _connector/my-connector/_error
{
    "error": "Houston, we have a problem!"
}
resp = client.connector.update_error(
    connector_id="my-connector",
    error="Houston, we have a problem!",
)
const response = await client.connector.updateError({
  connector_id: "my-connector",
  error: "Houston, we have a problem!",
});
response = client.connector.update_error(
  connector_id: "my-connector",
  body: {
    "error": "Houston, we have a problem!"
  }
)
$resp = $client->connector()->updateError([
    "connector_id" => "my-connector",
    "body" => [
        "error" => "Houston, we have a problem!",
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"error":"Houston, we have a problem!"}' "$ELASTICSEARCH_URL/_connector/my-connector/_error"
client.connector().updateError(u -> u
    .connectorId("my-connector")
    .error("Houston, we have a problem!")
);
Request example
{
    "error": "Houston, we have a problem!"
}
Response examples (200)
{
  "result": "updated"
}





















































































Convert an index alias to a data stream Generally available

POST /_data_stream/_migrate/{name}

Converts an index alias to a data stream. You must have a matching index template that is data stream enabled. The alias must meet the following criteria: The alias must have a write index; All indices for the alias must have a @timestamp field mapping of a date or date_nanos field type; The alias must not have any filters; The alias must not use custom routing. If successful, the request removes the alias and creates a data stream with the same name. The indices for the alias become hidden backing indices for the stream. The write index for the alias becomes the write index for the stream.

Required authorization

  • Index privileges: manage

Path parameters

  • name string Required

    Name of the index alias to convert to a data stream.

Query parameters

  • master_timeout string

    Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

  • timeout string

    Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

POST /_data_stream/_migrate/{name}
POST _data_stream/_migrate/my-time-series-data
resp = client.indices.migrate_to_data_stream(
    name="my-time-series-data",
)
const response = await client.indices.migrateToDataStream({
  name: "my-time-series-data",
});
response = client.indices.migrate_to_data_stream(
  name: "my-time-series-data"
)
$resp = $client->indices()->migrateToDataStream([
    "name" => "my-time-series-data",
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_data_stream/_migrate/my-time-series-data"
client.indices().migrateToDataStream(m -> m
    .name("my-time-series-data")
);





Bulk index or delete documents Generally available

PUT /{index}/_bulk

All methods and paths for this operation:

POST /_bulk

PUT /_bulk
POST /{index}/_bulk
PUT /{index}/_bulk

Perform multiple index, create, delete, and update actions in a single request. This reduces overhead and can greatly increase indexing speed.

If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or index alias:

  • To use the create action, you must have the create_doc, create, index, or write index privilege. Data streams support only the create action.
  • To use the index action, you must have the create, index, or write index privilege.
  • To use the delete action, you must have the delete or write index privilege.
  • To use the update action, you must have the index or write index privilege.
  • To automatically create a data stream or index with a bulk API request, you must have the auto_configure, create_index, or manage index privilege.
  • To make the result of a bulk operation visible to search using the refresh parameter, you must have the maintenance or manage index privilege.

Automatic data stream creation requires a matching index template with data stream enabled.

The actions are specified in the request body using a newline delimited JSON (NDJSON) structure:

action_and_meta_data\n
optional_source\n
action_and_meta_data\n
optional_source\n
....
action_and_meta_data\n
optional_source\n

The index and create actions expect a source on the next line and have the same semantics as the op_type parameter in the standard index API. A create action fails if a document with the same ID already exists in the target An index action adds or replaces a document as necessary.

NOTE: Data streams support only the create action. To update or delete a document in a data stream, you must target the backing index containing the document.

An update action expects that the partial doc, upsert, and script and its options are specified on the next line.

A delete action does not expect a source on the next line and has the same semantics as the standard delete API.

NOTE: The final line of data must end with a newline character (\n). Each newline character may be preceded by a carriage return (\r). When sending NDJSON data to the _bulk endpoint, use a Content-Type header of application/json or application/x-ndjson. Because this format uses literal newline characters (\n) as delimiters, make sure that the JSON actions and sources are not pretty printed.

If you provide a target in the request path, it is used for any actions that don't explicitly specify an _index argument.

A note on the format: the idea here is to make processing as fast as possible. As some of the actions are redirected to other shards on other nodes, only action_meta_data is parsed on the receiving node side.

Client libraries using this protocol should try and strive to do something similar on the client side, and reduce buffering as much as possible.

There is no "correct" number of actions to perform in a single bulk request. Experiment with different settings to find the optimal size for your particular workload. Note that Elasticsearch limits the maximum size of a HTTP request to 100mb by default so clients must ensure that no request exceeds this size. It is not possible to index a single document that exceeds the size limit, so you must pre-process any such documents into smaller pieces before sending them to Elasticsearch. For instance, split documents into pages or chapters before indexing them, or store raw binary data in a system outside Elasticsearch and replace the raw data with a link to the external system in the documents that you send to Elasticsearch.

Client suppport for bulk requests

Some of the officially supported clients provide helpers to assist with bulk requests and reindexing:

  • Go: Check out esutil.BulkIndexer
  • Perl: Check out Search::Elasticsearch::Client::5_0::Bulk and Search::Elasticsearch::Client::5_0::Scroll
  • Python: Check out elasticsearch.helpers.*
  • JavaScript: Check out client.helpers.*
  • .NET: Check out BulkAllObservable
  • PHP: Check out bulk indexing.
  • Ruby: Check out Elasticsearch::Helpers::BulkHelper

Submitting bulk requests with cURL

If you're providing text file input to curl, you must use the --data-binary flag instead of plain -d. The latter doesn't preserve newlines. For example:

$ cat requests
{ "index" : { "_index" : "test", "_id" : "1" } }
{ "field1" : "value1" }
$ curl -s -H "Content-Type: application/x-ndjson" -XPOST localhost:9200/_bulk --data-binary "@requests"; echo
{"took":7, "errors": false, "items":[{"index":{"_index":"test","_id":"1","_version":1,"result":"created","forced_refresh":false}}]}

Optimistic concurrency control

Each index and delete action within a bulk API call may include the if_seq_no and if_primary_term parameters in their respective action and meta data lines. The if_seq_no and if_primary_term parameters control how operations are run, based on the last modification to existing documents. See Optimistic concurrency control for more details.

Versioning

Each bulk item can include the version value using the version field. It automatically follows the behavior of the index or delete operation based on the _version mapping. It also support the version_type.

Routing

Each bulk item can include the routing value using the routing field. It automatically follows the behavior of the index or delete operation based on the _routing mapping.

NOTE: Data streams do not support custom routing unless they were created with the allow_custom_routing setting enabled in the template.

Wait for active shards

When making bulk calls, you can set the wait_for_active_shards parameter to require a minimum number of shard copies to be active before starting to process the bulk request.

Refresh

Control when the changes made by this request are visible to search.

NOTE: Only the shards that receive the bulk request will be affected by refresh. Imagine a _bulk?refresh=wait_for request with three documents in it that happen to be routed to different shards in an index with five shards. The request will only wait for those three shards to refresh. The other two shards that make up the index do not participate in the _bulk request at all.

You might want to disable the refresh interval temporarily to improve indexing throughput for large bulk requests. Refer to the linked documentation for step-by-step instructions using the index settings API.

External documentation

Path parameters

  • index string Required

    The name of the data stream, index, or index alias to perform bulk actions on.

Query parameters

  • include_source_on_error boolean

    True or false if to include the document source in the error message in case of parsing errors.

  • list_executed_pipelines boolean

    If true, the response will include the ingest pipelines that were run for each index or create.

  • pipeline string

    The pipeline identifier to use to preprocess incoming documents. If the index has a default ingest pipeline specified, setting the value to _none turns off the default ingest pipeline for this request. If a final pipeline is configured, it will always run regardless of the value of this parameter.

  • refresh string

    If true, Elasticsearch refreshes the affected shards to make this operation visible to search. If wait_for, wait for a refresh to make this operation visible to search. If false, do nothing with refreshes. Valid values: true, false, wait_for.

    Values are true, false, or wait_for.

  • routing string

    A custom value that is used to route operations to a specific shard.

  • _source boolean | string | array[string]

    Indicates whether to return the _source field (true or false) or contains a list of fields to return.

  • _source_excludes string | array[string]

    A comma-separated list of source fields to exclude from the response. You can also use this parameter to exclude fields from the subset specified in _source_includes query parameter. If the _source parameter is false, this parameter is ignored.

  • _source_includes string | array[string]

    A comma-separated list of source fields to include in the response. If this parameter is specified, only these source fields are returned. You can exclude fields from this subset using the _source_excludes query parameter. If the _source parameter is false, this parameter is ignored.

  • timeout string

    The period each action waits for the following operations: automatic index creation, dynamic mapping updates, and waiting for active shards. The default is 1m (one minute), which guarantees Elasticsearch waits for at least the timeout before failing. The actual wait time could be longer, particularly when multiple waits occur.

    Values are -1 or 0.

  • wait_for_active_shards number | string

    The number of shard copies that must be active before proceeding with the operation. Set to all or any positive integer up to the total number of shards in the index (number_of_replicas+1). The default is 1, which waits for each primary shard to be active.

    Values are all or index-setting.

  • require_alias boolean

    If true, the request's actions must target an index alias.

  • require_data_stream boolean

    If true, the request's actions must target a data stream (existing or to be created).

application/json

Body object Required

One of:
  • index object

    Index the specified document. If the document exists, it replaces the document and increments the version. The following line must contain the source data to be indexed.

    Hide index attributes Show index attributes object
    • if_primary_term number
    • dynamic_templates object

      A map from the full name of fields to the name of dynamic templates. It defaults to an empty map. If a name matches a dynamic template, that template will be applied regardless of other match predicates defined in the template. If a field is already defined in the mapping, then this parameter won't be used.

    • pipeline string

      The ID of the pipeline to use to preprocess incoming documents. If the index has a default ingest pipeline specified, setting the value to _none turns off the default ingest pipeline for this request. If a final pipeline is configured, it will always run regardless of the value of this parameter.

    • require_alias boolean

      If true, the request's actions must target an index alias.

      Default value is false.

  • create object

    Index the specified document if it does not already exist. The following line must contain the source data to be indexed.

    Hide create attributes Show create attributes object
    • if_primary_term number
    • dynamic_templates object

      A map from the full name of fields to the name of dynamic templates. It defaults to an empty map. If a name matches a dynamic template, that template will be applied regardless of other match predicates defined in the template. If a field is already defined in the mapping, then this parameter won't be used.

    • pipeline string

      The ID of the pipeline to use to preprocess incoming documents. If the index has a default ingest pipeline specified, setting the value to _none turns off the default ingest pipeline for this request. If a final pipeline is configured, it will always run regardless of the value of this parameter.

    • require_alias boolean

      If true, the request's actions must target an index alias.

      Default value is false.

  • update object

    Perform a partial document update. The following line must contain the partial document and update options.

    Hide update attributes Show update attributes object
    • _id string

      The document ID.

    • _index string

      The name of the index or index alias to perform the action on.

    • routing string

      A custom value used to route operations to a specific shard.

    • if_primary_term number
    • if_seq_no number
    • version number
    • version_type string

      Supported values include:

      • internal: Use internal versioning that starts at 1 and increments with each update or delete.
      • external: Only index the document if the specified version is strictly higher than the version of the stored document or if there is no existing document.
      • external_gte: Only index the document if the specified version is equal or higher than the version of the stored document or if there is no existing document. NOTE: The external_gte version type is meant for special use cases and should be used with care. If used incorrectly, it can result in loss of data.
      • force: This option is deprecated because it can cause primary and replica shards to diverge.

      Values are internal, external, external_gte, or force.

    • require_alias boolean

      If true, the request's actions must target an index alias.

      Default value is false.

    • retry_on_conflict number

      The number of times an update should be retried in the case of a version conflict.

  • delete object

    Remove the specified document from the index.

    Hide delete attributes Show delete attributes object
    • _id string

      The document ID.

    • _index string

      The name of the index or index alias to perform the action on.

    • routing string

      A custom value used to route operations to a specific shard.

    • if_primary_term number
    • if_seq_no number
    • version number
    • version_type string

      Supported values include:

      • internal: Use internal versioning that starts at 1 and increments with each update or delete.
      • external: Only index the document if the specified version is strictly higher than the version of the stored document or if there is no existing document.
      • external_gte: Only index the document if the specified version is equal or higher than the version of the stored document or if there is no existing document. NOTE: The external_gte version type is meant for special use cases and should be used with care. If used incorrectly, it can result in loss of data.
      • force: This option is deprecated because it can cause primary and replica shards to diverge.

      Values are internal, external, external_gte, or force.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • errors boolean Required

      If true, one or more of the operations in the bulk request did not complete successfully.

    • items array[object] Required

      The result of each operation in the bulk request, in the order they were submitted.

      Hide items attribute Show items attribute object
      • * object Additional properties
        Hide * attributes Show * attributes object
        • _id string | null

          The document ID associated with the operation.

        • _index string Required

          The name of the index associated with the operation. If the operation targeted a data stream, this is the backing index into which the document was written.

        • status number Required

          The HTTP status code returned for the operation.

        • failure_store string

          Values are not_applicable_or_unknown, used, not_enabled, or failed.

        • error object

          Additional information about the failed operation. The property is returned only for failed operations.

          Hide error attributes Show error attributes object
          • type string Required

            The type of error

          • reason
          • stack_trace string

            The server stack trace. Present only if the error_trace=true parameter was sent with the request.

          • root_cause array[object]
          • suppressed array[object]
        • _primary_term number

          The primary term assigned to the document for the operation. This property is returned only for successful operations.

        • result string

          The result of the operation. Successful values are created, deleted, and updated.

        • _seq_no number

          The sequence number assigned to the document for the operation. Sequence numbers are used to ensure an older version of a document doesn't overwrite a newer version.

        • _shards object

          Shard information for the operation.

          Hide _shards attribute Show _shards attribute object
          • failures array[object]
        • _version number

          The document version associated with the operation. The document version is incremented each time the document is updated. This property is returned only for successful actions.

        • forced_refresh boolean
        • get object
          Hide get attributes Show get attributes object
          • fields object
          • found boolean Required
          • _primary_term number
          • _source object
    • took number Required

      The length of time, in milliseconds, it took to process the bulk request.

    • ingest_took number
POST _bulk
{ "index" : { "_index" : "test", "_id" : "1" } }
{ "field1" : "value1" }
{ "delete" : { "_index" : "test", "_id" : "2" } }
{ "create" : { "_index" : "test", "_id" : "3" } }
{ "field1" : "value3" }
{ "update" : {"_id" : "1", "_index" : "test"} }
{ "doc" : {"field2" : "value2"} }
resp = client.bulk(
    operations=[
        {
            "index": {
                "_index": "test",
                "_id": "1"
            }
        },
        {
            "field1": "value1"
        },
        {
            "delete": {
                "_index": "test",
                "_id": "2"
            }
        },
        {
            "create": {
                "_index": "test",
                "_id": "3"
            }
        },
        {
            "field1": "value3"
        },
        {
            "update": {
                "_id": "1",
                "_index": "test"
            }
        },
        {
            "doc": {
                "field2": "value2"
            }
        }
    ],
)
const response = await client.bulk({
  operations: [
    {
      index: {
        _index: "test",
        _id: "1",
      },
    },
    {
      field1: "value1",
    },
    {
      delete: {
        _index: "test",
        _id: "2",
      },
    },
    {
      create: {
        _index: "test",
        _id: "3",
      },
    },
    {
      field1: "value3",
    },
    {
      update: {
        _id: "1",
        _index: "test",
      },
    },
    {
      doc: {
        field2: "value2",
      },
    },
  ],
});
response = client.bulk(
  body: [
    {
      "index": {
        "_index": "test",
        "_id": "1"
      }
    },
    {
      "field1": "value1"
    },
    {
      "delete": {
        "_index": "test",
        "_id": "2"
      }
    },
    {
      "create": {
        "_index": "test",
        "_id": "3"
      }
    },
    {
      "field1": "value3"
    },
    {
      "update": {
        "_id": "1",
        "_index": "test"
      }
    },
    {
      "doc": {
        "field2": "value2"
      }
    }
  ]
)
$resp = $client->bulk([
    "body" => array(
        [
            "index" => [
                "_index" => "test",
                "_id" => "1",
            ],
        ],
        [
            "field1" => "value1",
        ],
        [
            "delete" => [
                "_index" => "test",
                "_id" => "2",
            ],
        ],
        [
            "create" => [
                "_index" => "test",
                "_id" => "3",
            ],
        ],
        [
            "field1" => "value3",
        ],
        [
            "update" => [
                "_id" => "1",
                "_index" => "test",
            ],
        ],
        [
            "doc" => [
                "field2" => "value2",
            ],
        ],
    ),
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '[{"index":{"_index":"test","_id":"1"}},{"field1":"value1"},{"delete":{"_index":"test","_id":"2"}},{"create":{"_index":"test","_id":"3"}},{"field1":"value3"},{"update":{"_id":"1","_index":"test"}},{"doc":{"field2":"value2"}}]' "$ELASTICSEARCH_URL/_bulk"
Run `POST _bulk` to perform multiple operations.
{ "index" : { "_index" : "test", "_id" : "1" } }
{ "field1" : "value1" }
{ "delete" : { "_index" : "test", "_id" : "2" } }
{ "create" : { "_index" : "test", "_id" : "3" } }
{ "field1" : "value3" }
{ "update" : {"_id" : "1", "_index" : "test"} }
{ "doc" : {"field2" : "value2"} }
When you run `POST _bulk` and use the `update` action, you can use `retry_on_conflict` as a field in the action itself (not in the extra payload line) to specify how many times an update should be retried in the case of a version conflict.
{ "update" : {"_id" : "1", "_index" : "index1", "retry_on_conflict" : 3} }
{ "doc" : {"field" : "value"} }
{ "update" : { "_id" : "0", "_index" : "index1", "retry_on_conflict" : 3} }
{ "script" : { "source": "ctx._source.counter += params.param1", "lang" : "painless", "params" : {"param1" : 1}}, "upsert" : {"counter" : 1}}
{ "update" : {"_id" : "2", "_index" : "index1", "retry_on_conflict" : 3} }
{ "doc" : {"field" : "value"}, "doc_as_upsert" : true }
{ "update" : {"_id" : "3", "_index" : "index1", "_source" : true} }
{ "doc" : {"field" : "value"} }
{ "update" : {"_id" : "4", "_index" : "index1"} }
{ "doc" : {"field" : "value"}, "_source": true}
To return only information about failed operations, run `POST /_bulk?filter_path=items.*.error`.
{ "update": {"_id": "5", "_index": "index1"} }
{ "doc": {"my_field": "foo"} }
{ "update": {"_id": "6", "_index": "index1"} }
{ "doc": {"my_field": "foo"} }
{ "create": {"_id": "7", "_index": "index1"} }
{ "my_field": "foo" }
Run `POST /_bulk` to perform a bulk request that consists of index and create actions with the `dynamic_templates` parameter. The bulk request creates two new fields `work_location` and `home_location` with type `geo_point` according to the `dynamic_templates` parameter. However, the `raw_location` field is created using default dynamic mapping rules, as a text field in that case since it is supplied as a string in the JSON document.
{ "index" : { "_index" : "my_index", "_id" : "1", "dynamic_templates": {"work_location": "geo_point"}} }
{ "field" : "value1", "work_location": "41.12,-71.34", "raw_location": "41.12,-71.34"}
{ "create" : { "_index" : "my_index", "_id" : "2", "dynamic_templates": {"home_location": "geo_point"}} }
{ "field" : "value2", "home_location": "41.12,-71.34"}
Response examples (200)
{
   "took": 30,
   "errors": false,
   "items": [
      {
         "index": {
            "_index": "test",
            "_id": "1",
            "_version": 1,
            "result": "created",
            "_shards": {
               "total": 2,
               "successful": 1,
               "failed": 0
            },
            "status": 201,
            "_seq_no" : 0,
            "_primary_term": 1
         }
      },
      {
         "delete": {
            "_index": "test",
            "_id": "2",
            "_version": 1,
            "result": "not_found",
            "_shards": {
               "total": 2,
               "successful": 1,
               "failed": 0
            },
            "status": 404,
            "_seq_no" : 1,
            "_primary_term" : 2
         }
      },
      {
         "create": {
            "_index": "test",
            "_id": "3",
            "_version": 1,
            "result": "created",
            "_shards": {
               "total": 2,
               "successful": 1,
               "failed": 0
            },
            "status": 201,
            "_seq_no" : 2,
            "_primary_term" : 3
         }
      },
      {
         "update": {
            "_index": "test",
            "_id": "1",
            "_version": 2,
            "result": "updated",
            "_shards": {
                "total": 2,
                "successful": 1,
                "failed": 0
            },
            "status": 200,
            "_seq_no" : 3,
            "_primary_term" : 4
         }
      }
   ]
}
If you run `POST /_bulk` with operations that update non-existent documents, the operations cannot complete successfully. The API returns a response with an `errors` property value `true`. The response also includes an error object for any failed operations. The error object contains additional information about the failure, such as the error type and reason.
{
  "took": 486,
  "errors": true,
  "items": [
    {
      "update": {
        "_index": "index1",
        "_id": "5",
        "status": 404,
        "error": {
          "type": "document_missing_exception",
          "reason": "[5]: document missing",
          "index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA",
          "shard": "0",
          "index": "index1"
        }
      }
    },
    {
      "update": {
        "_index": "index1",
        "_id": "6",
        "status": 404,
        "error": {
          "type": "document_missing_exception",
          "reason": "[6]: document missing",
          "index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA",
          "shard": "0",
          "index": "index1"
        }
      }
    },
    {
      "create": {
        "_index": "index1",
        "_id": "7",
        "_version": 1,
        "result": "created",
        "_shards": {
          "total": 2,
          "successful": 1,
          "failed": 0
        },
        "_seq_no": 0,
        "_primary_term": 1,
        "status": 201
      }
    }
  ]
}
An example response from `POST /_bulk?filter_path=items.*.error`, which returns only information about failed operations.
{
  "items": [
    {
      "update": {
        "error": {
          "type": "document_missing_exception",
          "reason": "[5]: document missing",
          "index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA",
          "shard": "0",
          "index": "index1"
        }
      }
    },
    {
      "update": {
        "error": {
          "type": "document_missing_exception",
          "reason": "[6]: document missing",
          "index_uuid": "aAsFqTI0Tc2W0LCWgPNrOA",
          "shard": "0",
          "index": "index1"
        }
      }
    }
  ]
}








Create or update a document in an index Generally available

POST /{index}/_doc/{id}

All methods and paths for this operation:

POST /{index}/_doc

PUT /{index}/_doc/{id}
POST /{index}/_doc/{id}

Add a JSON document to the specified data stream or index and make it searchable. If the target is an index and the document already exists, the request updates the document and increments its version.

NOTE: You cannot use this API to send update requests for existing documents in a data stream.

If the Elasticsearch security features are enabled, you must have the following index privileges for the target data stream, index, or index alias:

  • To add or overwrite a document using the PUT /<target>/_doc/<_id> request format, you must have the create, index, or write index privilege.
  • To add a document using the POST /<target>/_doc/ request format, you must have the create_doc, create, index, or write index privilege.
  • To automatically create a data stream or index with this API request, you must have the auto_configure, create_index, or manage index privilege.

Automatic data stream creation requires a matching index template with data stream enabled.

NOTE: Replica shards might not all be started when an indexing operation returns successfully. By default, only the primary is required. Set wait_for_active_shards to change this default behavior.

Automatically create data streams and indices

If the request's target doesn't exist and matches an index template with a data_stream definition, the index operation automatically creates the data stream.

If the target doesn't exist and doesn't match a data stream template, the operation automatically creates the index and applies any matching index templates.

NOTE: Elasticsearch includes several built-in index templates. To avoid naming collisions with these templates, refer to index pattern documentation.

If no mapping exists, the index operation creates a dynamic mapping. By default, new fields and objects are automatically added to the mapping if needed.

Automatic index creation is controlled by the action.auto_create_index setting. If it is true, any index can be created automatically. You can modify this setting to explicitly allow or block automatic creation of indices that match specified patterns or set it to false to turn off automatic index creation entirely. Specify a comma-separated list of patterns you want to allow or prefix each pattern with + or - to indicate whether it should be allowed or blocked. When a list is specified, the default behaviour is to disallow.

NOTE: The action.auto_create_index setting affects the automatic creation of indices only. It does not affect the creation of data streams.

Optimistic concurrency control

Index operations can be made conditional and only be performed if the last modification to the document was assigned the sequence number and primary term specified by the if_seq_no and if_primary_term parameters. If a mismatch is detected, the operation will result in a VersionConflictException and a status code of 409.

Routing

By default, shard placement — or routing — is controlled by using a hash of the document's ID value. For more explicit control, the value fed into the hash function used by the router can be directly specified on a per-operation basis using the routing parameter.

When setting up explicit mapping, you can also use the _routing field to direct the index operation to extract the routing value from the document itself. This does come at the (very minimal) cost of an additional document parsing pass. If the _routing mapping is defined and set to be required, the index operation will fail if no routing value is provided or extracted.

NOTE: Data streams do not support custom routing unless they were created with the allow_custom_routing setting enabled in the template.

Distributed

The index operation is directed to the primary shard based on its route and performed on the actual node containing this shard. After the primary shard completes the operation, if needed, the update is distributed to applicable replicas.

Active shards

To improve the resiliency of writes to the system, indexing operations can be configured to wait for a certain number of active shard copies before proceeding with the operation. If the requisite number of active shard copies are not available, then the write operation must wait and retry, until either the requisite shard copies have started or a timeout occurs. By default, write operations only wait for the primary shards to be active before proceeding (that is to say wait_for_active_shards is 1). This default can be overridden in the index settings dynamically by setting index.write.wait_for_active_shards. To alter this behavior per operation, use the wait_for_active_shards request parameter.

Valid values are all or any positive integer up to the total number of configured copies per shard in the index (which is number_of_replicas+1). Specifying a negative value or a number greater than the number of shard copies will throw an error.

For example, suppose you have a cluster of three nodes, A, B, and C and you create an index index with the number of replicas set to 3 (resulting in 4 shard copies, one more copy than there are nodes). If you attempt an indexing operation, by default the operation will only ensure the primary copy of each shard is available before proceeding. This means that even if B and C went down and A hosted the primary shard copies, the indexing operation would still proceed with only one copy of the data. If wait_for_active_shards is set on the request to 3 (and all three nodes are up), the indexing operation will require 3 active shard copies before proceeding. This requirement should be met because there are 3 active nodes in the cluster, each one holding a copy of the shard. However, if you set wait_for_active_shards to all (or to 4, which is the same in this situation), the indexing operation will not proceed as you do not have all 4 copies of each shard active in the index. The operation will timeout unless a new node is brought up in the cluster to host the fourth copy of the shard.

It is important to note that this setting greatly reduces the chances of the write operation not writing to the requisite number of shard copies, but it does not completely eliminate the possibility, because this check occurs before the write operation starts. After the write operation is underway, it is still possible for replication to fail on any number of shard copies but still succeed on the primary. The _shards section of the API response reveals the number of shard copies on which replication succeeded and failed.

No operation (noop) updates

When updating a document by using this API, a new version of the document is always created even if the document hasn't changed. If this isn't acceptable use the _update API with detect_noop set to true. The detect_noop option isn't available on this API because it doesn’t fetch the old source and isn't able to compare it against the new source.

There isn't a definitive rule for when noop updates aren't acceptable. It's a combination of lots of factors like how frequently your data source sends updates that are actually noops and how many queries per second Elasticsearch runs on the shard receiving the updates.

Versioning

Each indexed document is given a version number. By default, internal versioning is used that starts at 1 and increments with each update, deletes included. Optionally, the version number can be set to an external value (for example, if maintained in a database). To enable this functionality, version_type should be set to external. The value provided must be a numeric, long value greater than or equal to 0, and less than around 9.2e+18.

NOTE: Versioning is completely real time, and is not affected by the near real time aspects of search operations. If no version is provided, the operation runs without any version checks.

When using the external version type, the system checks to see if the version number passed to the index request is greater than the version of the currently stored document. If true, the document will be indexed and the new version number used. If the value provided is less than or equal to the stored document's version number, a version conflict will occur and the index operation will fail. For example:

PUT my-index-000001/_doc/1?version=2&version_type=external
{
  "user": {
    "id": "elkbee"
  }
}

In this example, the operation will succeed since the supplied version of 2 is higher than the current document version of 1.
If the document was already updated and its version was set to 2 or higher, the indexing command will fail and result in a conflict (409 HTTP status code).

A nice side effect is that there is no need to maintain strict ordering of async indexing operations run as a result of changes to a source database, as long as version numbers from the source database are used.
Even the simple case of updating the Elasticsearch index using data from a database is simplified if external versioning is used, as only the latest version will be used if the index operations arrive out of order.

## Required authorization

* Index privileges: `index`
External documentation

Path parameters

  • index string Required

    The name of the data stream or index to target. If the target doesn't exist and matches the name or wildcard (*) pattern of an index template with a data_stream definition, this request creates the data stream. If the target doesn't exist and doesn't match a data stream template, this request creates the index. You can check for existing targets with the resolve index API.

  • id string Required

    A unique identifier for the document. To automatically generate a document ID, use the POST /<target>/_doc/ request format and omit this parameter.

Query parameters

  • if_primary_term number

    Only perform the operation if the document has this primary term.

  • if_seq_no number

    Only perform the operation if the document has this sequence number.

  • include_source_on_error boolean

    True or false if to include the document source in the error message in case of parsing errors.

  • op_type string

    Set to create to only index the document if it does not already exist (put if absent). If a document with the specified _id already exists, the indexing operation will fail. The behavior is the same as using the <index>/_create endpoint. If a document ID is specified, this paramater defaults to index. Otherwise, it defaults to create. If the request targets a data stream, an op_type of create is required.

    Supported values include:

    • index: Overwrite any documents that already exist.
    • create: Only index documents that do not already exist.

    Values are index or create.

  • pipeline string

    The ID of the pipeline to use to preprocess incoming documents. If the index has a default ingest pipeline specified, then setting the value to _none disables the default ingest pipeline for this request. If a final pipeline is configured it will always run, regardless of the value of this parameter.

  • refresh string

    If true, Elasticsearch refreshes the affected shards to make this operation visible to search. If wait_for, it waits for a refresh to make this operation visible to search. If false, it does nothing with refreshes.

    Values are true, false, or wait_for.

  • routing string

    A custom value that is used to route operations to a specific shard.

  • timeout string

    The period the request waits for the following operations: automatic index creation, dynamic mapping updates, waiting for active shards.

    This parameter is useful for situations where the primary shard assigned to perform the operation might not be available when the operation runs. Some reasons for this might be that the primary shard is currently recovering from a gateway or undergoing relocation. By default, the operation will wait on the primary shard to become available for at least 1 minute before failing and responding with an error. The actual wait time could be longer, particularly when multiple waits occur.

    Values are -1 or 0.

  • version number

    An explicit version number for concurrency control. It must be a non-negative long number.

  • version_type string

    The version type.

    Supported values include:

    • internal: Use internal versioning that starts at 1 and increments with each update or delete.
    • external: Only index the document if the specified version is strictly higher than the version of the stored document or if there is no existing document.
    • external_gte: Only index the document if the specified version is equal or higher than the version of the stored document or if there is no existing document. NOTE: The external_gte version type is meant for special use cases and should be used with care. If used incorrectly, it can result in loss of data.
    • force: This option is deprecated because it can cause primary and replica shards to diverge.

    Values are internal, external, external_gte, or force.

  • wait_for_active_shards number | string

    The number of shard copies that must be active before proceeding with the operation. You can set it to all or any positive integer up to the total number of shards in the index (number_of_replicas+1). The default value of 1 means it waits for each primary shard to be active.

    Values are all or index-setting.

  • require_alias boolean

    If true, the destination must be an index alias.

  • require_data_stream boolean

    If true, the request's actions must target a data stream (existing or to be created).

application/json

Body Required

object object

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • _id string Required

      The unique identifier for the added document.

    • _index string Required

      The name of the index the document was added to.

    • _primary_term number

      The primary term assigned to the document for the indexing operation.

    • result string Required

      The result of the indexing operation: created or updated.

      Values are created, updated, deleted, not_found, or noop.

    • _seq_no number

      The sequence number assigned to the document for the indexing operation. Sequence numbers are used to ensure an older version of a document doesn't overwrite a newer version.

    • _shards object Required

      Information about the replication process of the operation.

      Hide _shards attributes Show _shards attributes object
      • failed number Required

        The number of shards the operation or search attempted to run on but failed.

      • successful number Required

        The number of shards the operation or search succeeded on.

      • total number Required

        The number of shards the operation or search will run on overall.

      • failures array[object]
        Hide failures attributes Show failures attributes object
        • index string
        • node string
        • reason object Required

          Cause and details about a request failure. This class defines the properties common to all error types. Additional details are also provided, that depend on the error type.

        • shard number
        • status string
        • primary boolean
      • skipped number
    • _version number Required

      The document version, which is incremented each time the document is updated.

    • forced_refresh boolean
POST my-index-000001/_doc/
{
  "@timestamp": "2099-11-15T13:12:00",
  "message": "GET /search HTTP/1.1 200 1070000",
  "user": {
    "id": "kimchy"
  }
}
resp = client.index(
    index="my-index-000001",
    document={
        "@timestamp": "2099-11-15T13:12:00",
        "message": "GET /search HTTP/1.1 200 1070000",
        "user": {
            "id": "kimchy"
        }
    },
)
const response = await client.index({
  index: "my-index-000001",
  document: {
    "@timestamp": "2099-11-15T13:12:00",
    message: "GET /search HTTP/1.1 200 1070000",
    user: {
      id: "kimchy",
    },
  },
});
response = client.index(
  index: "my-index-000001",
  body: {
    "@timestamp": "2099-11-15T13:12:00",
    "message": "GET /search HTTP/1.1 200 1070000",
    "user": {
      "id": "kimchy"
    }
  }
)
$resp = $client->index([
    "index" => "my-index-000001",
    "body" => [
        "@timestamp" => "2099-11-15T13:12:00",
        "message" => "GET /search HTTP/1.1 200 1070000",
        "user" => [
            "id" => "kimchy",
        ],
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"@timestamp":"2099-11-15T13:12:00","message":"GET /search HTTP/1.1 200 1070000","user":{"id":"kimchy"}}' "$ELASTICSEARCH_URL/my-index-000001/_doc/"
client.index(i -> i
    .index("my-index-000001")
    .document(JsonData.fromJson("{\"@timestamp\":\"2099-11-15T13:12:00\",\"message\":\"GET /search HTTP/1.1 200 1070000\",\"user\":{\"id\":\"kimchy\"}}"))
);
Request examples
Run `POST my-index-000001/_doc/` to index a document. When you use the `POST /<target>/_doc/` request format, the `op_type` is automatically set to `create` and the index operation generates a unique ID for the document.
{
  "@timestamp": "2099-11-15T13:12:00",
  "message": "GET /search HTTP/1.1 200 1070000",
  "user": {
    "id": "kimchy"
  }
}
Run `PUT my-index-000001/_doc/1` to insert a JSON document into the `my-index-000001` index with an `_id` of 1.
{
  "@timestamp": "2099-11-15T13:12:00",
  "message": "GET /search HTTP/1.1 200 1070000",
  "user": {
    "id": "kimchy"
  }
}
Response examples (200)
A successful response from `POST my-index-000001/_doc/`, which contains an automated document ID.
{
  "_shards": {
    "total": 2,
    "failed": 0,
    "successful": 2
  },
  "_index": "my-index-000001",
  "_id": "W0tpsmIBdwcYyG50zbta",
  "_version": 1,
  "_seq_no": 0,
  "_primary_term": 1,
  "result": "created"
}
A successful response from `PUT my-index-000001/_doc/1`.
{
  "_shards": {
    "total": 2,
    "failed": 0,
    "successful": 2
  },
  "_index": "my-index-000001",
  "_id": "1",
  "_version": 1,
  "_seq_no": 0,
  "_primary_term": 1,
  "result": "created"
}












Get a document's source Generally available

GET /{index}/_source/{id}

Get the source of a document. For example:

GET my-index-000001/_source/1

You can use the source filtering parameters to control which parts of the _source are returned:

GET my-index-000001/_source/1/?_source_includes=*.id&_source_excludes=entities

Required authorization

  • Index privileges: read
External documentation

Path parameters

  • index string Required

    The name of the index that contains the document.

  • id string Required

    A unique document identifier.

Query parameters

  • preference string

    The node or shard the operation should be performed on. By default, the operation is randomized between the shard replicas.

  • realtime boolean

    If true, the request is real-time as opposed to near-real-time.

  • refresh boolean

    If true, the request refreshes the relevant shards before retrieving the document. Setting it to true should be done after careful thought and verification that this does not cause a heavy load on the system (and slow down indexing).

  • routing string

    A custom value used to route operations to a specific shard.

  • _source boolean | string | array[string]

    Indicates whether to return the _source field (true or false) or lists the fields to return.

  • _source_excludes string | array[string]

    A comma-separated list of source fields to exclude in the response.

  • _source_includes string | array[string]

    A comma-separated list of source fields to include in the response.

  • version number

    The version number for concurrency control. It must match the current version of the document for the request to succeed.

  • version_type string

    The version type.

    Supported values include:

    • internal: Use internal versioning that starts at 1 and increments with each update or delete.
    • external: Only index the document if the specified version is strictly higher than the version of the stored document or if there is no existing document.
    • external_gte: Only index the document if the specified version is equal or higher than the version of the stored document or if there is no existing document. NOTE: The external_gte version type is meant for special use cases and should be used with care. If used incorrectly, it can result in loss of data.
    • force: This option is deprecated because it can cause primary and replica shards to diverge.

    Values are internal, external, external_gte, or force.

Responses

  • 200 application/json
GET my-index-000001/_source/1
resp = client.get_source(
    index="my-index-000001",
    id="1",
)
const response = await client.getSource({
  index: "my-index-000001",
  id: 1,
});
response = client.get_source(
  index: "my-index-000001",
  id: "1"
)
$resp = $client->getSource([
    "index" => "my-index-000001",
    "id" => "1",
]);
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/my-index-000001/_source/1"
client.getSource(g -> g
    .id("1")
    .index("my-index-000001")
);





















































































































Delete indices Generally available

DELETE /{index}

Deleting an index deletes its documents, shards, and metadata. It does not delete related Kibana components, such as data views, visualizations, or dashboards.

You cannot delete the current write index of a data stream. To delete the index, you must roll over the data stream so a new write index is created. You can then use the delete index API to delete the previous write index.

Required authorization

  • Index privileges: delete_index

Path parameters

  • index string | array[string] Required

    Comma-separated list of indices to delete. You cannot specify index aliases. By default, this parameter does not support wildcards (*) or _all. To use wildcards or _all, set the action.destructive_requires_name cluster setting to false.

Query parameters

  • allow_no_indices boolean

    If false, the request returns an error if any wildcard expression, index alias, or _all value targets only missing or closed indices. This behavior applies even if the request targets other open indices.

  • expand_wildcards string | array[string]

    Type of index that wildcard patterns can match. If the request can target data streams, this argument determines whether wildcard expressions match hidden data streams. Supports comma-separated values, such as open,hidden.

    Supported values include:

    • all: Match any data stream or index, including hidden ones.
    • open: Match open, non-hidden indices. Also matches any non-hidden data stream.
    • closed: Match closed, non-hidden indices. Also matches any non-hidden data stream. Data streams cannot be closed.
    • hidden: Match hidden data streams and hidden indices. Must be combined with open, closed, or both.
    • none: Wildcard expressions are not accepted.

    Values are all, open, closed, hidden, or none.

  • ignore_unavailable boolean

    If false, the request returns an error if it targets a missing or closed index.

  • master_timeout string

    Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

  • timeout string

    Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • acknowledged boolean Required

      For a successful response, this value is always true. On failure, an exception is returned instead.

    • _shards object
      Hide _shards attributes Show _shards attributes object
      • failed number Required

        The number of shards the operation or search attempted to run on but failed.

      • successful number Required

        The number of shards the operation or search succeeded on.

      • total number Required

        The number of shards the operation or search will run on overall.

      • failures array[object]
        Hide failures attributes Show failures attributes object
        • index
        • node string
        • reason
        • shard number
        • status string
        • primary boolean
      • skipped number
DELETE /books
resp = client.indices.delete(
    index="books",
)
const response = await client.indices.delete({
  index: "books",
});
response = client.indices.delete(
  index: "books"
)
$resp = $client->indices()->delete([
    "index" => "books",
]);
curl -X DELETE -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/books"
client.indices().delete(d -> d
    .index("books")
);




























































Roll over to a new index Generally available

POST /{alias}/_rollover/{new_index}

All methods and paths for this operation:

POST /{alias}/_rollover

POST /{alias}/_rollover/{new_index}

TIP: It is recommended to use the index lifecycle rollover action to automate rollovers.

The rollover API creates a new index for a data stream or index alias. The API behavior depends on the rollover target.

Roll over a data stream

If you roll over a data stream, the API creates a new write index for the stream. The stream's previous write index becomes a regular backing index. A rollover also increments the data stream's generation.

Roll over an index alias with a write index

TIP: Prior to Elasticsearch 7.9, you'd typically use an index alias with a write index to manage time series data. Data streams replace this functionality, require less maintenance, and automatically integrate with data tiers.

If an index alias points to multiple indices, one of the indices must be a write index. The rollover API creates a new write index for the alias with is_write_index set to true. The API also sets is_write_index to false for the previous write index.

Roll over an index alias with one index

If you roll over an index alias that points to only one index, the API creates a new index for the alias and removes the original index from the alias.

NOTE: A rollover creates a new index and is subject to the wait_for_active_shards setting.

Increment index names for an alias

When you roll over an index alias, you can specify a name for the new index. If you don't specify a name and the current index ends with - and a number, such as my-index-000001 or my-index-3, the new index name increments that number. For example, if you roll over an alias with a current index of my-index-000001, the rollover creates a new index named my-index-000002. This number is always six characters and zero-padded, regardless of the previous index's name.

If you use an index alias for time series data, you can use date math in the index name to track the rollover date. For example, you can create an alias that points to an index named <my-index-{now/d}-000001>. If you create the index on May 6, 2099, the index's name is my-index-2099.05.06-000001. If you roll over the alias on May 7, 2099, the new index's name is my-index-2099.05.07-000002.

Required authorization

  • Index privileges: manage

Path parameters

  • alias string

    Name of the data stream or index alias to roll over.

  • new_index string Required

    Name of the index to create. Supports date math. Data streams do not support this parameter.

Query parameters

  • dry_run boolean

    If true, checks whether the current index satisfies the specified conditions but does not perform a rollover.

  • master_timeout string

    Period to wait for a connection to the master node. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

  • timeout string

    Period to wait for a response. If no response is received before the timeout expires, the request fails and returns an error.

    Values are -1 or 0.

  • wait_for_active_shards number | string

    The number of shard copies that must be active before proceeding with the operation. Set to all or any positive integer up to the total number of shards in the index (number_of_replicas+1).

    Values are all or index-setting.

  • lazy boolean

    If set to true, the rollover action will only mark a data stream to signal that it needs to be rolled over at the next write. Only allowed on data streams.

application/json

Body

  • aliases object

    Aliases for the target index. Data streams do not support this parameter.

    Hide aliases attribute Show aliases attribute object
    • * object Additional properties
      Hide * attributes Show * attributes object
      • filter object

        Query used to limit documents the alias can access.

        External documentation
      • index_routing string

        Value used to route indexing operations to a specific shard. If specified, this overwrites the routing value for indexing operations.

      • is_hidden boolean

        If true, the alias is hidden. All indices for the alias must have the same is_hidden value.

        Default value is false.

      • is_write_index boolean

        If true, the index is the write index for the alias.

        Default value is false.

      • routing string

        Value used to route indexing and search operations to a specific shard.

      • search_routing string

        Value used to route search operations to a specific shard. If specified, this overwrites the routing value for search operations.

  • conditions object

    Conditions for the rollover. If specified, Elasticsearch only performs the rollover if the current index satisfies these conditions. If this parameter is not specified, Elasticsearch performs the rollover unconditionally. If conditions are specified, at least one of them must be a max_* condition. The index will rollover if any max_* condition is satisfied and all min_* conditions are satisfied.

    Hide conditions attributes Show conditions attributes object
    • min_age string

      A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

    • max_age string

      A duration. Units can be nanos, micros, ms (milliseconds), s (seconds), m (minutes), h (hours) and d (days). Also accepts "0" without a unit and "-1" to indicate an unspecified value.

    • Time unit for milliseconds

    • min_docs number
    • max_docs number
    • max_size number | string

    • max_size_bytes number
    • min_size number | string

    • min_size_bytes number
    • max_primary_shard_size number | string

    • max_primary_shard_size_bytes number
    • min_primary_shard_size number | string

    • min_primary_shard_size_bytes number
    • max_primary_shard_docs number
    • min_primary_shard_docs number
  • mappings object

    Mapping for fields in the index. If specified, this mapping can include field names, field data types, and mapping paramaters.

    Hide mappings attributes Show mappings attributes object
    • all_field object
      Hide all_field attributes Show all_field attributes object
      • analyzer string Required
      • enabled boolean Required
      • omit_norms boolean Required
      • search_analyzer string Required
      • similarity string Required
      • store boolean Required
      • store_term_vector_offsets boolean Required
      • store_term_vector_payloads boolean Required
      • store_term_vector_positions boolean Required
      • store_term_vectors boolean Required
    • date_detection boolean
    • dynamic string

      Values are strict, runtime, true, or false.

    • dynamic_date_formats array[string]
    • dynamic_templates array[object]
    • _field_names object
      Hide _field_names attribute Show _field_names attribute object
      • enabled boolean Required
    • index_field object
      Hide index_field attribute Show index_field attribute object
      • enabled boolean Required
    • _meta object
      Hide _meta attribute Show _meta attribute object
      • * object Additional properties
    • numeric_detection boolean
    • properties object
    • _routing object
      Hide _routing attribute Show _routing attribute object
      • required boolean Required
    • _size object
      Hide _size attribute Show _size attribute object
      • enabled boolean Required
    • _source object
      Hide _source attributes Show _source attributes object
      • compress boolean
      • compress_threshold string
      • enabled boolean
      • excludes array[string]
      • includes array[string]
      • mode string

        Supported values include:

        • disabled
        • stored
        • synthetic: Instead of storing source documents on disk exactly as you send them, Elasticsearch can reconstruct source content on the fly upon retrieval.

        Values are disabled, stored, or synthetic.

    • runtime object
      Hide runtime attribute Show runtime attribute object
      • * object Additional properties
        Hide * attributes Show * attributes object
        • fields object

          For type composite

          Hide fields attribute Show fields attribute object
          • * object Additional properties
        • fetch_fields array[object]

          For type lookup

          Hide fetch_fields attributes Show fetch_fields attributes object
          • field
          • format string
        • format string

          A custom format for date type runtime fields.

        • input_field string

          For type lookup

        • target_field string

          For type lookup

        • target_index string

          For type lookup

        • script object

          Painless script executed at query time.

          Hide script attributes Show script attributes object
          • params object

            Specifies any named parameters that are passed into the script as variables. Use parameters instead of hard-coded values to decrease compile time.

          • options object
        • type string Required

          Field type, which can be: boolean, composite, date, double, geo_point, ip,keyword, long, or lookup.

          Values are boolean, composite, date, double, geo_point, geo_shape, ip, keyword, long, or lookup.

    • enabled boolean
    • subobjects string

      Values are true or false.

    • _data_stream_timestamp object
      Hide _data_stream_timestamp attribute Show _data_stream_timestamp attribute object
      • enabled boolean Required
  • settings object

    Configuration options for the index. Data streams do not support this parameter.

    Hide settings attribute Show settings attribute object
    • * object Additional properties

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • acknowledged boolean Required
    • conditions object Required
      Hide conditions attribute Show conditions attribute object
      • * boolean Additional properties
    • dry_run boolean Required
    • new_index string Required
    • old_index string Required
    • rolled_over boolean Required
    • shards_acknowledged boolean Required
POST /{alias}/_rollover/{new_index}
POST my-data-stream/_rollover
{
  "conditions": {
    "max_age": "7d",
    "max_docs": 1000,
    "max_primary_shard_size": "50gb",
    "max_primary_shard_docs": "2000"
  }
}
resp = client.indices.rollover(
    alias="my-data-stream",
    conditions={
        "max_age": "7d",
        "max_docs": 1000,
        "max_primary_shard_size": "50gb",
        "max_primary_shard_docs": "2000"
    },
)
const response = await client.indices.rollover({
  alias: "my-data-stream",
  conditions: {
    max_age: "7d",
    max_docs: 1000,
    max_primary_shard_size: "50gb",
    max_primary_shard_docs: "2000",
  },
});
response = client.indices.rollover(
  alias: "my-data-stream",
  body: {
    "conditions": {
      "max_age": "7d",
      "max_docs": 1000,
      "max_primary_shard_size": "50gb",
      "max_primary_shard_docs": "2000"
    }
  }
)
$resp = $client->indices()->rollover([
    "alias" => "my-data-stream",
    "body" => [
        "conditions" => [
            "max_age" => "7d",
            "max_docs" => 1000,
            "max_primary_shard_size" => "50gb",
            "max_primary_shard_docs" => "2000",
        ],
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"conditions":{"max_age":"7d","max_docs":1000,"max_primary_shard_size":"50gb","max_primary_shard_docs":"2000"}}' "$ELASTICSEARCH_URL/my-data-stream/_rollover"
client.indices().rollover(r -> r
    .alias("my-data-stream")
    .conditions(c -> c
        .maxAge(m -> m
            .time("7d")
        )
        .maxDocs(1000L)
        .maxPrimaryShardSize("50gb")
        .maxPrimaryShardDocs(2000L)
    )
);
Request examples
Create a new index for a data stream
{
  "conditions": {
    "max_age": "7d",
    "max_docs": 1000,
    "max_primary_shard_size": "50gb",
    "max_primary_shard_docs": "2000"
  }
}
Create a new index for a data stream
{}
Response examples (200)
An abbreviated response from `GET /_segments`.
{
  "_shards": {},
  "indices": {
    "test": {
      "shards": {
        "0": [
          {
            "routing": {
              "state": "STARTED",
              "primary": true,
              "node": "zDC_RorJQCao9xf9pg3Fvw"
            },
            "num_committed_segments": 0,
            "num_search_segments": 1,
            "segments": {
              "_0": {
                "generation": 0,
                "num_docs": 1,
                "deleted_docs": 0,
                "size_in_bytes": 3800,
                "committed": false,
                "search": true,
                "version": "7.0.0",
                "compound": true,
                "attributes": {}
              }
            }
          }
        ]
      }
    }
  }
}





































































Create a Cohere inference endpoint Generally available

PUT /_inference/{task_type}/{cohere_inference_id}

Create an inference endpoint to perform an inference task with the cohere service.

Required authorization

  • Cluster privileges: manage_inference

Path parameters

  • task_type string

    The type of the inference task that the model will perform.

    Values are completion, rerank, or text_embedding.

  • cohere_inference_id string Required

    The unique identifier of the inference endpoint.

Query parameters

  • timeout string

    Specifies the amount of time to wait for the inference endpoint to be created.

    Values are -1 or 0.

application/json

Body

  • chunking_settings object

    The chunking configuration object.

    Hide chunking_settings attributes Show chunking_settings attributes object
    • max_chunk_size number

      The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

      Default value is 250.

    • overlap number

      The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

      Default value is 100.

    • sentence_overlap number

      The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

      Default value is 1.

    • separator_group string

      Only applicable to the recursive strategy and required when using it.

      Sets a predefined list of separators in the saved chunking settings based on the selected text type. Values can be markdown or plaintext.

      Using this parameter is an alternative to manually specifying a custom separators list.

    • separators array[string]

      Only applicable to the recursive strategy and required when using it.

      A list of strings used as possible split points when chunking text.

      Each string can be a plain string or a regular expression (regex) pattern. The system tries each separator in order to split the text, starting from the first item in the list.

      After splitting, it attempts to recombine smaller pieces into larger chunks that stay within the max_chunk_size limit, to reduce the total number of chunks generated.

    • strategy string

      The chunking strategy: sentence, word, none or recursive.

      • If strategy is set to recursive, you must also specify:

        • max_chunk_size
        • either separators orseparator_group

      Learn more about different chunking strategies in the linked documentation.

      Default value is sentence.

      External documentation
  • service string Required

    The type of service supported for the specified task type. In this case, cohere.

    Value is cohere.

  • service_settings object Required

    Settings used to install the inference model. These settings are specific to the cohere service.

    Hide service_settings attributes Show service_settings attributes object
    • api_key string Required

      A valid API key for your Cohere account. You can find or create your Cohere API keys on the Cohere API key settings page.

      IMPORTANT: You need to provide the API key only once, during the inference model creation. The get inference endpoint API does not retrieve your API key. After creating the inference model, you cannot change the associated API key. If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.

      External documentation
    • embedding_type string

      For a text_embedding task, the types of embeddings you want to get back. Use binary for binary embeddings, which are encoded as bytes with signed int8 precision. Use bit for binary embeddings, which are encoded as bytes with signed int8 precision (this is a synonym of binary). Use byte for signed int8 embeddings (this is a synonym of int8). Use float for the default float embeddings. Use int8 for signed int8 embeddings.

      Values are binary, bit, byte, float, or int8.

    • model_id string Required

      For a completion, rerank, or text_embedding task, the name of the model to use for the inference task.

    • rate_limit object

      This setting helps to minimize the number of rate limit errors returned from Cohere. By default, the cohere service sets the number of requests allowed per minute to 10000.

      Hide rate_limit attribute Show rate_limit attribute object
      • requests_per_minute number

        The number of requests allowed per minute. By default, the number of requests allowed per minute is set by each service as follows:

        • alibabacloud-ai-search service: 1000
        • anthropic service: 50
        • azureaistudio service: 240
        • azureopenai service and task type text_embedding: 1440
        • azureopenai service and task type completion: 120
        • cohere service: 10000
        • elastic service and task type chat_completion: 240
        • googleaistudio service: 360
        • googlevertexai service: 30000
        • hugging_face service: 3000
        • jinaai service: 2000
        • llama service: 3000
        • mistral service: 240
        • openai service and task type text_embedding: 3000
        • openai service and task type completion: 500
        • voyageai service: 2000
        • watsonxai service: 120
    • similarity string

      The similarity measure. If the embedding_type is float, the default value is dot_product. If the embedding_type is int8 or byte, the default value is cosine.

      Values are cosine, dot_product, or l2_norm.

  • task_settings object

    Settings to configure the inference task. These settings are specific to the task type you specified.

    Hide task_settings attributes Show task_settings attributes object
    • input_type string Required

      For a text_embedding task, the type of input passed to the model. Valid values are:

      • classification: Use it for embeddings passed through a text classifier.
      • clustering: Use it for the embeddings run through a clustering algorithm.
      • ingest: Use it for storing document embeddings in a vector database.
      • search: Use it for storing embeddings of search queries run against a vector database to find relevant documents.

      IMPORTANT: The input_type field is required when using embedding models v3 and higher.

      Values are classification, clustering, ingest, or search.

    • return_documents boolean

      For a rerank task, return doc text within the results.

    • top_n number

      For a rerank task, the number of most relevant documents to return. It defaults to the number of the documents. If this inference endpoint is used in a text_similarity_reranker retriever query and top_n is set, it must be greater than or equal to rank_window_size in the query.

    • truncate string

      For a text_embedding task, the method to handle inputs longer than the maximum token length. Valid values are:

      • END: When the input exceeds the maximum input token length, the end of the input is discarded.
      • NONE: When the input exceeds the maximum input token length, an error is returned.
      • START: When the input exceeds the maximum input token length, the start of the input is discarded.

      Values are END, NONE, or START.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • chunking_settings object

      Chunking configuration object

      Hide chunking_settings attributes Show chunking_settings attributes object
      • max_chunk_size number

        The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

        Default value is 250.

      • overlap number

        The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

        Default value is 100.

      • sentence_overlap number

        The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

        Default value is 1.

      • separator_group string

        Only applicable to the recursive strategy and required when using it.

        Sets a predefined list of separators in the saved chunking settings based on the selected text type. Values can be markdown or plaintext.

        Using this parameter is an alternative to manually specifying a custom separators list.

      • separators array[string]

        Only applicable to the recursive strategy and required when using it.

        A list of strings used as possible split points when chunking text.

        Each string can be a plain string or a regular expression (regex) pattern. The system tries each separator in order to split the text, starting from the first item in the list.

        After splitting, it attempts to recombine smaller pieces into larger chunks that stay within the max_chunk_size limit, to reduce the total number of chunks generated.

      • strategy string

        The chunking strategy: sentence, word, none or recursive.

        • If strategy is set to recursive, you must also specify:

          • max_chunk_size
          • either separators orseparator_group

        Learn more about different chunking strategies in the linked documentation.

        Default value is sentence.

        External documentation
    • service string Required

      The service type

    • service_settings object Required

      Settings specific to the service

    • task_settings object

      Task settings specific to the service and task type

    • inference_id string Required

      The inference Id

    • task_type string Required

      The task type

      Values are text_embedding, rerank, or completion.

PUT /_inference/{task_type}/{cohere_inference_id}
PUT _inference/text_embedding/cohere-embeddings
{
    "service": "cohere",
    "service_settings": {
        "api_key": "Cohere-Api-key",
        "model_id": "embed-english-light-v3.0",
        "embedding_type": "byte"
    }
}
resp = client.inference.put(
    task_type="text_embedding",
    inference_id="cohere-embeddings",
    inference_config={
        "service": "cohere",
        "service_settings": {
            "api_key": "Cohere-Api-key",
            "model_id": "embed-english-light-v3.0",
            "embedding_type": "byte"
        }
    },
)
const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "cohere-embeddings",
  inference_config: {
    service: "cohere",
    service_settings: {
      api_key: "Cohere-Api-key",
      model_id: "embed-english-light-v3.0",
      embedding_type: "byte",
    },
  },
});
response = client.inference.put(
  task_type: "text_embedding",
  inference_id: "cohere-embeddings",
  body: {
    "service": "cohere",
    "service_settings": {
      "api_key": "Cohere-Api-key",
      "model_id": "embed-english-light-v3.0",
      "embedding_type": "byte"
    }
  }
)
$resp = $client->inference()->put([
    "task_type" => "text_embedding",
    "inference_id" => "cohere-embeddings",
    "body" => [
        "service" => "cohere",
        "service_settings" => [
            "api_key" => "Cohere-Api-key",
            "model_id" => "embed-english-light-v3.0",
            "embedding_type" => "byte",
        ],
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"service":"cohere","service_settings":{"api_key":"Cohere-Api-key","model_id":"embed-english-light-v3.0","embedding_type":"byte"}}' "$ELASTICSEARCH_URL/_inference/text_embedding/cohere-embeddings"
client.inference().put(p -> p
    .inferenceId("cohere-embeddings")
    .taskType(TaskType.TextEmbedding)
    .inferenceConfig(i -> i
        .service("cohere")
        .serviceSettings(JsonData.fromJson("{\"api_key\":\"Cohere-Api-key\",\"model_id\":\"embed-english-light-v3.0\",\"embedding_type\":\"byte\"}"))
    )
);
Run `PUT _inference/text_embedding/cohere-embeddings` to create an inference endpoint that performs a text embedding task.
{
    "service": "cohere",
    "service_settings": {
        "api_key": "Cohere-Api-key",
        "model_id": "embed-english-light-v3.0",
        "embedding_type": "byte"
    }
}
Run `PUT _inference/rerank/cohere-rerank` to create an inference endpoint that performs a rerank task.
{
    "service": "cohere",
    "service_settings": {
        "api_key": "Cohere-API-key",
        "model_id": "rerank-english-v3.0"
    },
    "task_settings": {
        "top_n": 10,
        "return_documents": true
    }
}
Run `PUT _inference/completion/cohere-completion` to create an inference endpoint that performs a completion task.
{
    "service": "cohere",
    "service_settings": {
        "api_key": "Cohere-API-key",
        "model_id": "command-a-03-2025"
    }
}
































Create a Llama inference endpoint Generally available

PUT /_inference/{task_type}/{llama_inference_id}

Create an inference endpoint to perform an inference task with the llama service.

Required authorization

  • Cluster privileges: manage_inference

Path parameters

  • task_type string

    The type of the inference task that the model will perform.

    Values are text_embedding, completion, or chat_completion.

  • llama_inference_id string Required

    The unique identifier of the inference endpoint.

Query parameters

  • timeout string

    Specifies the amount of time to wait for the inference endpoint to be created.

    Values are -1 or 0.

application/json

Body

  • chunking_settings object

    The chunking configuration object.

    Hide chunking_settings attributes Show chunking_settings attributes object
    • max_chunk_size number

      The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

      Default value is 250.

    • overlap number

      The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

      Default value is 100.

    • sentence_overlap number

      The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

      Default value is 1.

    • separator_group string

      Only applicable to the recursive strategy and required when using it.

      Sets a predefined list of separators in the saved chunking settings based on the selected text type. Values can be markdown or plaintext.

      Using this parameter is an alternative to manually specifying a custom separators list.

    • separators array[string]

      Only applicable to the recursive strategy and required when using it.

      A list of strings used as possible split points when chunking text.

      Each string can be a plain string or a regular expression (regex) pattern. The system tries each separator in order to split the text, starting from the first item in the list.

      After splitting, it attempts to recombine smaller pieces into larger chunks that stay within the max_chunk_size limit, to reduce the total number of chunks generated.

    • strategy string

      The chunking strategy: sentence, word, none or recursive.

      • If strategy is set to recursive, you must also specify:

        • max_chunk_size
        • either separators orseparator_group

      Learn more about different chunking strategies in the linked documentation.

      Default value is sentence.

      External documentation
  • service string Required

    The type of service supported for the specified task type. In this case, llama.

    Value is llama.

  • service_settings object Required

    Settings used to install the inference model. These settings are specific to the llama service.

    Hide service_settings attributes Show service_settings attributes object
    • url string Required

      The URL endpoint of the Llama stack endpoint. URL must contain:

      • For text_embedding task - /v1/inference/embeddings.
      • For completion and chat_completion tasks - /v1/openai/v1/chat/completions.
    • model_id string Required

      The name of the model to use for the inference task. Refer to the Llama downloading models documentation for different ways of getting a list of available models and downloading them. Service has been tested and confirmed to be working with the following models:

      • For text_embedding task - all-MiniLM-L6-v2.
      • For completion and chat_completion tasks - llama3.2:3b.
      External documentation
    • max_input_tokens number

      For a text_embedding task, the maximum number of tokens per input before chunking occurs.

    • similarity string

      For a text_embedding task, the similarity measure. One of cosine, dot_product, l2_norm.

      Values are cosine, dot_product, or l2_norm.

    • rate_limit object

      This setting helps to minimize the number of rate limit errors returned from the Llama API. By default, the llama service sets the number of requests allowed per minute to 3000.

      Hide rate_limit attribute Show rate_limit attribute object
      • requests_per_minute number

        The number of requests allowed per minute. By default, the number of requests allowed per minute is set by each service as follows:

        • alibabacloud-ai-search service: 1000
        • anthropic service: 50
        • azureaistudio service: 240
        • azureopenai service and task type text_embedding: 1440
        • azureopenai service and task type completion: 120
        • cohere service: 10000
        • elastic service and task type chat_completion: 240
        • googleaistudio service: 360
        • googlevertexai service: 30000
        • hugging_face service: 3000
        • jinaai service: 2000
        • llama service: 3000
        • mistral service: 240
        • openai service and task type text_embedding: 3000
        • openai service and task type completion: 500
        • voyageai service: 2000
        • watsonxai service: 120

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • chunking_settings object

      Chunking configuration object

      Hide chunking_settings attributes Show chunking_settings attributes object
      • max_chunk_size number

        The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

        Default value is 250.

      • overlap number

        The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

        Default value is 100.

      • sentence_overlap number

        The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

        Default value is 1.

      • separator_group string

        Only applicable to the recursive strategy and required when using it.

        Sets a predefined list of separators in the saved chunking settings based on the selected text type. Values can be markdown or plaintext.

        Using this parameter is an alternative to manually specifying a custom separators list.

      • separators array[string]

        Only applicable to the recursive strategy and required when using it.

        A list of strings used as possible split points when chunking text.

        Each string can be a plain string or a regular expression (regex) pattern. The system tries each separator in order to split the text, starting from the first item in the list.

        After splitting, it attempts to recombine smaller pieces into larger chunks that stay within the max_chunk_size limit, to reduce the total number of chunks generated.

      • strategy string

        The chunking strategy: sentence, word, none or recursive.

        • If strategy is set to recursive, you must also specify:

          • max_chunk_size
          • either separators orseparator_group

        Learn more about different chunking strategies in the linked documentation.

        Default value is sentence.

        External documentation
    • service string Required

      The service type

    • service_settings object Required

      Settings specific to the service

    • task_settings object

      Task settings specific to the service and task type

    • inference_id string Required

      The inference Id

    • task_type string Required

      The task type

      Values are text_embedding, chat_completion, or completion.

PUT /_inference/{task_type}/{llama_inference_id}
PUT _inference/text_embedding/llama-text-embedding
{
  "service": "llama",
  "service_settings": {
    "url": "http://localhost:8321/v1/inference/embeddings",
    "dimensions": 384,
    "model_id": "all-MiniLM-L6-v2" 
  }
}
resp = client.inference.put(
    task_type="text_embedding",
    inference_id="llama-text-embedding",
    inference_config={
        "service": "llama",
        "service_settings": {
            "url": "http://localhost:8321/v1/inference/embeddings",
            "dimensions": 384,
            "model_id": "all-MiniLM-L6-v2"
        }
    },
)
const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "llama-text-embedding",
  inference_config: {
    service: "llama",
    service_settings: {
      url: "http://localhost:8321/v1/inference/embeddings",
      dimensions: 384,
      model_id: "all-MiniLM-L6-v2",
    },
  },
});
response = client.inference.put(
  task_type: "text_embedding",
  inference_id: "llama-text-embedding",
  body: {
    "service": "llama",
    "service_settings": {
      "url": "http://localhost:8321/v1/inference/embeddings",
      "dimensions": 384,
      "model_id": "all-MiniLM-L6-v2"
    }
  }
)
$resp = $client->inference()->put([
    "task_type" => "text_embedding",
    "inference_id" => "llama-text-embedding",
    "body" => [
        "service" => "llama",
        "service_settings" => [
            "url" => "http://localhost:8321/v1/inference/embeddings",
            "dimensions" => 384,
            "model_id" => "all-MiniLM-L6-v2",
        ],
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"service":"llama","service_settings":{"url":"http://localhost:8321/v1/inference/embeddings","dimensions":384,"model_id":"all-MiniLM-L6-v2"}}' "$ELASTICSEARCH_URL/_inference/text_embedding/llama-text-embedding"
Run `PUT _inference/text_embedding/llama-text-embedding` to create a Llama inference endpoint that performs a `text_embedding` task.
{
  "service": "llama",
  "service_settings": {
    "url": "http://localhost:8321/v1/inference/embeddings",
    "dimensions": 384,
    "model_id": "all-MiniLM-L6-v2" 
  }
}
Run `PUT _inference/completion/llama-completion` to create a Llama inference endpoint that performs a `completion` task.
{
  "service": "llama",
  "service_settings": {
    "url": "http://localhost:8321/v1/openai/v1/chat/completions",
    "model_id": "llama3.2:3b" 
  }
}
Run `PUT _inference/chat-completion/llama-chat-completion` to create a Llama inference endpoint that performs a `chat_completion` task.
{
  "service": "llama",
  "service_settings": {
    "url": "http://localhost:8321/v1/openai/v1/chat/completions",
    "model_id": "llama3.2:3b" 
  }
}




Create an OpenAI inference endpoint Generally available

PUT /_inference/{task_type}/{openai_inference_id}

Create an inference endpoint to perform an inference task with the openai service or openai compatible APIs.

Required authorization

  • Cluster privileges: manage_inference

Path parameters

  • task_type string

    The type of the inference task that the model will perform. NOTE: The chat_completion task type only supports streaming and only through the _stream API.

    Values are chat_completion, completion, or text_embedding.

  • openai_inference_id string Required

    The unique identifier of the inference endpoint.

Query parameters

  • timeout string

    Specifies the amount of time to wait for the inference endpoint to be created.

    Values are -1 or 0.

application/json

Body

  • chunking_settings object

    The chunking configuration object.

    Hide chunking_settings attributes Show chunking_settings attributes object
    • max_chunk_size number

      The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

      Default value is 250.

    • overlap number

      The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

      Default value is 100.

    • sentence_overlap number

      The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

      Default value is 1.

    • separator_group string

      Only applicable to the recursive strategy and required when using it.

      Sets a predefined list of separators in the saved chunking settings based on the selected text type. Values can be markdown or plaintext.

      Using this parameter is an alternative to manually specifying a custom separators list.

    • separators array[string]

      Only applicable to the recursive strategy and required when using it.

      A list of strings used as possible split points when chunking text.

      Each string can be a plain string or a regular expression (regex) pattern. The system tries each separator in order to split the text, starting from the first item in the list.

      After splitting, it attempts to recombine smaller pieces into larger chunks that stay within the max_chunk_size limit, to reduce the total number of chunks generated.

    • strategy string

      The chunking strategy: sentence, word, none or recursive.

      • If strategy is set to recursive, you must also specify:

        • max_chunk_size
        • either separators orseparator_group

      Learn more about different chunking strategies in the linked documentation.

      Default value is sentence.

      External documentation
  • service string Required

    The type of service supported for the specified task type. In this case, openai.

    Value is openai.

  • service_settings object Required

    Settings used to install the inference model. These settings are specific to the openai service.

    Hide service_settings attributes Show service_settings attributes object
    • api_key string Required

      A valid API key of your OpenAI account. You can find your OpenAI API keys in your OpenAI account under the API keys section.

      IMPORTANT: You need to provide the API key only once, during the inference model creation. The get inference endpoint API does not retrieve your API key. After creating the inference model, you cannot change the associated API key. If you want to use a different API key, delete the inference model and recreate it with the same name and the updated API key.

      External documentation
    • dimensions number

      The number of dimensions the resulting output embeddings should have. It is supported only in text-embedding-3 and later models. If it is not set, the OpenAI defined default for the model is used.

    • model_id string Required

      The name of the model to use for the inference task. Refer to the OpenAI documentation for the list of available text embedding models.

      External documentation
    • organization_id string

      The unique identifier for your organization. You can find the Organization ID in your OpenAI account under Settings > Organizations.

    • rate_limit object

      This setting helps to minimize the number of rate limit errors returned from OpenAI. The openai service sets a default number of requests allowed per minute depending on the task type. For text_embedding, it is set to 3000. For completion, it is set to 500.

      Hide rate_limit attribute Show rate_limit attribute object
      • requests_per_minute number

        The number of requests allowed per minute. By default, the number of requests allowed per minute is set by each service as follows:

        • alibabacloud-ai-search service: 1000
        • anthropic service: 50
        • azureaistudio service: 240
        • azureopenai service and task type text_embedding: 1440
        • azureopenai service and task type completion: 120
        • cohere service: 10000
        • elastic service and task type chat_completion: 240
        • googleaistudio service: 360
        • googlevertexai service: 30000
        • hugging_face service: 3000
        • jinaai service: 2000
        • llama service: 3000
        • mistral service: 240
        • openai service and task type text_embedding: 3000
        • openai service and task type completion: 500
        • voyageai service: 2000
        • watsonxai service: 120
    • url string

      The URL endpoint to use for the requests. It can be changed for testing purposes.

      Default value is https://api.openai.com/v1/embeddings..

  • task_settings object

    Settings to configure the inference task. These settings are specific to the task type you specified.

    Hide task_settings attribute Show task_settings attribute object
    • user string

      For a completion or text_embedding task, specify the user issuing the request. This information can be used for abuse detection.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • chunking_settings object

      Chunking configuration object

      Hide chunking_settings attributes Show chunking_settings attributes object
      • max_chunk_size number

        The maximum size of a chunk in words. This value cannot be higher than 300 or lower than 20 (for sentence strategy) or 10 (for word strategy).

        Default value is 250.

      • overlap number

        The number of overlapping words for chunks. It is applicable only to a word chunking strategy. This value cannot be higher than half the max_chunk_size value.

        Default value is 100.

      • sentence_overlap number

        The number of overlapping sentences for chunks. It is applicable only for a sentence chunking strategy. It can be either 1 or 0.

        Default value is 1.

      • separator_group string

        Only applicable to the recursive strategy and required when using it.

        Sets a predefined list of separators in the saved chunking settings based on the selected text type. Values can be markdown or plaintext.

        Using this parameter is an alternative to manually specifying a custom separators list.

      • separators array[string]

        Only applicable to the recursive strategy and required when using it.

        A list of strings used as possible split points when chunking text.

        Each string can be a plain string or a regular expression (regex) pattern. The system tries each separator in order to split the text, starting from the first item in the list.

        After splitting, it attempts to recombine smaller pieces into larger chunks that stay within the max_chunk_size limit, to reduce the total number of chunks generated.

      • strategy string

        The chunking strategy: sentence, word, none or recursive.

        • If strategy is set to recursive, you must also specify:

          • max_chunk_size
          • either separators orseparator_group

        Learn more about different chunking strategies in the linked documentation.

        Default value is sentence.

        External documentation
    • service string Required

      The service type

    • service_settings object Required

      Settings specific to the service

    • task_settings object

      Task settings specific to the service and task type

    • inference_id string Required

      The inference Id

    • task_type string Required

      The task type

      Values are text_embedding, chat_completion, or completion.

PUT /_inference/{task_type}/{openai_inference_id}
PUT _inference/text_embedding/openai-embeddings
{
    "service": "openai",
    "service_settings": {
        "api_key": "OpenAI-API-Key",
        "model_id": "text-embedding-3-small",
        "dimensions": 128
    }
}
resp = client.inference.put(
    task_type="text_embedding",
    inference_id="openai-embeddings",
    inference_config={
        "service": "openai",
        "service_settings": {
            "api_key": "OpenAI-API-Key",
            "model_id": "text-embedding-3-small",
            "dimensions": 128
        }
    },
)
const response = await client.inference.put({
  task_type: "text_embedding",
  inference_id: "openai-embeddings",
  inference_config: {
    service: "openai",
    service_settings: {
      api_key: "OpenAI-API-Key",
      model_id: "text-embedding-3-small",
      dimensions: 128,
    },
  },
});
response = client.inference.put(
  task_type: "text_embedding",
  inference_id: "openai-embeddings",
  body: {
    "service": "openai",
    "service_settings": {
      "api_key": "OpenAI-API-Key",
      "model_id": "text-embedding-3-small",
      "dimensions": 128
    }
  }
)
$resp = $client->inference()->put([
    "task_type" => "text_embedding",
    "inference_id" => "openai-embeddings",
    "body" => [
        "service" => "openai",
        "service_settings" => [
            "api_key" => "OpenAI-API-Key",
            "model_id" => "text-embedding-3-small",
            "dimensions" => 128,
        ],
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"service":"openai","service_settings":{"api_key":"OpenAI-API-Key","model_id":"text-embedding-3-small","dimensions":128}}' "$ELASTICSEARCH_URL/_inference/text_embedding/openai-embeddings"
client.inference().put(p -> p
    .inferenceId("openai-embeddings")
    .taskType(TaskType.TextEmbedding)
    .inferenceConfig(i -> i
        .service("openai")
        .serviceSettings(JsonData.fromJson("{\"api_key\":\"OpenAI-API-Key\",\"model_id\":\"text-embedding-3-small\",\"dimensions\":128}"))
    )
);
Request examples
Run `PUT _inference/text_embedding/openai-embeddings` to create an inference endpoint that performs a `text_embedding` task. The embeddings created by requests to this endpoint will have 128 dimensions.
{
    "service": "openai",
    "service_settings": {
        "api_key": "OpenAI-API-Key",
        "model_id": "text-embedding-3-small",
        "dimensions": 128
    }
}
Run `PUT _inference/completion/amazon_bedrock_completion` to create an inference endpoint to perform a completion task.
{
    "service": "amazonbedrock",
    "service_settings": {
        "access_key": "AWS-access-key",
        "secret_key": "AWS-secret-key",
        "region": "us-east-1",
        "provider": "amazontitan",
        "model": "amazon.titan-text-premier-v1:0"
    }
}
















Perform text embedding inference on the service Generally available

POST /_inference/text_embedding/{inference_id}

Path parameters

  • inference_id string Required

    The inference Id

Query parameters

  • timeout string

    Specifies the amount of time to wait for the inference request to complete.

    Values are -1 or 0.

application/json

Body

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • text_embedding_bytes array[object]

      The text embedding result object for byte representation

      Hide text_embedding_bytes attribute Show text_embedding_bytes attribute object
      • embedding array[number] Required

        Text Embedding results containing bytes are represented as Dense Vectors of bytes.

    • text_embedding_bits array[object]

      The text embedding result object for byte representation

      Hide text_embedding_bits attribute Show text_embedding_bits attribute object
      • embedding array[number] Required

        Text Embedding results containing bytes are represented as Dense Vectors of bytes.

    • text_embedding array[object]

      The text embedding result object

      Hide text_embedding attribute Show text_embedding attribute object
      • embedding array[number] Required

        Text Embedding results are represented as Dense Vectors of floats.

POST /_inference/text_embedding/{inference_id}
POST _inference/text_embedding/my-cohere-endpoint
{
  "input": "The sky above the port was the color of television tuned to a dead channel.",
  "task_settings": {
    "input_type": "ingest"
  }
}
resp = client.inference.text_embedding(
    inference_id="my-cohere-endpoint",
    input="The sky above the port was the color of television tuned to a dead channel.",
    task_settings={
        "input_type": "ingest"
    },
)
const response = await client.inference.textEmbedding({
  inference_id: "my-cohere-endpoint",
  input:
    "The sky above the port was the color of television tuned to a dead channel.",
  task_settings: {
    input_type: "ingest",
  },
});
response = client.inference.text_embedding(
  inference_id: "my-cohere-endpoint",
  body: {
    "input": "The sky above the port was the color of television tuned to a dead channel.",
    "task_settings": {
      "input_type": "ingest"
    }
  }
)
$resp = $client->inference()->textEmbedding([
    "inference_id" => "my-cohere-endpoint",
    "body" => [
        "input" => "The sky above the port was the color of television tuned to a dead channel.",
        "task_settings" => [
            "input_type" => "ingest",
        ],
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"input":"The sky above the port was the color of television tuned to a dead channel.","task_settings":{"input_type":"ingest"}}' "$ELASTICSEARCH_URL/_inference/text_embedding/my-cohere-endpoint"
client.inference().textEmbedding(t -> t
    .inferenceId("my-cohere-endpoint")
    .input("The sky above the port was the color of television tuned to a dead channel.")
    .taskSettings(JsonData.fromJson("{\"input_type\":\"ingest\"}"))
);
Request example
Run `POST _inference/text_embedding/my-cohere-endpoint` to perform text embedding on the example sentence using the Cohere integration,
{
  "input": "The sky above the port was the color of television tuned to a dead channel.",
  "task_settings": {
    "input_type": "ingest"
  }
}
Response examples (200)
An abbreviated response from `POST _inference/text_embedding/my-cohere-endpoint`.
{
  "text_embedding": [
    {
      "embedding": [
        {
          0.018569946,
          -0.036895752,
          0.01486969,
          -0.0045204163,
          -0.04385376,
          0.0075950623,
          0.04260254,
          -0.004005432,
          0.007865906,
          0.030792236,
          -0.050476074,
          0.011795044,
          -0.011642456,
          -0.010070801
        }
      ]
    }
  ]
}




































Create or update a Logstash pipeline Generally available

PUT /_logstash/pipeline/{id}

Create a pipeline that is used for Logstash Central Management. If the specified pipeline exists, it is replaced.

Required authorization

  • Cluster privileges: manage_logstash_pipelines
External documentation

Path parameters

  • id string Required

    An identifier for the pipeline.

application/json

Body Required

  • description string Required

    A description of the pipeline. This description is not used by Elasticsearch or Logstash.

  • last_modified string | number

    The date the pipeline was last updated. It must be in the yyyy-MM-dd'T'HH:mm:ss.SSSZZ strict_date_time format.

    One of:

    The date the pipeline was last updated. It must be in the yyyy-MM-dd'T'HH:mm:ss.SSSZZ strict_date_time format.

  • pipeline string Required

    The configuration for the pipeline.

    External documentation
  • pipeline_metadata object Required

    Optional metadata about the pipeline, which can have any contents. This metadata is not generated or used by Elasticsearch or Logstash.

    Hide pipeline_metadata attributes Show pipeline_metadata attributes object
    • type string Required
    • version string Required
  • pipeline_settings object Required

    Settings for the pipeline. It supports only flat keys in dot notation.

    Hide pipeline_settings attributes Show pipeline_settings attributes object
    • pipeline.workers number Required

      The number of workers that will, in parallel, execute the filter and output stages of the pipeline.

    • pipeline.batch.size number Required

      The maximum number of events an individual worker thread will collect from inputs before attempting to execute its filters and outputs.

    • pipeline.batch.delay number Required

      When creating pipeline event batches, how long in milliseconds to wait for each event before dispatching an undersized batch to pipeline workers.

    • queue.type string Required

      The internal queuing model to use for event buffering.

    • queue.max_bytes string Required

      The total capacity of the queue (queue.type: persisted) in number of bytes.

    • queue.checkpoint.writes number Required

      The maximum number of written events before forcing a checkpoint when persistent queues are enabled (queue.type: persisted).

  • username string Required

    The user who last updated the pipeline.

Responses

  • 200 application/json
PUT _logstash/pipeline/my_pipeline
{
  "description": "Sample pipeline for illustration purposes",
  "last_modified": "2021-01-02T02:50:51.250Z",
  "pipeline_metadata": {
    "type": "logstash_pipeline",
    "version": 1
  },
  "username": "elastic",
  "pipeline": "input {}\\n filter { grok {} }\\n output {}",
  "pipeline_settings": {
    "pipeline.workers": 1,
    "pipeline.batch.size": 125,
    "pipeline.batch.delay": 50,
    "queue.type": "memory",
    "queue.max_bytes": "1gb",
    "queue.checkpoint.writes": 1024
  }
}
resp = client.logstash.put_pipeline(
    id="my_pipeline",
    pipeline={
        "description": "Sample pipeline for illustration purposes",
        "last_modified": "2021-01-02T02:50:51.250Z",
        "pipeline_metadata": {
            "type": "logstash_pipeline",
            "version": 1
        },
        "username": "elastic",
        "pipeline": "input {}\\n filter { grok {} }\\n output {}",
        "pipeline_settings": {
            "pipeline.workers": 1,
            "pipeline.batch.size": 125,
            "pipeline.batch.delay": 50,
            "queue.type": "memory",
            "queue.max_bytes": "1gb",
            "queue.checkpoint.writes": 1024
        }
    },
)
const response = await client.logstash.putPipeline({
  id: "my_pipeline",
  pipeline: {
    description: "Sample pipeline for illustration purposes",
    last_modified: "2021-01-02T02:50:51.250Z",
    pipeline_metadata: {
      type: "logstash_pipeline",
      version: 1,
    },
    username: "elastic",
    pipeline: "input {}\\n filter { grok {} }\\n output {}",
    pipeline_settings: {
      "pipeline.workers": 1,
      "pipeline.batch.size": 125,
      "pipeline.batch.delay": 50,
      "queue.type": "memory",
      "queue.max_bytes": "1gb",
      "queue.checkpoint.writes": 1024,
    },
  },
});
response = client.logstash.put_pipeline(
  id: "my_pipeline",
  body: {
    "description": "Sample pipeline for illustration purposes",
    "last_modified": "2021-01-02T02:50:51.250Z",
    "pipeline_metadata": {
      "type": "logstash_pipeline",
      "version": 1
    },
    "username": "elastic",
    "pipeline": "input {}\\n filter { grok {} }\\n output {}",
    "pipeline_settings": {
      "pipeline.workers": 1,
      "pipeline.batch.size": 125,
      "pipeline.batch.delay": 50,
      "queue.type": "memory",
      "queue.max_bytes": "1gb",
      "queue.checkpoint.writes": 1024
    }
  }
)
$resp = $client->logstash()->putPipeline([
    "id" => "my_pipeline",
    "body" => [
        "description" => "Sample pipeline for illustration purposes",
        "last_modified" => "2021-01-02T02:50:51.250Z",
        "pipeline_metadata" => [
            "type" => "logstash_pipeline",
            "version" => 1,
        ],
        "username" => "elastic",
        "pipeline" => "input {}\\n filter { grok {} }\\n output {}",
        "pipeline_settings" => [
            "pipeline.workers" => 1,
            "pipeline.batch.size" => 125,
            "pipeline.batch.delay" => 50,
            "queue.type" => "memory",
            "queue.max_bytes" => "1gb",
            "queue.checkpoint.writes" => 1024,
        ],
    ],
]);
curl -X PUT -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"description":"Sample pipeline for illustration purposes","last_modified":"2021-01-02T02:50:51.250Z","pipeline_metadata":{"type":"logstash_pipeline","version":1},"username":"elastic","pipeline":"input {}\\n filter { grok {} }\\n output {}","pipeline_settings":{"pipeline.workers":1,"pipeline.batch.size":125,"pipeline.batch.delay":50,"queue.type":"memory","queue.max_bytes":"1gb","queue.checkpoint.writes":1024}}' "$ELASTICSEARCH_URL/_logstash/pipeline/my_pipeline"
client.logstash().putPipeline(p -> p
    .id("my_pipeline")
    .pipeline(pi -> pi
        .description("Sample pipeline for illustration purposes")
        .lastModified(DateTime.of("2021-01-02T02:50:51.250Z"))
        .pipeline("input {}\n filter { grok {} }\n output {}")
        .pipelineMetadata(pip -> pip
            .type("logstash_pipeline")
            .version("1")
        )
        .pipelineSettings(pip -> pip
            .pipelineWorkers(1)
            .pipelineBatchSize(125)
            .pipelineBatchDelay(50)
            .queueType("memory")
            .queueMaxBytes("1gb")
            .queueCheckpointWrites(1024)
        )
        .username("elastic")
    )
);
Request example
Run `PUT _logstash/pipeline/my_pipeline` to create a pipeline.
{
  "description": "Sample pipeline for illustration purposes",
  "last_modified": "2021-01-02T02:50:51.250Z",
  "pipeline_metadata": {
    "type": "logstash_pipeline",
    "version": 1
  },
  "username": "elastic",
  "pipeline": "input {}\\n filter { grok {} }\\n output {}",
  "pipeline_settings": {
    "pipeline.workers": 1,
    "pipeline.batch.size": 125,
    "pipeline.batch.delay": 50,
    "queue.type": "memory",
    "queue.max_bytes": "1gb",
    "queue.checkpoint.writes": 1024
  }
}













































Get filters Generally available

GET /_ml/filters/{filter_id}

All methods and paths for this operation:

GET /_ml/filters

GET /_ml/filters/{filter_id}

You can get a single filter or all filters.

Required authorization

  • Cluster privileges: manage_ml

Path parameters

  • filter_id string | array[string] Required

    A string that uniquely identifies a filter.

Query parameters

  • from number

    Skips the specified number of filters.

  • size number

    Specifies the maximum number of filters to obtain.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • count number Required
    • filters array[object] Required
      Hide filters attributes Show filters attributes object
      • description string

        A description of the filter.

      • filter_id string Required

        A string that uniquely identifies a filter.

      • items array[string] Required

        An array of strings which is the filter item list.

GET _ml/filters/safe_domains
resp = client.ml.get_filters(
    filter_id="safe_domains",
)
const response = await client.ml.getFilters({
  filter_id: "safe_domains",
});
response = client.ml.get_filters(
  filter_id: "safe_domains"
)
$resp = $client->ml()->getFilters([
    "filter_id" => "safe_domains",
]);
curl -X GET -H "Authorization: ApiKey $ELASTIC_API_KEY" "$ELASTICSEARCH_URL/_ml/filters/safe_domains"
client.ml().getFilters(g -> g
    .filterId("safe_domains")
);




















Estimate job model memory usage Generally available

POST /_ml/anomaly_detectors/_estimate_model_memory

Make an estimation of the memory usage for an anomaly detection job model. The estimate is based on analysis configuration details for the job and cardinality estimates for the fields it references.

Required authorization

  • Cluster privileges: manage_ml
application/json

Body Required

  • analysis_config object

    For a list of the properties that you can specify in the analysis_config component of the body of this API.

    Hide analysis_config attributes Show analysis_config attributes object
    • bucket_span string

      The size of the interval that the analysis is aggregated into, typically between 5m and 1h. This value should be either a whole number of days or equate to a whole number of buckets in one day. If the anomaly detection job uses a datafeed with aggregations, this value must also be divisible by the interval of the date histogram aggregation.

    • categorization_analyzer string | object

      If categorization_field_name is specified, you can also define the analyzer that is used to interpret the categorization field. This property cannot be used at the same time as categorization_filters. The categorization analyzer specifies how the categorization_field is interpreted by the categorization process. The categorization_analyzer field can be specified either as a string or as an object. If it is a string, it must refer to a built-in analyzer or one added by another plugin.

      One of:

      If categorization_field_name is specified, you can also define the analyzer that is used to interpret the categorization field. This property cannot be used at the same time as categorization_filters. The categorization analyzer specifies how the categorization_field is interpreted by the categorization process. The categorization_analyzer field can be specified either as a string or as an object. If it is a string, it must refer to a built-in analyzer or one added by another plugin.

    • categorization_field_name string

      If this property is specified, the values of the specified field will be categorized. The resulting categories must be used in a detector by setting by_field_name, over_field_name, or partition_field_name to the keyword mlcategory.

    • categorization_filters array[string]

      If categorization_field_name is specified, you can also define optional filters. This property expects an array of regular expressions. The expressions are used to filter out matching sequences from the categorization field values. You can use this functionality to fine tune the categorization by excluding sequences from consideration when categories are defined. For example, you can exclude SQL statements that appear in your log files. This property cannot be used at the same time as categorization_analyzer. If you only want to define simple regular expression filters that are applied prior to tokenization, setting this property is the easiest method. If you also want to customize the tokenizer or post-tokenization filtering, use the categorization_analyzer property instead and include the filters as pattern_replace character filters. The effect is exactly the same.

    • detectors array[object] Required

      Detector configuration objects specify which data fields a job analyzes. They also specify which analytical functions are used. You can specify multiple detectors for a job. If the detectors array does not contain at least one detector, no analysis can occur and an error is returned.

      Hide detectors attributes Show detectors attributes object
      • by_field_name string

        The field used to split the data. In particular, this property is used for analyzing the splits with respect to their own history. It is used for finding unusual values in the context of the split.

      • custom_rules array[object]

        Custom rules enable you to customize the way detectors operate. For example, a rule may dictate conditions under which results should be skipped. Kibana refers to custom rules as job rules.

        Hide custom_rules attributes Show custom_rules attributes object
        • actions array[string]

          The set of actions to be triggered when the rule applies. If more than one action is specified the effects of all actions are combined.

          Supported values include:

          • skip_result: The result will not be created. Unless you also specify skip_model_update, the model will be updated as usual with the corresponding series value.
          • skip_model_update: The value for that series will not be used to update the model. Unless you also specify skip_result, the results will be created as usual. This action is suitable when certain values are expected to be consistently anomalous and they affect the model in a way that negatively impacts the rest of the results.

          Values are skip_result or skip_model_update. Default value is ["skip_result"].

        • conditions array[object]

          An array of numeric conditions when the rule applies. A rule must either have a non-empty scope or at least one condition. Multiple conditions are combined together with a logical AND.

        • scope object

          A scope of series where the rule applies. A rule must either have a non-empty scope or at least one condition. By default, the scope includes all series. Scoping is allowed for any of the fields that are also specified in by_field_name, over_field_name, or partition_field_name.

      • detector_description string

        A description of the detector.

      • detector_index number

        A unique identifier for the detector. This identifier is based on the order of the detectors in the analysis_config, starting at zero. If you specify a value for this property, it is ignored.

      • exclude_frequent string

        If set, frequent entities are excluded from influencing the anomaly results. Entities can be considered frequent over time or frequent in a population. If you are working with both over and by fields, you can set exclude_frequent to all for both fields, or to by or over for those specific fields.

        Values are all, none, by, or over.

      • field_name string

        The field that the detector uses in the function. If you use an event rate function such as count or rare, do not specify this field. The field_name cannot contain double quotes or backslashes.

      • function string

        The analysis function that is used. For example, count, rare, mean, min, max, or sum.

      • over_field_name string

        The field used to split the data. In particular, this property is used for analyzing the splits with respect to the history of all splits. It is used for finding unusual values in the population of all splits.

      • partition_field_name string

        The field used to segment the analysis. When you use this property, you have completely independent baselines for each value of this field.

      • use_null boolean

        Defines whether a new series is used as the null series when there is no value for the by or partition fields.

        Default value is false.

    • influencers array[string]

      A comma separated list of influencer field names. Typically these can be the by, over, or partition fields that are used in the detector configuration. You might also want to use a field name that is not specifically named in a detector, but is available as part of the input data. When you use multiple detectors, the use of influencers is recommended as it aggregates results for each influencer entity.

    • latency string

      The size of the window in which to expect data that is out of time order. If you specify a non-zero value, it must be greater than or equal to one second. NOTE: Latency is applicable only when you send data by using the post data API.

    • model_prune_window string

      Advanced configuration option. Affects the pruning of models that have not been updated for the given time duration. The value must be set to a multiple of the bucket_span. If set too low, important information may be removed from the model. For jobs created in 8.1 and later, the default value is the greater of 30d or 20 times bucket_span.

    • multivariate_by_fields boolean

      This functionality is reserved for internal use. It is not supported for use in customer environments and is not subject to the support SLA of official GA features. If set to true, the analysis will automatically find correlations between metrics for a given by field value and report anomalies when those correlations cease to hold. For example, suppose CPU and memory usage on host A is usually highly correlated with the same metrics on host B. Perhaps this correlation occurs because they are running a load-balanced application. If you enable this property, anomalies will be reported when, for example, CPU usage on host A is high and the value of CPU usage on host B is low. That is to say, you’ll see an anomaly when the CPU of host A is unusual given the CPU of host B. To use the multivariate_by_fields property, you must also specify by_field_name in your detector.

    • per_partition_categorization object

      Settings related to how categorization interacts with partition fields.

      Hide per_partition_categorization attributes Show per_partition_categorization attributes object
      • enabled boolean

        To enable this setting, you must also set the partition_field_name property to the same value in every detector that uses the keyword mlcategory. Otherwise, job creation fails.

      • stop_on_warn boolean

        This setting can be set to true only if per-partition categorization is enabled. If true, both categorization and subsequent anomaly detection stops for partitions where the categorization status changes to warn. This setting makes it viable to have a job where it is expected that categorization works well for some partitions but not others; you do not pay the cost of bad categorization forever in the partitions where it works badly.

    • summary_count_field_name string

      If this property is specified, the data that is fed to the job is expected to be pre-summarized. This property value is the name of the field that contains the count of raw data points that have been summarized. The same summary_count_field_name applies to all detectors in the job. NOTE: The summary_count_field_name property cannot be used with the metric function.

  • max_bucket_cardinality object

    Estimates of the highest cardinality in a single bucket that is observed for influencer fields over the time period that the job analyzes data. To produce a good answer, values must be provided for all influencer fields. Providing values for fields that are not listed as influencers has no effect on the estimation.

    Hide max_bucket_cardinality attribute Show max_bucket_cardinality attribute object
    • * number Additional properties
  • overall_cardinality object

    Estimates of the cardinality that is observed for fields over the whole time period that the job analyzes data. To produce a good answer, values must be provided for fields referenced in the by_field_name, over_field_name and partition_field_name of any detectors. Providing values for other fields has no effect on the estimation. It can be omitted from the request if no detectors have a by_field_name, over_field_name or partition_field_name.

    Hide overall_cardinality attribute Show overall_cardinality attribute object
    • * number Additional properties

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • model_memory_estimate string Required
POST /_ml/anomaly_detectors/_estimate_model_memory
POST _ml/anomaly_detectors/_estimate_model_memory
{
  "analysis_config": {
    "bucket_span": "5m",
    "detectors": [
      {
        "function": "sum",
        "field_name": "bytes",
        "by_field_name": "status",
        "partition_field_name": "app"
      }
    ],
    "influencers": [
      "source_ip",
      "dest_ip"
    ]
  },
  "overall_cardinality": {
    "status": 10,
    "app": 50
  },
  "max_bucket_cardinality": {
    "source_ip": 300,
    "dest_ip": 30
  }
}
resp = client.ml.estimate_model_memory(
    analysis_config={
        "bucket_span": "5m",
        "detectors": [
            {
                "function": "sum",
                "field_name": "bytes",
                "by_field_name": "status",
                "partition_field_name": "app"
            }
        ],
        "influencers": [
            "source_ip",
            "dest_ip"
        ]
    },
    overall_cardinality={
        "status": 10,
        "app": 50
    },
    max_bucket_cardinality={
        "source_ip": 300,
        "dest_ip": 30
    },
)
const response = await client.ml.estimateModelMemory({
  analysis_config: {
    bucket_span: "5m",
    detectors: [
      {
        function: "sum",
        field_name: "bytes",
        by_field_name: "status",
        partition_field_name: "app",
      },
    ],
    influencers: ["source_ip", "dest_ip"],
  },
  overall_cardinality: {
    status: 10,
    app: 50,
  },
  max_bucket_cardinality: {
    source_ip: 300,
    dest_ip: 30,
  },
});
response = client.ml.estimate_model_memory(
  body: {
    "analysis_config": {
      "bucket_span": "5m",
      "detectors": [
        {
          "function": "sum",
          "field_name": "bytes",
          "by_field_name": "status",
          "partition_field_name": "app"
        }
      ],
      "influencers": [
        "source_ip",
        "dest_ip"
      ]
    },
    "overall_cardinality": {
      "status": 10,
      "app": 50
    },
    "max_bucket_cardinality": {
      "source_ip": 300,
      "dest_ip": 30
    }
  }
)
$resp = $client->ml()->estimateModelMemory([
    "body" => [
        "analysis_config" => [
            "bucket_span" => "5m",
            "detectors" => array(
                [
                    "function" => "sum",
                    "field_name" => "bytes",
                    "by_field_name" => "status",
                    "partition_field_name" => "app",
                ],
            ),
            "influencers" => array(
                "source_ip",
                "dest_ip",
            ),
        ],
        "overall_cardinality" => [
            "status" => 10,
            "app" => 50,
        ],
        "max_bucket_cardinality" => [
            "source_ip" => 300,
            "dest_ip" => 30,
        ],
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"analysis_config":{"bucket_span":"5m","detectors":[{"function":"sum","field_name":"bytes","by_field_name":"status","partition_field_name":"app"}],"influencers":["source_ip","dest_ip"]},"overall_cardinality":{"status":10,"app":50},"max_bucket_cardinality":{"source_ip":300,"dest_ip":30}}' "$ELASTICSEARCH_URL/_ml/anomaly_detectors/_estimate_model_memory"
client.ml().estimateModelMemory(e -> e
    .analysisConfig(a -> a
        .bucketSpan(b -> b
            .time("5m")
        )
        .detectors(d -> d
            .byFieldName("status")
            .fieldName("bytes")
            .function("sum")
            .partitionFieldName("app")
        )
        .influencers(List.of("source_ip","dest_ip"))
    )
    .maxBucketCardinality(Map.of("dest_ip", 30L,"source_ip", 300L))
    .overallCardinality(Map.of("app", 50L,"status", 10L))
);
Request example
Run `POST _ml/anomaly_detectors/_estimate_model_memory` to estimate the model memory limit based on the analysis configuration details provided in the request body.
{
  "analysis_config": {
    "bucket_span": "5m",
    "detectors": [
      {
        "function": "sum",
        "field_name": "bytes",
        "by_field_name": "status",
        "partition_field_name": "app"
      }
    ],
    "influencers": [
      "source_ip",
      "dest_ip"
    ]
  },
  "overall_cardinality": {
    "status": 10,
    "app": 50
  },
  "max_bucket_cardinality": {
    "source_ip": 300,
    "dest_ip": 30
  }
}
Response examples (200)
A successful response from `POST _ml/anomaly_detectors/_estimate_model_memory`.
{
  "model_memory_estimate": "21mb"
}
















































Update a filter Generally available

POST /_ml/filters/{filter_id}/_update

Updates the description of a filter, adds items, or removes items from the list.

Required authorization

  • Cluster privileges: manage_ml

Path parameters

  • filter_id string Required

    A string that uniquely identifies a filter.

application/json

Body Required

  • add_items array[string]

    The items to add to the filter.

  • description string

    A description for the filter.

  • remove_items array[string]

    The items to remove from the filter.

Responses

  • 200 application/json
    Hide response attributes Show response attributes object
    • description string Required
    • filter_id string Required
    • items array[string] Required
POST /_ml/filters/{filter_id}/_update
POST _ml/filters/safe_domains/_update
{
  "description": "Updated list of domains",
  "add_items": ["*.myorg.com"],
  "remove_items": ["wikipedia.org"]
}
resp = client.ml.update_filter(
    filter_id="safe_domains",
    description="Updated list of domains",
    add_items=[
        "*.myorg.com"
    ],
    remove_items=[
        "wikipedia.org"
    ],
)
const response = await client.ml.updateFilter({
  filter_id: "safe_domains",
  description: "Updated list of domains",
  add_items: ["*.myorg.com"],
  remove_items: ["wikipedia.org"],
});
response = client.ml.update_filter(
  filter_id: "safe_domains",
  body: {
    "description": "Updated list of domains",
    "add_items": [
      "*.myorg.com"
    ],
    "remove_items": [
      "wikipedia.org"
    ]
  }
)
$resp = $client->ml()->updateFilter([
    "filter_id" => "safe_domains",
    "body" => [
        "description" => "Updated list of domains",
        "add_items" => array(
            "*.myorg.com",
        ),
        "remove_items" => array(
            "wikipedia.org",
        ),
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"description":"Updated list of domains","add_items":["*.myorg.com"],"remove_items":["wikipedia.org"]}' "$ELASTICSEARCH_URL/_ml/filters/safe_domains/_update"
client.ml().updateFilter(u -> u
    .addItems("*.myorg.com")
    .description("Updated list of domains")
    .filterId("safe_domains")
    .removeItems("wikipedia.org")
);
Request example
An example body for a `POST _ml/filters/safe_domains/_update` request.
{
  "description": "Updated list of domains",
  "add_items": ["*.myorg.com"],
  "remove_items": ["wikipedia.org"]
}

























Preview features used by data frame analytics Generally available

POST /_ml/data_frame/analytics/{id}/_preview

All methods and paths for this operation:

GET /_ml/data_frame/analytics/_preview

POST /_ml/data_frame/analytics/_preview
GET /_ml/data_frame/analytics/{id}/_preview
POST /_ml/data_frame/analytics/{id}/_preview

Preview the extracted features used by a data frame analytics config.

Required authorization

  • Cluster privileges: monitor_ml

Path parameters

  • id string Required

    Identifier for the data frame analytics job.

application/json

Body

  • config object

    A data frame analytics config as described in create data frame analytics jobs. Note that id and dest don’t need to be provided in the context of this API.

    Hide config attributes Show config attributes object
    • source object Required
      Hide source attributes Show source attributes object
      • index string | array[string] Required

        Index or indices on which to perform the analysis. It can be a single index or index pattern as well as an array of indices or patterns. NOTE: If your source indices contain documents with the same IDs, only the document that is indexed last appears in the destination index.

      • runtime_mappings object

        Definitions of runtime fields that will become part of the mapping of the destination index.

        Hide runtime_mappings attribute Show runtime_mappings attribute object
        • * object Additional properties
      • _source object

        Specify includes and/or `excludes patterns to select which fields will be present in the destination. Fields that are excluded cannot be included in the analysis.

        Hide _source attributes Show _source attributes object
        • includes array[string]

          An array of strings that defines the fields that will be excluded from the analysis. You do not need to add fields with unsupported data types to excludes, these fields are excluded from the analysis automatically.

        • excludes array[string]

          An array of strings that defines the fields that will be included in the analysis.

      • query object

        The Elasticsearch query domain-specific language (DSL). This value corresponds to the query object in an Elasticsearch search POST body. All the options that are supported by Elasticsearch can be used, as this object is passed verbatim to Elasticsearch. By default, this property has the following value: {"match_all": {}}.

        Query DSL
    • analysis object Required
      Hide analysis attributes Show analysis attributes object
      • outlier_detection object

        The configuration information necessary to perform outlier detection. NOTE: Advanced parameters are for fine-tuning classification analysis. They are set automatically by hyperparameter optimization to give the minimum validation error. It is highly recommended to use the default values unless you fully understand the function of these parameters.

        Hide outlier_detection attributes Show outlier_detection attributes object
        • compute_feature_influence boolean

          Specifies whether the feature influence calculation is enabled.

          Default value is true.

        • feature_influence_threshold number

          The minimum outlier score that a document needs to have in order to calculate its feature influence score. Value range: 0-1.

          Default value is 0.1.

        • method string

          The method that outlier detection uses. Available methods are lof, ldof, distance_kth_nn, distance_knn, and ensemble. The default value is ensemble, which means that outlier detection uses an ensemble of different methods and normalises and combines their individual outlier scores to obtain the overall outlier score.

          Default value is ensemble.

        • n_neighbors number

          Defines the value for how many nearest neighbors each method of outlier detection uses to calculate its outlier score. When the value is not set, different values are used for different ensemble members. This default behavior helps improve the diversity in the ensemble; only override it if you are confident that the value you choose is appropriate for the data set.

        • outlier_fraction number

          The proportion of the data set that is assumed to be outlying prior to outlier detection. For example, 0.05 means it is assumed that 5% of values are real outliers and 95% are inliers.

        • standardization_enabled boolean

          If true, the following operation is performed on the columns before computing outlier scores: (x_i - mean(x_i)) / sd(x_i).

          Default value is true.

    • model_memory_limit string
    • max_num_threads number
    • analyzed_fields object
      Hide analyzed_fields attributes Show analyzed_fields attributes object
      • includes array[string]

        An array of strings that defines the fields that will be excluded from the analysis. You do not need to add fields with unsupported data types to excludes, these fields are excluded from the analysis automatically.

      • excludes array[string]

        An array of strings that defines the fields that will be included in the analysis.

Responses

  • 200 application/json
    Hide response attribute Show response attribute object
    • feature_values array[object] Required

      An array of objects that contain feature name and value pairs. The features have been processed and indicate what will be sent to the model for training.

      Hide feature_values attribute Show feature_values attribute object
      • * string Additional properties
POST /_ml/data_frame/analytics/{id}/_preview
POST _ml/data_frame/analytics/_preview
{
  "config": {
    "source": {
      "index": "houses_sold_last_10_yrs"
    },
    "analysis": {
      "regression": {
        "dependent_variable": "price"
      }
    }
  }
}
resp = client.ml.preview_data_frame_analytics(
    config={
        "source": {
            "index": "houses_sold_last_10_yrs"
        },
        "analysis": {
            "regression": {
                "dependent_variable": "price"
            }
        }
    },
)
const response = await client.ml.previewDataFrameAnalytics({
  config: {
    source: {
      index: "houses_sold_last_10_yrs",
    },
    analysis: {
      regression: {
        dependent_variable: "price",
      },
    },
  },
});
response = client.ml.preview_data_frame_analytics(
  body: {
    "config": {
      "source": {
        "index": "houses_sold_last_10_yrs"
      },
      "analysis": {
        "regression": {
          "dependent_variable": "price"
        }
      }
    }
  }
)
$resp = $client->ml()->previewDataFrameAnalytics([
    "body" => [
        "config" => [
            "source" => [
                "index" => "houses_sold_last_10_yrs",
            ],
            "analysis" => [
                "regression" => [
                    "dependent_variable" => "price",
                ],
            ],
        ],
    ],
]);
curl -X POST -H "Authorization: ApiKey $ELASTIC_API_KEY" -H "Content-Type: application/json" -d '{"config":{"source":{"index":"houses_sold_last_10_yrs"},"analysis":{"regression":{"dependent_variable":"price"}}}}' "$ELASTICSEARCH_URL/_ml/data_frame/analytics/_preview"
client.ml().previewDataFrameAnalytics(p -> p
    .config(c -> c
        .source(s -> s
            .index("houses_sold_last_10_yrs")
        )
        .analysis(a -> a
            .regression(r -> r
                .dependentVariable("price")
            )
        )
    )
);
Request example
An example body for a `POST _ml/data_frame/analytics/_preview` request.
{
  "config": {
    "source": {
      "index": "houses_sold_last_10_yrs"
    },
    "analysis": {
      "regression": {
        "dependent_variable": "price"
      }
    }
  }
}