Skip to content

Ingester memory leak version 2.4.1 #4713

Closed
@timansky

Description

@timansky

I noticed a memory leak in the ingester with version 2.4.1.

Screen Shot 2021-11-09 at 09 00 52

common-loki-i4 - v2.4.1
common-loki-i[1:3;5] - v2.4.0

---
auth_enabled: true

# Settings shared by the Loki components: object storage, ring and replication.
common:
  storage:
    s3:
      # Comma-separated list of buckets.
      bucketnames: loki-00,loki-01,loki-02,loki-03,loki-04,loki-05,loki-06,loki-07,loki-08,loki-09
      endpoint: ceph:7480
      region: US
      # Credentials are redacted in this report. A space is required after the
      # colon: `access_key_id:xxx` is not a key/value pair and breaks parsing.
      access_key_id: xxx
      secret_access_key: xxx
      # NOTE(review): `insecure` and `insecure_skip_verify` are both enabled;
      # presumably acceptable for an internal endpoint - confirm.
      insecure: true
      sse_encryption: false
      http_config:
        idle_conn_timeout: 90s
        response_header_timeout: 0s
        insecure_skip_verify: true
      s3forcepathstyle: true
  ring:
    kvstore:
      store: etcd
      etcd:
        endpoints:
          - common-loki-d1:2379
          - common-loki-d2:2379
          - common-loki-d3:2379
    heartbeat_timeout: 1m
    heartbeat_period: 15s
  replication_factor: 2

# Index/chunk schema: a single period entry starting 2020-12-01.
schema_config:
  configs:
    - from: 2020-12-01
      schema: v11
      # Index goes through boltdb-shipper; chunks and shipped index live in S3.
      store: boltdb-shipper
      object_store: s3
      chunks:
        period: 24h
        prefix: loki_chunk_
      index:
        period: 24h
        prefix: loki_index_

# Index storage (boltdb-shipper) and the index query cache.
storage_config:
  boltdb_shipper:
    shared_store: s3
    # Directory where ingesters would write boltdb files which would then be
    # uploaded by shipper to configured storage.
    active_index_directory: /var/lib/loki/active
    # Cache location for restoring boltDB files for queries.
    cache_location: /var/lib/loki/cache
    # TTL for boltDB files restored in cache for queries.
    cache_ttl: 168h
    # Resync downloaded files with the storage.
    resync_interval: 5m
    # Number of days of index to be kept downloaded for queries.
    query_ready_num_days: 1
    index_gateway_client:
      server_address: loki-index-gateway.monitoring:9095

  # Cache validity for active index entries
  # (must be lower than ingester.chunk_idle_period).
  # NOTE(review): ingester.chunk_idle_period is also 2h in this file, so the
  # value is equal to, not lower than, that setting - confirm this is intended.
  index_cache_validity: 2h
  max_chunk_batch_size: 100

  # Index query results are cached in memcached; the in-process FIFO cache
  # is switched off.
  index_queries_cache_config:
    enable_fifocache: false
    memcached_client:
      addresses: loki-cache.monitoring:11213
      consistent_hash: true
      timeout: 1s
      update_interval: 5s
    memcached:
      expiration: 1h
      batch_size: 256
      parallelism: 32

# Chunk cache: memcached only, the in-process FIFO cache is switched off.
chunk_store_config:
  chunk_cache_config:
    enable_fifocache: false
    memcached_client:
      addresses: loki-cache.monitoring:11211
      consistent_hash: true
      timeout: 1s
      update_interval: 5s
    memcached:
      expiration: 1h
      batch_size: 256
      parallelism: 32
  cache_lookups_older_than: 1d

# Listen ports and gRPC message size limits.
server:
  http_listen_port: 3100
  grpc_listen_port: 9095
  # Both gRPC limits are 1024 * 1024 * 100 bytes (100 MiB).
  grpc_server_max_recv_msg_size: 104857600
  grpc_server_max_send_msg_size: 104857600

# Ingester chunk lifecycle, ring lifecycler and write-ahead log.
ingester:
  # When this threshold (262144 bytes = 256 KiB) is exceeded the head block
  # will be cut and compressed inside the chunk.
  chunk_block_size: 262144
  # Loki will attempt to build chunks up to this size (1572864 bytes = 1.5 MiB),
  # flushing first if chunk_idle_period or max_chunk_age is reached first.
  chunk_target_size: 1572864
  chunk_encoding: snappy
  # The maximum duration of a timeseries chunk in memory. If a timeseries runs
  # for longer than this the current chunk will be flushed to the store and a
  # new chunk created. (Author's note on the default: 1h.)
  max_chunk_age: 3h
  # How long chunks should sit in-memory with no updates before being flushed
  # if they don't hit the max block size. (Author's note on the default: 30m.)
  chunk_idle_period: 2h
  # How long chunks should be retained in-memory after they've been flushed.
  chunk_retain_period: 5m
  # Disable chunk transfer which is not possible with statefulsets.
  max_transfer_retries: 0
  sync_period: 1m
  sync_min_utilization: 0.7
  # For S3 this value must always be left as 0.
  query_store_max_look_back_period: 0
  autoforget_unhealthy: false
  lifecycler:
    # Duration to sleep before exiting to ensure metrics are scraped.
    final_sleep: 10s
    min_ready_duration: 10s
  wal:
    enabled: true
    dir: /var/lib/loki/wal
    flush_on_shutdown: true
    # Maximum memory size the WAL may use during replay. After hitting this it
    # will flush data to storage before continuing.
    replay_memory_ceiling: 2GB

# Per-tenant ingestion and query limits.
limits_config:
  # Ingestion
  ingestion_rate_strategy: global
  ingestion_rate_mb: 70
  ingestion_burst_size_mb: 100
  reject_old_samples: true
  reject_old_samples_max_age: 96h
  enforce_metric_name: false
  unordered_writes: true

  # Streams: the per-ingester limit is disabled in favor of the global limit.
  max_streams_per_user: 0
  # 100k
  max_global_streams_per_user: 100000

  # Queries
  max_query_length: 720h
  max_query_lookback: 720h
  max_query_series: 10000
  # Most recent allowed cacheable result per-tenant, to prevent caching very
  # recent results that might still be in flux. (Author's note on the default: 1m.)
  max_cache_freshness_per_query: 5m

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions