Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Is there a problem with multi-level directory collection? #21319

Open
laopo521anping opened this issue Sep 19, 2024 · 1 comment
Open

Is there a problem with multi-level directory collection? #21319

laopo521anping opened this issue Sep 19, 2024 · 1 comment
Labels
meta: awaiting author Pull requests that are awaiting their author. source: file Anything `file` source related type: bug A code related bug.

Comments

@laopo521anping
Copy link

laopo521anping commented Sep 19, 2024

A note for the community

  • Please vote on this issue by adding a 👍 reaction to the original issue to help the community and maintainers prioritize this request
  • If you are interested in working on this issue or have submitted a pull request, please leave a comment

Problem

Collection pipeline
file -> Vector -> Kafka
The directory tree being collected is 10 levels deep

Problem phenomenon
Data is lost during collection.
Out of a total of 3000 files, each with a single line written to it, only 949 lines were collected.

多级目录

kafka logs:
{"@timestamp":"2024-09-19T05:49:44.874343145Z","file":"/opt/file1/20240919/1/2/tmp_20240919_1.log","host":"760b54e990e1","message":"20240919134945_1","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874380045Z","file":"/opt/file1/20240919/1/2/tmp_20240919_10.log","host":"760b54e990e1","message":"20240919134945_10","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874396715Z","file":"/opt/file1/20240919/1/2/tmp_20240919_2.log","host":"760b54e990e1","message":"20240919134945_2","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874406722Z","file":"/opt/file1/20240919/1/2/tmp_20240919_3.log","host":"760b54e990e1","message":"20240919134945_3","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874416272Z","file":"/opt/file1/20240919/1/2/tmp_20240919_4.log","host":"760b54e990e1","message":"20240919134945_4","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874425632Z","file":"/opt/file1/20240919/1/2/tmp_20240919_5.log","host":"760b54e990e1","message":"20240919134945_5","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874434169Z","file":"/opt/file1/20240919/1/2/tmp_20240919_6.log","host":"760b54e990e1","message":"20240919134945_6","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874444929Z","file":"/opt/file1/20240919/1/2/tmp_20240919_7.log","host":"760b54e990e1","message":"20240919134945_7","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874454575Z","file":"/opt/file1/20240919/1/2/tmp_20240919_8.log","host":"760b54e990e1","message":"20240919134945_8","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874466055Z","file":"/opt/file1/20240919/1/2/tmp_20240919_9.log","host":"760b54e990e1","message":"20240919134945_9","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874474832Z","file":"/opt/file1/20240919/1/tmp_20240919_100.log","host":"760b54e990e1","message":"20240919134945_100","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874483879Z","file":"/opt/file1/20240919/1/tmp_20240919_11.log","host":"760b54e990e1","message":"20240919134945_11","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874494239Z","file":"/opt/file1/20240919/1/tmp_20240919_12.log","host":"760b54e990e1","message":"20240919134945_12","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874502815Z","file":"/opt/file1/20240919/1/tmp_20240919_13.log","host":"760b54e990e1","message":"20240919134945_13","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874511982Z","file":"/opt/file1/20240919/1/tmp_20240919_14.log","host":"760b54e990e1","message":"20240919134945_14","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874520819Z","file":"/opt/file1/20240919/1/tmp_20240919_15.log","host":"760b54e990e1","message":"20240919134945_15","offset":0,"source_type":"file"}
{"@timestamp":"2024-09-19T05:49:44.874531702Z","file":"/opt/file1/20
加载更多

Configuration

# Global options: directory where Vector persists its state
data_dir: /var/lib/vector

# Tail every *.log file found at any depth below /opt/file1
sources:
  file_logs:
    type: file
    data_dir: /var/lib/vector/
    include:
      - '/opt/file1/**/*.log'  # glob patterns are supported
    # Event fields that receive the file path, host name and byte offset
    file_key: file
    host_key: host
    offset_key: offset
    glob_minimum_cooldown_ms: 1000
    ignore_older_secs: 3600
    max_line_bytes: 102400
    max_read_bytes: 2048
    read_from: beginning
    # Disabled alternatives, kept for reference:
    # read_from: end
    # line_delimiter: "\n"
    # rotate_wait_secs: 9223372036854776000

# Move each event's `timestamp` field to `@timestamp`
transforms:
  file_trans_logs:
    type: remap
    inputs:
      - file_logs
    drop_on_error: false
    source: |-
      .@timestamp = del(.timestamp)
    timezone: local

# Disabled sampling stage, kept for reference:
#  apache_sample:
#    type: sample
#    inputs:
#      - apache_parser
#    rate: 2  # only keep 50% (1/`rate`)

# Publish the remapped events to Kafka as JSON
sinks:
  # Disabled console sink, kept for debugging:
  # out:
  #   inputs:
  #     - file_trans_logs
  #   type: console
  #   encoding:
  #     codec: json
  kafkaout:
    type: kafka
    # Reads straight from the remap transform; no sampling stage is active
    inputs:
      - file_trans_logs
    # Quoted so the comma-separated host:port list is always read as one string
    bootstrap_servers: '192.168.4.57:9092,192.168.4.97:9092,192.168.4.136:9092'
    topic: vector-kafka-logs12
    encoding:
      codec: json
    # NOTE(review): Vector's documentation describes `buffer` as a mapping;
    # this single-element list form is preserved as written -- confirm it is
    # accepted by the Vector version in use.
    buffer:
      - type: memory
        max_events: 10000

Version

vector 0.40.1

Debug Output

No response

Example Data

20240919134945_1

Additional Context

Vector is running in Docker.

References

No response

@laopo521anping laopo521anping added the type: bug A code related bug. label Sep 19, 2024
@jszwedko
Copy link
Member

I can't think of any issues that would result from multi-level directory collection, no. Do the files end with a newline character?

@jszwedko jszwedko added source: file Anything `file` source related meta: awaiting author Pull requests that are awaiting their author. labels Sep 23, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
meta: awaiting author Pull requests that are awaiting their author. source: file Anything `file` source related type: bug A code related bug.
Projects
None yet
Development

No branches or pull requests

2 participants