Usage Stats Collection #2852
Changes from 40 commits
@@ -0,0 +1,139 @@
import os
import torch
import json
import platform
import pkg_resources
import requests
import datetime
import psutil
from threading import Thread
from pathlib import Path
from typing import Optional
from enum import Enum

_xdg_config_home = os.getenv('XDG_CONFIG_HOME',
                             os.path.expanduser('~/.config'))
_vllm_internal_path = 'vllm/usage_stats.json'

_USAGE_STATS_FILE = os.path.join(
    _xdg_config_home,
    _vllm_internal_path)  # File path to store usage data locally
_USAGE_STATS_ENABLED = None
_USAGE_STATS_SERVER = os.environ.get('VLLM_USAGE_STATS_SERVER',
                                     'https://stats.vllm.ai')


def is_usage_stats_enabled():
    """Determine whether or not we can send usage stats to the server.
    The logic is as follows:
    - By default, it should be enabled.
    - Two environment variables can disable it:
        - DO_NOT_TRACK=1
        - VLLM_NO_USAGE_STATS=1
    - A file in the home directory can disable it if it exists:
        - $HOME/.config/vllm/do_not_track
    """
    global _USAGE_STATS_ENABLED
    if _USAGE_STATS_ENABLED is None:
        do_not_track = os.environ.get('DO_NOT_TRACK', '0') == '1'
        no_usage_stats = os.environ.get('VLLM_NO_USAGE_STATS', '0') == '1'
        do_not_track_file = os.path.exists(
            os.path.expanduser('~/.config/vllm/do_not_track'))

        _USAGE_STATS_ENABLED = not (do_not_track or no_usage_stats
                                    or do_not_track_file)
    return _USAGE_STATS_ENABLED


def _get_current_timestamp_ns() -> int:
    return int(datetime.datetime.now(datetime.timezone.utc).timestamp() * 1e9)


def _detect_cloud_provider() -> str:
    # Try detecting through vendor file
    vendor_files = [
        '/sys/class/dmi/id/product_version', '/sys/class/dmi/id/bios_vendor',
        '/sys/class/dmi/id/product_name',
        '/sys/class/dmi/id/chassis_asset_tag', '/sys/class/dmi/id/sys_vendor'
    ]
    # Mapping of identifiable strings to cloud providers
    cloud_identifiers = {
        'amazon': "AWS",
        'microsoft corporation': "AZURE",
        'google': "GCP",
        'oraclecloud': "OCI",
    }

    for vendor_file in vendor_files:
        path = Path(vendor_file)
        if path.is_file():
            file_content = path.read_text().lower()
            for identifier, provider in cloud_identifiers.items():
                if identifier in file_content:
                    return provider
    return "UNKNOWN"


class UsageContext(Enum):
    UNKNOWN_CONTEXT = "UNKNOWN_CONTEXT"
    LLM_CLASS = "LLM_CLASS"
    API_SERVER = "API_SERVER"
    OPENAI_API_SERVER = "OPENAI_API_SERVER"
    ENGINE_CONTEXT = "ENGINE_CONTEXT"


class UsageMessage:

    def __init__(self) -> None:
        self.gpu_list: Optional[list] = None
        self.provider: Optional[str] = None
        self.architecture: Optional[str] = None
        self.platform: Optional[str] = None
        self.model: Optional[str] = None
        self.vllm_version: Optional[str] = None
        self.context: Optional[str] = None
        self.log_time: Optional[int] = None
        # Logical CPU count
        self.num_cpu: Optional[int] = None
        self.cpu_type: Optional[str] = None
        self.total_memory: Optional[int] = None
        self.source: Optional[str] = None

    def report_usage(self, model: str, context: UsageContext) -> None:
        # Collect and send the report in a background thread so it never
        # blocks the caller.
        t = Thread(target=self._report_usage, args=(model, context))
        t.start()

    def _report_usage(self, model: str, context: UsageContext) -> None:
        self.context = context.value
        self.gpu_list = []
        for i in range(torch.cuda.device_count()):
            device_property = torch.cuda.get_device_properties(i)
            gpu_name = device_property.name
            gpu_memory = device_property.total_memory
            self.gpu_list.append({"name": gpu_name, "memory": gpu_memory})
        self.provider = _detect_cloud_provider()
        self.architecture = platform.machine()
        self.platform = platform.platform()
        self.vllm_version = pkg_resources.get_distribution("vllm").version
        self.model = model
        self.log_time = _get_current_timestamp_ns()
        self.num_cpu = os.cpu_count()
Review comment: It would be good to get the type of CPU as well, such as its product name, so you can be aware of what ISA extensions are available for performance.

Reply: +1!
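A rough sketch of that idea: on Linux, the "model name" field in /proc/cpuinfo is usually more descriptive than platform.processor(). This is only an illustration, not part of the diff, and the helper name _read_cpu_model_name is made up here.

```python
import platform
from pathlib import Path


def _read_cpu_model_name() -> str:
    # Hypothetical helper: prefer the "model name" field from /proc/cpuinfo,
    # which names the actual CPU product (and thus hints at available ISA
    # extensions); fall back to the standard library's best guess.
    cpuinfo = Path('/proc/cpuinfo')
    if cpuinfo.is_file():
        for line in cpuinfo.read_text().splitlines():
            if line.lower().startswith('model name'):
                return line.split(':', 1)[1].strip()
    return platform.processor() or platform.machine()
```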
        # Best effort reading processor name
        self.cpu_type = platform.processor()
        self.total_memory = psutil.virtual_memory().total
Review comment: Just a heads up - if the model server is deployed as a Linux docker container, then most metrics from …

Reply: Yeah, I know there's a way around this, but it's a bit too complex. I'm just going to assume most folks running LLM prod will adopt a one-pod-per-VM approach.
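On the container point, a commonly used workaround is to read the cgroup memory limit when one is present, since psutil reports host-level totals from inside a container. A minimal sketch, assuming standard Linux cgroup v1/v2 paths; the helper name is hypothetical and this is not part of the PR:

```python
import psutil
from pathlib import Path


def _memory_limit_bytes() -> int:
    # Hypothetical helper: inside a Linux container, the cgroup limit is a
    # better ceiling than psutil.virtual_memory().total, which reflects the
    # host. Check cgroup v2 first, then cgroup v1, then fall back to psutil
    # (this also covers the "max"/unlimited case, which is not an integer).
    for limit_file in ('/sys/fs/cgroup/memory.max',
                       '/sys/fs/cgroup/memory/memory.limit_in_bytes'):
        path = Path(limit_file)
        if path.is_file():
            value = path.read_text().strip()
            if value.isdigit():
                return int(value)
    return psutil.virtual_memory().total
```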
        self.source = os.environ.get("VLLM_USAGE_SOURCE", "production")
        self._write_to_file()
        headers = {'Content-type': 'application/x-ndjson'}
        payload = json.dumps(vars(self))
        try:
            requests.post(_USAGE_STATS_SERVER, data=payload, headers=headers)
        except requests.exceptions.RequestException:
            print("Usage Log Request Failed")

    def _write_to_file(self):
        os.makedirs(os.path.dirname(_USAGE_STATS_FILE), exist_ok=True)
        with open(_USAGE_STATS_FILE, "w+") as outfile:
            json.dump(vars(self), outfile)


usage_message = UsageMessage()
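For context, a hypothetical call-site sketch of how an entry point might use this module, gated on the opt-out check. The import path and model name are illustrative assumptions, not taken from this diff:

```python
# Hypothetical call site; the module path below is an assumption.
from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled,
                                  usage_message)

if is_usage_stats_enabled():
    # Fire-and-forget: report_usage spawns a background thread.
    usage_message.report_usage("facebook/opt-125m",
                               UsageContext.OPENAI_API_SERVER)
```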
Review comment: I wonder if it makes more sense to get the model architecture vs. the model name? E.g. it's probably more useful to know it's the Llama architecture with size X than the name string for tracking purposes. Otherwise you'll have to do this on the backend, and it may not be recoverable for local models.

Reply: +100
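One possible shape for that suggestion, sketched with Hugging Face's AutoConfig. This is illustrative only: the helper name is made up, and the diff as shown still reports the raw model string.

```python
from typing import Optional

from transformers import AutoConfig


def _get_model_architecture(model: str) -> Optional[str]:
    # Hypothetical helper: report the architecture class name (e.g.
    # "LlamaForCausalLM") rather than the user-supplied model string,
    # which may be a local path that is meaningless for aggregation.
    try:
        config = AutoConfig.from_pretrained(model)
        architectures = getattr(config, "architectures", None) or []
        return architectures[0] if architectures else None
    except Exception:
        # Best effort only; never fail usage reporting because of this.
        return None
```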