Skip to content

Commit

Permalink
initial open source string sketch
Browse files Browse the repository at this point in the history
  • Loading branch information
williballenthin committed May 4, 2023
1 parent 861813d commit 1db0188
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 0 deletions.
3 changes: 3 additions & 0 deletions db/oss/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*.csv
*.jsonl
*.jsonl.gz
Binary file added db/oss/zlib.jsonl.gz
Binary file not shown.
28 changes: 28 additions & 0 deletions floss/qs/db/oss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import gzip
import pathlib
from typing import Dict

import msgspec


class OpenSourceString(msgspec.Struct):
    """One string record from the open source string database.

    Instances are produced by decoding a JSON object with these keys;
    `line_number` may be omitted and then defaults to None.
    """

    # the string value itself
    string: str
    # name and version of the library associated with the string
    library_name: str
    library_version: str
    # file path and function name recorded alongside the string
    # (presumably its location in the library's sources; confirm with the producer of the data)
    file_path: str
    function_name: str
    # optional line number; None when not recorded
    line_number: int | None = None


class OpenSourceStringDatabase:
    """In-memory lookup table of open source strings, keyed by the string value.

    Attributes:
        metadata_by_string: maps each string to the OpenSourceString record
            that describes it.
    """

    metadata_by_string: Dict[str, OpenSourceString]

    def __init__(self, metadata_by_string: Dict[str, OpenSourceString]) -> None:
        """Create a database from an already-built string -> record mapping.

        Args:
            metadata_by_string: mapping from string value to its record.
        """
        # An explicit constructor is required: a bare class-level annotation
        # does not generate one, so `cls(metadata_by_string=...)` in
        # `from_file` would otherwise raise TypeError.
        self.metadata_by_string = metadata_by_string

    @classmethod
    def from_file(cls, path: pathlib.Path) -> "OpenSourceStringDatabase":
        """Load a database from a gzip-compressed JSON Lines file.

        Every non-blank line is decoded as one OpenSourceString. If the same
        string occurs on several lines, the last occurrence wins.

        Args:
            path: location of the `.jsonl.gz` file to read.

        Returns:
            A database indexing every decoded record by its `string` field.

        Raises:
            OSError: if the file cannot be read.
            msgspec.DecodeError: if a non-blank line is not a valid
                JSON-encoded OpenSourceString.
        """
        metadata_by_string: Dict[str, OpenSourceString] = {}
        decoder = msgspec.json.Decoder(type=OpenSourceString)
        for line in gzip.decompress(path.read_bytes()).split(b"\n"):
            # JSONL files normally end with a newline, which leaves an empty
            # final chunk after splitting; decoding it would fail, so skip
            # blank lines instead of aborting the whole load.
            if not line.strip():
                continue
            record = decoder.decode(line)
            metadata_by_string[record.string] = record

        return cls(metadata_by_string=metadata_by_string)
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"networkx==2.5.1",
"halo==0.0.31",
"rich==13.3.4",
"msgspec==0.14.2",
]

# this sets __version__
Expand Down

0 comments on commit 1db0188

Please sign in to comment.