Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,41 @@ venv.bak/

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Testing and coverage
.pytest_cache/
.coverage
.coverage.*
htmlcov/
coverage.xml
*.cover
*.py,cover
.hypothesis/
pytest_cache/
test-results/

# Claude settings
.claude/*

# Poetry
poetry.lock

# IDE and editor files
.idea/
.vscode/
*.swp
*.swo
*~
.DS_Store

# Virtual environments
virtualenv/
.virtualenv/

# Build artifacts
*.egg-info/
.eggs/
develop-eggs/
*.egg
106 changes: 106 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
[tool.poetry]
name = "grover"
version = "0.1.0"
description = "Grover: A State-of-the-Art Defense against Neural Fake News"
authors = ["Your Name <you@example.com>"]
readme = "README.md"
license = "MIT"
packages = [
{ include = "lm" },
{ include = "sample" }
]

[tool.poetry.dependencies]
# TensorFlow 1.x (pinned below as <2.0.0) publishes wheels only up to
# Python 3.7, so the interpreter range must stay below 3.8 to resolve.
python = ">=3.7,<3.8"
pandas = ">=0.24.2"
regex = ">=2019.4.14"
h5py = ">=2.9.0"
numpy = ">=1.19.0,<1.24.0"
tensorboard = ">=1.13.1"
tensorflow = ">=1.13.1,<2.0.0"
tensorflow-estimator = ">=1.13.0,<2.0.0"
tqdm = ">=4.31.1"
requests = ">=2.22.0"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
pytest-cov = "^4.1.0"
pytest-mock = "^3.11.1"

[tool.poetry.scripts]
# Poetry script targets must be "module:callable" references;
# a bare "pytest" string is rejected by poetry-core at install time.
test = "pytest:main"
tests = "pytest:main"

[tool.pytest.ini_options]
minversion = "7.0"
testpaths = ["tests"]
python_files = ["test_*.py", "*_test.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = [
"-ra",
"--strict-markers",
"--cov=lm",
"--cov=sample",
"--cov=discrimination",
"--cov-report=term-missing",
"--cov-report=html",
"--cov-report=xml",
"-v",
"--tb=short",
"--maxfail=3"
]
markers = [
"unit: marks tests as unit tests (fast, isolated)",
"integration: marks tests as integration tests (may require external resources)",
"slow: marks tests as slow (deselect with '-m \"not slow\"')"
]
console_output_style = "progress"
filterwarnings = [
"error",
"ignore::UserWarning",
"ignore::DeprecationWarning"
]

[tool.coverage.run]
source = ["lm", "sample", "discrimination"]
branch = true
parallel = true
omit = [
"*/tests/*",
"*/__init__.py",
"*/setup.py",
"*/venv/*",
"*/virtualenv/*",
"*/.venv/*",
"*/.virtualenv/*"
]

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"if self.debug:",
"if settings.DEBUG",
"raise AssertionError",
"raise NotImplementedError",
"if 0:",
"if __name__ == .__main__.:",
"if TYPE_CHECKING:",
"class .*\\bProtocol\\):",
"@(abc\\.)?abstractmethod"
]
precision = 2
show_missing = true
skip_covered = false
fail_under = 80

[tool.coverage.html]
directory = "htmlcov"

[tool.coverage.xml]
output = "coverage.xml"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
Empty file added tests/__init__.py
Empty file.
193 changes: 193 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
"""
Shared pytest fixtures and configuration for all tests.
"""
import os
import tempfile
import shutil
import json
from pathlib import Path
from typing import Generator, Dict, Any
import pytest


@pytest.fixture
def temp_dir() -> Generator[Path, None, None]:
    """
    Create a temporary directory for tests that need file system operations.

    The directory is removed in a ``finally`` clause so cleanup also runs
    when the fixture generator is closed without being resumed (e.g. on
    KeyboardInterrupt during the test) — a plain statement after ``yield``
    would be skipped in that case.

    Yields:
        Path: Path to the temporary directory
    """
    temp_path = tempfile.mkdtemp()
    try:
        yield Path(temp_path)
    finally:
        # ignore_errors: a test may already have deleted files or the
        # directory itself; teardown must not fail because of that.
        shutil.rmtree(temp_path, ignore_errors=True)


@pytest.fixture
def mock_config() -> Dict[str, Any]:
    """
    Build a canned configuration mapping for config-driven tests.

    Returns:
        Dict[str, Any]: Mock configuration data
    """
    return dict(
        model_name="test_model",
        batch_size=32,
        learning_rate=0.001,
        num_epochs=10,
        hidden_size=768,
        num_layers=12,
        vocab_size=50000,
        max_seq_length=512,
        checkpoint_dir="/tmp/checkpoints",
        log_dir="/tmp/logs",
    )


@pytest.fixture
def sample_json_data() -> Dict[str, Any]:
    """
    Build one sample article record for data-processing tests.

    Returns:
        Dict[str, Any]: Sample JSON data
    """
    return dict(
        id="test_001",
        text="This is a sample text for testing purposes.",
        label="real",
        domain="test.com",
        date="2023-01-01",
        authors=["Test Author"],
        title="Test Article Title",
    )


@pytest.fixture
def sample_jsonl_file(temp_dir: Path) -> Path:
    """
    Create a temporary JSONL file with sample data.

    Args:
        temp_dir: Temporary directory fixture

    Returns:
        Path: Path to the created JSONL file
    """
    jsonl_path = temp_dir / "sample_data.jsonl"

    sample_data = [
        {"id": "1", "text": "First sample text", "label": "real"},
        {"id": "2", "text": "Second sample text", "label": "fake"},
        {"id": "3", "text": "Third sample text", "label": "real"},
    ]

    # Write explicitly as UTF-8: the previous open(..., 'w') used the
    # platform default encoding, which is not UTF-8 everywhere (e.g.
    # cp1252 on Windows), making the fixture file platform-dependent.
    jsonl_path.write_text(
        "".join(json.dumps(item) + "\n" for item in sample_data),
        encoding="utf-8",
    )

    return jsonl_path


@pytest.fixture
def mock_model_checkpoint(temp_dir: Path) -> Path:
    """
    Create a mock model checkpoint directory structure.

    Lays out the three files a TF1-style checkpoint directory contains:
    the index file, one data shard, and the ``checkpoint`` state file.

    Args:
        temp_dir: Temporary directory fixture

    Returns:
        Path: Path to the mock checkpoint directory
    """
    checkpoint_dir = temp_dir / "checkpoint"
    checkpoint_dir.mkdir(exist_ok=True)

    # Create mock checkpoint files (empty placeholders are enough for
    # path/discovery logic under test).
    (checkpoint_dir / "model.ckpt.index").touch()
    (checkpoint_dir / "model.ckpt.data-00000-of-00001").touch()
    # Explicit encoding: write_text otherwise falls back to the platform
    # default encoding, which varies across systems.
    (checkpoint_dir / "checkpoint").write_text(
        "model_checkpoint_path: \"model.ckpt\"", encoding="utf-8"
    )

    return checkpoint_dir


@pytest.fixture
def mock_vocab_files(temp_dir: Path) -> Dict[str, Path]:
    """
    Create mock vocabulary files for testing tokenization.

    Args:
        temp_dir: Temporary directory fixture

    Returns:
        Dict[str, Path]: Dictionary with paths to encoder.json and vocab.bpe
    """
    encoder_path = temp_dir / "encoder.json"
    vocab_path = temp_dir / "vocab.bpe"

    # Create minimal mock encoder (token -> id)
    encoder_data = {
        "hello": 1,
        "world": 2,
        "test": 3,
        "<|endoftext|>": 4
    }

    # Explicit UTF-8 on both writes: the previous code relied on the
    # platform default encoding, which is not UTF-8 on every system.
    with open(encoder_path, 'w', encoding='utf-8') as f:
        json.dump(encoder_data, f)

    # Create minimal mock BPE vocab (merge rules in GPT-2 bpe format)
    vocab_path.write_text(
        "#version: 0.2\nh e l l o</w> 1\nw o r l d</w> 2\n",
        encoding="utf-8",
    )

    return {
        "encoder": encoder_path,
        "vocab": vocab_path
    }


@pytest.fixture
def environment_variables(monkeypatch) -> None:
    """
    Apply the standard test-environment variables via monkeypatch.

    Args:
        monkeypatch: pytest monkeypatch fixture
    """
    overrides = {
        "TF_CPP_MIN_LOG_LEVEL": "3",  # suppress TensorFlow warnings
        "CUDA_VISIBLE_DEVICES": "",   # disable GPU for tests
        "TEST_MODE": "1",
    }
    for name, value in overrides.items():
        monkeypatch.setenv(name, value)


@pytest.fixture(autouse=True)
def cleanup_tensorflow():
    """
    Release TensorFlow session/graph state after every test.

    Marked autouse so each test starts from a clean TF state without
    opting in explicitly.
    """
    yield
    try:
        # Deferred import: the suite must still run on machines
        # where TensorFlow is not installed.
        import tensorflow as tf
        tf.keras.backend.clear_session()
        tf.compat.v1.reset_default_graph()
    except ImportError:
        pass


@pytest.fixture
def sample_model_config() -> Dict[str, Any]:
    """
    Build a sample model configuration matching the project's config format.

    Returns:
        Dict[str, Any]: Model configuration
    """
    return dict(
        n_ctx=1024,
        n_embd=768,
        n_head=12,
        n_layer=12,
        n_vocab=50257,
        n_special=0,
    )
Empty file added tests/integration/__init__.py
Empty file.
Loading