Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 47 additions & 37 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2932,54 +2932,64 @@ def post(self):
organisation_project_ids = keycloak_auth.get_user_organisation_projects()

user_project_ids.extend(organisation_project_ids)

print(f"===== User project IDs for search: {user_project_ids}")

access_filter = {
"bool": {
"should": [
{
"terms": {
"project_id": user_project_ids
}
},
{
"terms": {
"visibility": ["public", "semi-private"]

# Access filter logic:
# - Include documents where project_id is in user's accessible projects (any visibility)
# - OR include documents that are public | semi-private (any project)
if user_project_ids:
access_filter = {
"bool": {
"should": [
{
# User's projects: include all privacy levels
# Use .keyword because project_id is mapped as text+keyword
"terms": {
"project_id.keyword": user_project_ids
}
},
{
# Any public or semi-private documents
"terms": {
"visibility.keyword": ["public", "semi-private"]
}
}
}
],
"minimum_should_match": 1
],
"minimum_should_match": 1
}
}
}

print(f"===== Access filter for search: {access_filter}")

else:
# No user projects, only show public or semi-private documents
access_filter = {
"terms": {
"visibility.keyword": ["public", "semi-private"]
}
}


# Always enforce access filter
if not data:
return {'error': 'No JSON data provided'}, 400

user_query = data.get('query')
if user_query and isinstance(user_query, dict) and 'bool' in user_query and 'must' in user_query['bool']:
# Already a bool/must, just append access filter
if not isinstance(user_query['bool']['must'], list):
user_query['bool']['must'] = [user_query['bool']['must']]
user_query['bool']['must'].append(access_filter)
if user_query and isinstance(user_query, dict) and 'bool' in user_query:
# Add access_filter as a filter clause to existing bool
if 'filter' not in user_query['bool']:
user_query['bool']['filter'] = []
elif not isinstance(user_query['bool']['filter'], list):
user_query['bool']['filter'] = [user_query['bool']['filter']]
user_query['bool']['filter'].append(access_filter)
else:
# Wrap whatever is there (or nothing) in a bool/must with access filter
must_clauses = []
# Wrap user query in bool with filter for access control
if user_query:
must_clauses.append(user_query)
must_clauses.append(access_filter)
data['query'] = {
"bool": {
"must": must_clauses
data['query'] = {
"bool": {
"must": user_query,
"filter": access_filter
}
}
}

print(f"===== Final search query: {json.dumps(data, indent=2)}")

else:
# No user query, just use the access filter as the query
data['query'] = access_filter

results = query_elastic(data)

Expand Down
2 changes: 1 addition & 1 deletion init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ CREATE TABLE IF NOT EXISTS projects (
organisation_id VARCHAR(255) NOT NULL DEFAULT 'default-org', -- Keycloak organisation ID
user_id VARCHAR(255) NOT NULL, -- Keycloak user ID of creator
pathogen_id UUID REFERENCES pathogens(id),
privacy VARCHAR(20) DEFAULT 'public' CHECK (privacy IN ('public', 'private')),
privacy VARCHAR(20) DEFAULT 'public' CHECK (privacy IN ('public', 'private', 'semi-private')),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
deleted_at TIMESTAMP WITH TIME ZONE NULL
Expand Down
6 changes: 3 additions & 3 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,7 @@ def org1_project_admin(client, org1, system_admin_token, keycloak_auth):
# Add user to org1 as member
response = client.post(
f'/organisations/{org1["id"]}/members',
data=json.dumps({'user_id': user['user_id'], 'role': 'org-viewer'}),
data=json.dumps({'user_id': user['user_id'], 'role': 'org-viewer', 'force_role': True}),
headers={
'Authorization': f'Bearer {system_admin_token}',
'Content-Type': 'application/json'
Expand Down Expand Up @@ -562,7 +562,7 @@ def org1_project_contributor(client, org1, system_admin_token, keycloak_auth):
# Add user to org1 as member
response = client.post(
f'/organisations/{org1["id"]}/members',
data=json.dumps({'user_id': user['user_id'], 'role': 'org-viewer'}),
data=json.dumps({'user_id': user['user_id'], 'role': 'org-viewer', 'force_role': True}),
headers={
'Authorization': f'Bearer {system_admin_token}',
'Content-Type': 'application/json'
Expand Down Expand Up @@ -599,7 +599,7 @@ def org1_project_viewer(client, org1, system_admin_token, keycloak_auth):
# Add user to org1 as member
response = client.post(
f'/organisations/{org1["id"]}/members',
data=json.dumps({'user_id': user['user_id'], 'role': 'org-viewer'}),
data=json.dumps({'user_id': user['user_id'], 'role': 'org-viewer', 'force_role': True}),
headers={
'Authorization': f'Bearer {system_admin_token}',
'Content-Type': 'application/json'
Expand Down
189 changes: 160 additions & 29 deletions test/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,137 @@ def private_project_with_submission(
except Exception as e:
print(f"Cleanup error: {e}")

@pytest.fixture
def semi_private_project_with_submission(
    client, org1_admin_token, pathogen_with_schema, org1_admin
):
    """Create a semi-private project with a published submission.

    The fixture creates a project with ``privacy="semi-private"``, creates a
    submission in it, uploads one TSV and one FASTA file, triggers validation,
    marks the isolates as validated directly in the database, publishes the
    submission and asks Elasticsearch to refresh the index.

    Yields:
        dict: ``{"submission": ..., "project": ..., "pathogen": ...}`` where
        ``submission`` and ``project`` are the JSON objects returned by the
        API and ``pathogen`` is the ``pathogen_with_schema`` fixture value.

    Teardown deletes the submission (if it was created) and hard-deletes the
    project. Teardown also runs when a setup step fails, so a failed
    assertion does not leave the project behind.
    """
    import os

    import requests

    auth_headers = {"Authorization": f"Bearer {org1_admin_token}"}
    json_headers = {**auth_headers, "Content-Type": "application/json"}
    data_dir = os.path.join(os.path.dirname(__file__), "data", "tsv_files")

    # Create semi-private project
    project_data = {
        "name": "Semi-Private Search Test Project",
        "description": "Semi-private project for search testing",
        "pathogen_id": pathogen_with_schema["id"],
        "privacy": "semi-private",
    }
    response = client.post(
        "/projects/",
        data=json.dumps(project_data),
        headers=json_headers,
    )
    assert response.status_code == 201
    project = response.get_json()["project"]

    # Everything after project creation is guarded so that the project (and
    # the submission, once it exists) is removed even if setup fails midway.
    submission = None
    try:
        # Create the submission.
        # NOTE(review): the name says "Private" although the project is
        # semi-private -- looks like a copy-paste; confirm before renaming.
        submission_data = {"submission_name": "Private Search Test Submission"}
        response = client.post(
            f"/projects/{project['id']}/submissions2",
            data=json.dumps(submission_data),
            headers=json_headers,
        )
        assert response.status_code == 201
        submission = response.get_json()["submission"]

        submission_url = f"/projects/{project['id']}/submissions/{submission['id']}"

        # Upload the metadata (TSV) file first, then the sequence (FASTA) file
        for filename in ("cholera_2.tsv", "cholera_002.fasta"):
            with open(os.path.join(data_dir, filename), "rb") as f:
                response = client.post(
                    f"{submission_url}/upload2",
                    data={"file": (f, filename)},
                    headers=auth_headers,
                    content_type="multipart/form-data",
                )
            assert response.status_code == 201

        # Trigger validation
        response = client.post(f"{submission_url}/validate2", headers=json_headers)
        # Validation can return 200 or 400 depending on validation results
        assert response.status_code in [200, 400]

        # Manually mark isolates as validated since async worker isn't running in tests
        with get_db_cursor() as cursor:
            cursor.execute(
                """
                UPDATE isolates
                SET status = 'validated',
                    seq_error = NULL,
                    object_id = (
                        SELECT object_id FROM submission_files
                        WHERE submission_id = %s AND file_type = 'fasta'
                        LIMIT 1
                    )
                WHERE submission_id = %s
                AND error IS NULL
                """,
                (submission["id"], submission["id"]),
            )
            print(
                f"Manually validated {cursor.rowcount} isolates "
                "for semi-private project"
            )

        # Publish the submission
        response = client.post(f"{submission_url}/publish2", headers=json_headers)
        assert response.status_code == 200

        # Force Elasticsearch refresh. This stays best-effort, but a failure is
        # reported instead of silently ignored, and the timeout keeps the
        # fixture from hanging when Elasticsearch is unreachable.
        try:
            requests.post(
                "http://localhost:9200/agari-samples/_refresh", timeout=10
            )
        except Exception as e:
            print(f"Elasticsearch refresh error: {e}")

        yield {
            "submission": submission,
            "project": project,
            "pathogen": pathogen_with_schema,
        }
    finally:
        # Cleanup
        try:
            if submission is not None:
                client.delete(
                    f"/projects/{project['id']}/submissions2/{submission['id']}",
                    headers=auth_headers,
                )
            client.delete(
                f"/projects/{project['id']}?hard=true",
                headers=auth_headers,
            )
        except Exception as e:
            print(f"Cleanup error: {e}")



# ============================================================================
# Search Tests - Basic Functionality
Expand Down Expand Up @@ -524,6 +655,31 @@ def test_search_access_control_public_project(
# External user should be able to see public project data
assert result["hits"]["total"]["value"] > 0

@pytest.mark.search
@pytest.mark.rbac
@pytest.mark.e2e
def test_search_access_control_semi_private_project(
    client, external_user_token, semi_private_project_with_submission
):
    """Test that external users can search semi-private project data."""
    project = semi_private_project_with_submission["project"]

    # Search for documents belonging to the semi-private project, using the
    # external user's token.
    payload = json.dumps({"query": {"match": {"project_id": project["id"]}}})
    request_headers = {
        "Authorization": f"Bearer {external_user_token}",
        "Content-Type": "application/json",
    }
    response = client.post("/search/", data=payload, headers=request_headers)

    assert response.status_code == 200

    # External user should be able to see semi-private project data
    hit_count = response.get_json()["hits"]["total"]["value"]
    assert hit_count > 0

@pytest.mark.search
@pytest.mark.rbac
Expand All @@ -541,41 +697,16 @@ def test_search_access_control_private_project_all_roles(
client, role_fixture, role_name, request, private_project_with_submission
):
"""Test that all project roles (admin, contributor, viewer) can search private project data"""
# skip if role name not org-admin: fix later
if role_name != "org-admin":
pytest.skip("Skipping non org-admin roles for now")

# Get the token from the fixture
token = request.getfixturevalue(role_fixture)

# Get the user from corresponding user fixture (remove _token suffix)
user_fixture_name = role_fixture.replace("_token", "")
user = request.getfixturevalue(user_fixture_name)

# Get org1_admin_token for inviting users
org1_admin_token = request.getfixturevalue("org1_admin_token")

# Use the private project with published submission
project = private_project_with_submission["project"]

# Add user to project with their specific role (skip for org1_admin as they're the owner)
if role_fixture != "org1_admin_token":
from unittest.mock import Mock, patch

invite_data = {
"user_id": user["user_id"],
"role": role_name,
"redirect_uri": "http://example.com",
}
with patch("helpers.sg.send", return_value=Mock(status_code=202)):
response = client.post(
f"/projects/{project['id']}/users",
data=json.dumps(invite_data),
headers={
"Authorization": f"Bearer {org1_admin_token}",
"Content-Type": "application/json",
},
)
assert response.status_code == 200, (
f"Failed to invite {role_name}: {response.get_json()}"
)

# Now test search with the user's token
search_query = {"query": {"match": {"project_id": project["id"]}}}

Expand Down
2 changes: 1 addition & 1 deletion test/test_submissions_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def mock_validation_stack():
patch("app.get_minio_client") as mock_minio,
patch("app.tsv_to_json") as mock_tsv_to_json,
patch("app.validate_against_schema") as mock_validate,
patch("app.send_to_elastic2") as mock_elastic,
patch("app.bulk_send_to_elastic") as mock_elastic,
patch("jobs.add_job") as mock_add_job,
):
# Setup default mocks
Expand Down