Skip to content

FastAPI Integration #86

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Apr 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
bd7a390
adds project by project id
anmarhindi Mar 15, 2024
dd9e8d0
adding user mail name
LennartSchmidtKern Mar 18, 2024
02c682e
restructure
LennartSchmidtKern Mar 19, 2024
df95ed6
Hotfix unused var
JWittmeyer Mar 19, 2024
cf821b1
added get_all_by_user_organization_id
anmarhindi Mar 19, 2024
d189240
Extends columns constructor to have include param
JWittmeyer Mar 21, 2024
22b8bbd
Adds a simple selection builder
JWittmeyer Mar 21, 2024
004849b
Adds whitelist for exclusion of columns
JWittmeyer Mar 21, 2024
9bc286d
whitelist inner
LennartSchmidtKern Mar 21, 2024
aabe218
labeling task expand
LennartSchmidtKern Mar 21, 2024
ad025bd
labeling task fix
LennartSchmidtKern Mar 21, 2024
a3854bf
Adds labeling task global query
JWittmeyer Mar 21, 2024
cf48635
project labeling task query
LennartSchmidtKern Mar 21, 2024
714bbc2
Merge branch 'fastapi' of github.com:code-kern-ai/refinery-submodule-…
LennartSchmidtKern Mar 21, 2024
8190d47
Sql queries for heuristics by heuristic id
lumburovskalina Mar 22, 2024
94e2e57
Query payloadByPayloadId
lumburovskalina Mar 22, 2024
0b8d8fc
small restructure
LennartSchmidtKern Mar 26, 2024
c408652
adds to_frontend_obj_raw
anmarhindi Mar 26, 2024
89a4f0d
Merge remote-tracking branch 'origin/fastapi' into fastapi
anmarhindi Mar 26, 2024
fae51a0
getRecordExportFromData
LennartSchmidtKern Mar 26, 2024
a232ba4
fix sql injection param
anmarhindi Apr 2, 2024
063e3a0
Fixes heuristic request with most recent payload
JWittmeyer Apr 11, 2024
aa65c0a
Adds max lvl to gql wrap
JWittmeyer Apr 11, 2024
d0c3ad3
Merge remote-tracking branch 'origin/dev' into fastapi
JWittmeyer Apr 11, 2024
d3ce5c6
Adds maxlvl fix
JWittmeyer Apr 11, 2024
6cdb0b6
Merge branch 'dev' into fastapi
anmarhindi Apr 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 46 additions & 7 deletions business_objects/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def construct_select_columns(
table_schema: Optional[str] = None,
prefix: Optional[str] = None,
exclude_columns: Optional[Union[str, List[str]]] = None,
include_columns: Optional[Union[str, List[str]]] = None,
indent: int = 1,
) -> str:
table_enum: Tablenames = try_parse_enum_value(table, Tablenames)
Expand All @@ -145,13 +146,22 @@ def construct_select_columns(
prefix += "."

column_exclusion = ""
if exclude_columns:
if isinstance(exclude_columns, str):
column_exclusion = f"AND c.column_name != '{exclude_columns}'"
else:
column_exclusion = (
"AND c.column_name NOT IN ('" + "','".join(exclude_columns) + "')"
)
column_inclusion = ""
if exclude_columns or include_columns:
if exclude_columns:
if isinstance(exclude_columns, str):
column_exclusion = f"AND c.column_name != '{exclude_columns}'"
else:
column_exclusion = (
"AND c.column_name NOT IN ('" + "','".join(exclude_columns) + "')"
)
if include_columns:
if isinstance(include_columns, str):
column_inclusion = f"AND c.column_name = '{include_columns}'"
else:
column_inclusion = (
"AND c.column_name IN ('" + "','".join(include_columns) + "')"
)
else:
return prefix + "*"

Expand All @@ -161,10 +171,39 @@ def construct_select_columns(
WHERE table_name = '{table_enum.value}'
AND c.table_schema = '{table_schema}'
{column_exclusion}
{column_inclusion}
ORDER BY ordinal_position
"""

columns = [prefix + r[0] for r in execute_all(query)]
join_on_me = ",\n"
join_on_me += INDENT * indent
return join_on_me.join(columns)


# aimed to create a simple SELECT x,y,z FROM table WHERE condition
def simple_selection_builder(
    table: str,
    table_schema: Optional[str] = None,
    exclude_columns: Optional[Union[str, List[str]]] = None,
    include_columns: Optional[Union[str, List[str]]] = None,
    where_condition: Optional[str] = None,
    order_by: Optional[str] = None,
) -> str:
    """Assemble a plain ``SELECT ... FROM schema.table`` statement as a string.

    Args:
        table: Table name; parsed into a ``Tablenames`` enum member.
        table_schema: Schema of the table; ``None`` is replaced by ``"public"``.
        exclude_columns: Forwarded to ``construct_select_columns``.
        include_columns: Forwarded to ``construct_select_columns``.
        where_condition: Text placed verbatim after ``WHERE``; omitted if falsy.
        order_by: Text placed verbatim after ``ORDER BY``; omitted if falsy.

    Returns:
        The SQL text. Nothing is executed by this function itself.

    Note:
        ``where_condition`` and ``order_by`` are inserted unchanged, so the
        caller is responsible for making them safe.
    """
    table_enum: Tablenames = try_parse_enum_value(table, Tablenames)

    schema = "public" if table_schema is None else table_schema
    where_clause = f"WHERE {where_condition}" if where_condition else ""
    order_clause = f"ORDER BY {order_by}" if order_by else ""

    # Third positional argument is the column prefix; none is used here.
    column_list = construct_select_columns(
        table, schema, None, exclude_columns, include_columns
    )
    return f"""
SELECT
{column_list}
FROM {schema}.{table_enum.value}
{where_clause}
{order_clause}
"""
32 changes: 32 additions & 0 deletions business_objects/information_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,3 +591,35 @@ def check_is_active(project_id: str, statistics_id: str) -> bool:
.first()[0]
> 0
)


def get_source_statistics(
    project_id: str, heuristic_id: str
) -> List[InformationSourceStatistics]:
    """Fetch the statistics rows of one heuristic together with label info.

    Joins ``information_source_statistics`` with ``labeling_task_label`` and
    returns, per row, the statistic counters plus a JSON object
    (``labeling_task_label``) holding the label's name, color and id.

    Args:
        project_id: Project the statistics belong to.
        heuristic_id: Id matched against ``source_id``.

    Returns:
        Whatever ``general.execute_all`` yields for the query.
        NOTE(review): the annotation names the ORM class, but the query selects
        a custom column list, so the items are presumably plain result rows -
        confirm before relying on ORM attributes.
    """
    # Function-scope import so this block does not depend on the module's
    # import header; the path mirrors the sibling business_objects modules.
    from ..util import prevent_sql_injection

    # Both ids are interpolated into the SQL text below, so pass them through
    # the project's injection guard first (same call shape as sibling queries).
    project_id = prevent_sql_injection(project_id, isinstance(project_id, str))
    heuristic_id = prevent_sql_injection(
        heuristic_id, isinstance(heuristic_id, str)
    )

    query = f"""
SELECT iss.id, iss.true_positives, iss.false_negatives, iss.false_positives, iss.record_coverage, iss.total_hits, iss.source_conflicts,json_build_object('name', ltl.name, 'color', ltl.color,'id', ltl.id) AS labeling_task_label
FROM information_source_statistics iss
JOIN labeling_task_label ltl
ON ltl.id = iss.labeling_task_label_id
WHERE iss.project_id = '{project_id}' AND source_id = '{heuristic_id}'
"""
    return general.execute_all(query)


def get_heuristic_id_with_most_recent_payload(project_id: str, heuristic_id: str):
    """Load one heuristic plus its payload with the highest iteration.

    Selects the columns of ``information_source`` (aliased ``h``) and adds a
    ``last_payload`` JSON column built from the single
    ``information_source_payload`` row with the greatest ``iteration`` for that
    heuristic. The lateral join is a LEFT JOIN, so a heuristic without any
    payload is still returned.

    Args:
        project_id: Project the heuristic belongs to.
        heuristic_id: Id of the ``information_source`` row.

    Returns:
        The result of ``general.execute_first`` for the query.
    """
    # Function-scope import so this block does not depend on the module's
    # import header; the path mirrors the sibling business_objects modules.
    from ..util import prevent_sql_injection

    # Both ids end up inside the SQL text, so guard them before interpolation.
    project_id = prevent_sql_injection(project_id, isinstance(project_id, str))
    heuristic_id = prevent_sql_injection(
        heuristic_id, isinstance(heuristic_id, str)
    )

    base_columns = general.construct_select_columns("information_source", "public", "h")
    query = f"""
SELECT {base_columns}, row_to_json(isp) last_payload
FROM information_source h
LEFT JOIN LATERAL(
SELECT isp.id, isp.created_at, isp.finished_at, isp.state, isp.iteration, isp.progress
FROM information_source_payload isp
WHERE h.id = isp.source_id AND h.project_id = isp.project_id
ORDER BY isp.iteration DESC
LIMIT 1
) isp
ON TRUE
WHERE h.project_id = '{project_id}' AND h.id = '{heuristic_id}'
"""
    return general.execute_first(query)
57 changes: 54 additions & 3 deletions business_objects/labeling_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from sqlalchemy.sql.expression import cast
import sqlalchemy

from sqlalchemy.engine.row import Row

from ..util import prevent_sql_injection


Expand Down Expand Up @@ -65,6 +67,53 @@ def get_task_and_label_by_ids_and_type(
return []


def get_labeling_tasks_by_project_id_full(project_id: str) -> Row:
    """Return all labeling tasks of a project as one nested JSON structure.

    The query produces a single row with two columns: ``id`` (the project id)
    and ``labeling_tasks``, an ``{'edges': [{'node': ...}]}`` object. Each node
    carries the task's id, name, target and type, its attribute data, and
    ``labels`` / ``information_sources`` edge lists (empty edge lists when a
    task has none).

    Args:
        project_id: Project whose labeling tasks are collected.

    Returns:
        The result of ``general.execute_first`` for the query.
    """
    # The id is embedded in the SQL text several times; guard it once up front.
    safe_project_id = prevent_sql_injection(project_id, isinstance(project_id, str))
    sql = f"""
WITH attribute_select AS (
SELECT id, jsonb_build_object('id',id,'name', NAME,'relative_position', relative_position, 'data_type', data_Type) a_data
FROM attribute a
WHERE project_id = '{safe_project_id}'
),
label_select AS (
SELECT labeling_Task_id, jsonb_build_object('edges',array_agg(jsonb_build_object('node',jsonb_build_object('id',id,'name', NAME,'color', color, 'hotkey', hotkey)))) l_data
FROM labeling_task_label ltl
WHERE project_id = '{safe_project_id}'
GROUP BY 1
),
is_select AS (
SELECT labeling_task_id, jsonb_build_object('edges',array_agg(jsonb_build_object('node',jsonb_build_object('id',id,'type', type,'return_type', return_type, 'description', description,'name',NAME)))) i_data
FROM information_source _is
WHERE project_id = '{safe_project_id}'
GROUP BY 1
)

SELECT
'{safe_project_id}' id,
jsonb_build_object('edges',array_agg(jsonb_build_object('node', lt_data))) labeling_tasks
FROM (
SELECT
jsonb_build_object(
'id',lt.id,
'name', NAME,
'task_target', task_target,
'task_type', task_type,
'attribute',a.a_data,
'labels',COALESCE(l.l_data,jsonb_build_object('edges',ARRAY[]::jsonb[])),
'information_sources',COALESCE(i.i_data,jsonb_build_object('edges',ARRAY[]::jsonb[]))
) lt_data
FROM labeling_task lt
LEFT JOIN attribute_select a
ON lt.attribute_id = a.id
LEFT JOIN label_select l
ON l.labeling_Task_id = lt.id
LEFT JOIN is_select i
ON i.labeling_task_id = lt.id
WHERE project_id = '{safe_project_id}'
) x """
    return general.execute_first(sql)


def get_task_name_id_dict(project_id: str) -> Dict[str, str]:
    """Map each labeling task name of the project to that task's id.

    If several tasks share a name, the one returned last by ``get_all`` wins.
    """
    name_to_id = {}
    for task in get_all(project_id):
        name_to_id[task.name] = task.id
    return name_to_id
Expand Down Expand Up @@ -283,9 +332,11 @@ def create_multiple(
name=task_name,
project_id=project_id,
attribute_id=attribute_id or None,
task_target=enums.LabelingTaskTarget.ON_WHOLE_RECORD.value
if not attribute_id
else enums.LabelingTaskTarget.ON_ATTRIBUTE.value,
task_target=(
enums.LabelingTaskTarget.ON_WHOLE_RECORD.value
if not attribute_id
else enums.LabelingTaskTarget.ON_ATTRIBUTE.value
),
task_type=enums.LabelingTaskType.CLASSIFICATION.value,
)
tasks.append(labeling_task)
Expand Down
11 changes: 11 additions & 0 deletions business_objects/payload.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,14 @@ def remove(
general.flush_or_commit(with_commit)
else:
raise ValueError("Payload does not belong to source")


def get_payload_with_heuristic_type(project_id: str, payload_id: str):
    """Load one payload together with the type of its information source.

    Selects id, created_at, state, logs and iteration of the
    ``information_source_payload`` row and adds an ``information_source`` JSON
    object containing only the ``type`` of the linked source. The join is a
    LEFT JOIN, so the payload is returned even without a matching source.

    Args:
        project_id: Project the payload belongs to.
        payload_id: Id of the ``information_source_payload`` row.

    Returns:
        The result of ``general.execute_first`` for the query.
    """
    # Function-scope import so this block does not depend on the module's
    # import header; the path mirrors the sibling business_objects modules.
    from ..util import prevent_sql_injection

    # Both ids end up inside the SQL text, so guard them before interpolation.
    project_id = prevent_sql_injection(project_id, isinstance(project_id, str))
    payload_id = prevent_sql_injection(payload_id, isinstance(payload_id, str))

    query = f"""
SELECT isp.id,isp.created_at,isp.state,isp.logs,isp.iteration, json_build_object('type', is2."type") as information_source
FROM information_source_payload isp
LEFT JOIN information_source is2
ON is2.id = isp.source_id
WHERE isp.project_id = '{project_id}' AND isp.id = '{payload_id}'
"""
    return general.execute_first(query)
138 changes: 136 additions & 2 deletions business_objects/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,104 @@ def get(project_id: str) -> Project:
return session.query(Project).filter(Project.id == project_id).first()


def get_with_labling_tasks_info_attributes(project_id: str) -> Dict[str, Any]:
    """Collect a project's labeling tasks, heuristics, attributes and slices.

    Runs four independent aggregate queries (one per entity type) and bundles
    the first column of each result into a single dictionary.

    Args:
        project_id: Id of the project; sanitized before it reaches any SQL.

    Returns:
        A dict with the keys ``project_id``, ``labeling_tasks``,
        ``information_sources``, ``attributes`` and ``data_slices``. The four
        entity values are the ``json_agg`` results of the respective queries.
        Note that this is a plain dict, not a ``Project`` ORM instance.
    """
    # The builder helpers interpolate the id verbatim, so it is guarded here.
    project_id = prevent_sql_injection(project_id, isinstance(project_id, str))
    labeling_task_query = __build_sql_labeling_tasks_by_project(project_id)
    information_sources_query = __build_sql_information_sources_by_project(project_id)
    attributes_query = __build_sql_attributes_by_project(project_id)
    data_slice_query = __build_sql_data_slices_by_project(project_id)

    # Each query selects exactly one aggregated column, hence the [0].
    return {
        "project_id": project_id,
        "labeling_tasks": general.execute_first(labeling_task_query)[0],
        "information_sources": general.execute_first(information_sources_query)[0],
        "attributes": general.execute_first(attributes_query)[0],
        "data_slices": general.execute_first(data_slice_query)[0],
    }


def __build_sql_labeling_tasks_by_project(project_id: str) -> str:
    """Build the SQL that aggregates a project's labeling tasks into JSON.

    Each aggregated object holds the task's id and name plus an ``attribute``
    entry that is NULL when no attribute is linked and otherwise an object with
    the attribute's ``relative_position``.

    ``project_id`` is inserted into the statement as-is; the caller has to
    sanitize it beforehand. Only the SQL text is returned, nothing is executed.
    """
    statement = f"""
SELECT
json_agg(json_build_object(
'id', labeling_task.id,
'name', labeling_task.name,

'attribute',
CASE
WHEN attribute.id IS NULL THEN NULL
ELSE json_build_object(
'relative_position', attribute.relative_position
)
END
)) AS labeling_task
FROM
project
LEFT JOIN
labeling_task
ON project.id = labeling_task.project_id
LEFT JOIN
attribute
ON labeling_task.attribute_id = attribute.id
WHERE
project.id = '{project_id}'::UUID;
"""
    return statement


def __build_sql_information_sources_by_project(project_id: str) -> str:
    """Build the SQL that aggregates a project's information sources into JSON.

    Each aggregated object holds the source's id and name.

    ``project_id`` is inserted into the statement as-is; the caller has to
    sanitize it beforehand. Only the SQL text is returned, nothing is executed.
    """
    statement = f"""
SELECT
json_agg(json_build_object(
'id', information_source.id,
'name', information_source.name
)) AS information_sources
FROM
project
LEFT JOIN
information_source
ON project.id = information_source.project_id
WHERE
project.id = '{project_id}'::UUID;
"""
    return statement


def __build_sql_attributes_by_project(project_id: str) -> str:
    """Build the SQL that aggregates a project's attributes into JSON.

    Each aggregated object holds the attribute's id, name and state.

    ``project_id`` is inserted into the statement as-is; the caller has to
    sanitize it beforehand. Only the SQL text is returned, nothing is executed.
    """
    statement = f"""
SELECT
json_agg(json_build_object(
'id', attribute.id,
'name', attribute.name,
'state', attribute.state
)) AS attributes
FROM
project
LEFT JOIN
attribute
ON project.id = attribute.project_id
WHERE
project.id = '{project_id}'::UUID;
"""
    return statement


def __build_sql_data_slices_by_project(project_id: str) -> str:
    """Build the SQL that aggregates a project's data slices into JSON.

    Each aggregated object holds the slice's id, name, slice_type and
    created_at.

    ``project_id`` is inserted into the statement as-is; the caller has to
    sanitize it beforehand. Only the SQL text is returned, nothing is executed.
    """
    statement = f"""
SELECT
json_agg(json_build_object(
'id', data_slice.id,
'name', data_slice.name,
'slice_type', data_slice.slice_type,
'created_at', data_slice.created_at
)) AS data_slices
FROM
project
LEFT JOIN
data_slice
ON project.id = data_slice.project_id
WHERE
project.id = '{project_id}'::UUID;
"""
    return statement


def get_org_id(project_id: str) -> str:
if p := get(project_id):
return str(p.organization_id)
Expand All @@ -48,6 +146,13 @@ def get_all(organization_id: str) -> List[Project]:
)


def get_all_by_user_organization_id(organization_id: str) -> List[Project]:
    """Return every project whose ``organization_id`` equals the given id."""
    belongs_to_org = Project.organization_id == organization_id
    return session.query(Project).filter(belongs_to_org).all()


def get_all_all() -> List[Project]:
    """Return all ``Project`` rows without applying any filter."""
    every_project = session.query(Project).all()
    return every_project

Expand Down Expand Up @@ -594,6 +699,7 @@ def __build_sql_project_stats(
slice_id: Optional[str] = None,
) -> str:
labeling_task_filter = ""
labeling_task_filter_is = ""
if labeling_task_id:
labeling_task_filter_is = f"AND _is.labeling_task_id = '{labeling_task_id}'"
labeling_task_filter = f"""
Expand Down Expand Up @@ -648,8 +754,7 @@ def __build_sql_project_stats(
WHERE rla.source_type = '{enums.LabelSource.INFORMATION_SOURCE.value}'
GROUP BY rla.source_id
)y
)y)x

)y)x
"""


Expand Down Expand Up @@ -738,3 +843,32 @@ def __get_project_size_sql(project_id: str) -> str:
) x
ORDER BY order_
"""


def get_project_by_project_id_sql(project_id: str) -> Optional[Dict[str, Any]]:
    """Fetch basic project data as a JSON object via raw SQL.

    The returned object contains ``id``, ``name``, ``description``,
    ``project_type`` (always NULL), ``tokenizer`` and
    ``num_data_scale_uploaded``. The latter is the number of records of the
    project, or ``-1`` while the project's status is ``IN_DELETION``.

    Args:
        project_id: Id of the project; sanitized before it reaches the SQL.

    Returns:
        The first column of the result row (the ``row_to_json`` value), or
        ``None`` when the query yields no row.
    """
    project_id = prevent_sql_injection(project_id, isinstance(project_id, str))

    query = f"""
SELECT row_to_json(y)
FROM (
SELECT
id,
NAME,
description,
NULL AS project_type,
tokenizer,
CASE
WHEN status = 'IN_DELETION' THEN -1
ELSE r_count
END num_data_scale_uploaded
FROM project p,
(
SELECT COUNT(*) r_count FROM record WHERE project_id = '{project_id}'
)x
WHERE p.id = '{project_id}' )y
"""
    value = general.execute_first(query)
    # No matching project means no row; signal that explicitly with None.
    if not value:
        return None
    return value[0]
2 changes: 1 addition & 1 deletion business_objects/record_label_association.py
Original file line number Diff line number Diff line change
Expand Up @@ -819,7 +819,7 @@ def check_label_duplication_classification(
project_id = prevent_sql_injection(project_id, isinstance(project_id, str))
record_id = prevent_sql_injection(record_id, isinstance(record_id, str))
user_id = prevent_sql_injection(user_id, isinstance(user_id, str))
label_ids = [prevent_sql_injection(li) for li in label_ids]
label_ids = [prevent_sql_injection(li, isinstance(li, str)) for li in label_ids]
label_id_str = "'" + "', '".join(label_ids) + "'"
# sleep a bit to ensure requests went through
time.sleep(0.5)
Expand Down
Loading