code-kern-ai · anmarhindi · Apr 15, 2024 · Mar 15, 2024 · Mar 18, 2024 · Mar 19, 2024
diff --git a/business_objects/general.py b/business_objects/general.py
@@ -132,6 +132,7 @@ def construct_select_columns(
     table_schema: Optional[str] = None,
     prefix: Optional[str] = None,
     exclude_columns: Optional[Union[str, List[str]]] = None,
+    include_columns: Optional[Union[str, List[str]]] = None,
     indent: int = 1,
 ) -> str:
     table_enum: Tablenames = try_parse_enum_value(table, Tablenames)
@@ -145,13 +146,22 @@ def construct_select_columns(
         prefix += "."
 
     column_exclusion = ""
-    if exclude_columns:
-        if isinstance(exclude_columns, str):
-            column_exclusion = f"AND c.column_name != '{exclude_columns}'"
-        else:
-            column_exclusion = (
-                "AND c.column_name NOT IN ('" + "','".join(exclude_columns) + "')"
-            )
+    column_inclusion = ""
+    if exclude_columns or include_columns:
+        if exclude_columns:
+            if isinstance(exclude_columns, str):
+                column_exclusion = f"AND c.column_name != '{exclude_columns}'"
+            else:
+                column_exclusion = (
+                    "AND c.column_name NOT IN ('" + "','".join(exclude_columns) + "')"
+                )
+        if include_columns:
+            if isinstance(include_columns, str):
+                column_inclusion = f"AND c.column_name = '{include_columns}'"
+            else:
+                column_inclusion = (
+                    "AND c.column_name IN ('" + "','".join(include_columns) + "')"
+                )
     else:
         return prefix + "*"
 
@@ -161,10 +171,39 @@ def construct_select_columns(
     WHERE table_name = '{table_enum.value}'
     AND c.table_schema = '{table_schema}'
     {column_exclusion}
+    {column_inclusion}
     ORDER BY ordinal_position
     """
 
     columns = [prefix + r[0] for r in execute_all(query)]
     join_on_me = ",\n"
     join_on_me += INDENT * indent
     return join_on_me.join(columns)
+
+
+# aimed to create a simple SELECT x,y,z FROM table WHERE condition
+def simple_selection_builder(
+    table: str,
+    table_schema: Optional[str] = None,
+    exclude_columns: Optional[Union[str, List[str]]] = None,
+    include_columns: Optional[Union[str, List[str]]] = None,
+    where_condition: Optional[str] = None,
+    order_by: Optional[str] = None,
+) -> str:
+    table_enum: Tablenames = try_parse_enum_value(table, Tablenames)
+
+    if table_schema is None:
+        table_schema = "public"
+    where = ""
+    if where_condition:
+        where = f"WHERE {where_condition}"
+    order_by_s = ""
+    if order_by:
+        order_by_s = f"ORDER BY {order_by}"
+    return f"""
+    SELECT 
+    {construct_select_columns(table, table_schema,None, exclude_columns, include_columns)}
+    FROM {table_schema}.{table_enum.value}
+    {where}
+    {order_by_s}
+    """
diff --git a/business_objects/information_source.py b/business_objects/information_source.py
@@ -591,3 +591,35 @@ def check_is_active(project_id: str, statistics_id: str) -> bool:
         .first()[0]
         > 0
     )
+
+
+def get_source_statistics(
+    project_id: str, heuristic_id: str
+) -> List[InformationSourceStatistics]:
+
+    query = f"""
+        SELECT iss.id, iss.true_positives, iss.false_negatives, iss.false_positives, iss.record_coverage, iss.total_hits, iss.source_conflicts,json_build_object('name', ltl.name, 'color', ltl.color,'id', ltl.id) AS labeling_task_label
+        FROM information_source_statistics iss
+        JOIN labeling_task_label ltl 
+            ON ltl.id = iss.labeling_task_label_id
+        WHERE iss.project_id = '{project_id}' AND source_id = '{heuristic_id}'
+    """
+    return general.execute_all(query)
+
+
+def get_heuristic_id_with_most_recent_payload(project_id: str, heuristic_id: str):
+    base_columns = general.construct_select_columns("information_source", "public", "h")
+    query = f"""
+    SELECT {base_columns}, row_to_json(isp) last_payload        
+    FROM information_source h
+    LEFT JOIN LATERAL(
+        SELECT isp.id, isp.created_at, isp.finished_at, isp.state, isp.iteration, isp.progress
+        FROM information_source_payload isp
+        WHERE h.id = isp.source_id AND h.project_id = isp.project_id
+        ORDER BY isp.iteration DESC
+        LIMIT 1
+    ) isp
+        ON TRUE
+    WHERE h.project_id = '{project_id}' AND h.id = '{heuristic_id}'
+    """
+    return general.execute_first(query)
diff --git a/business_objects/labeling_task.py b/business_objects/labeling_task.py
@@ -8,6 +8,8 @@
 from sqlalchemy.sql.expression import cast
 import sqlalchemy
 
+from sqlalchemy.engine.row import Row
+
 from ..util import prevent_sql_injection
 
 
@@ -65,6 +67,53 @@ def get_task_and_label_by_ids_and_type(
     return []
 
 
+def get_labeling_tasks_by_project_id_full(project_id: str) -> Row:
+    """Load all labeling tasks of a project as one nested JSON structure.
+
+    The query uses three CTEs, all restricted to the project:
+      * ``attribute_select``: one JSON object per attribute,
+      * ``label_select``: labels grouped per labeling task as ``edges``/``node``,
+      * ``is_select``: information sources grouped per labeling task, same shape.
+    These are LEFT JOINed onto ``labeling_task``; missing labels or information
+    sources fall back to an empty ``edges`` array via COALESCE.
+
+    Args:
+        project_id: Project id; sanitized before being interpolated.
+
+    Returns:
+        The result of ``general.execute_first``: a single row with the columns
+        ``id`` (the project id) and ``labeling_tasks`` (the aggregated JSON).
+    """
+    # project_id is interpolated into the SQL text several times below
+    project_id = prevent_sql_injection(project_id, isinstance(project_id, str))
+    query = f"""
+    WITH attribute_select AS (	
+        SELECT id, jsonb_build_object('id',id,'name', NAME,'relative_position', relative_position, 'data_type', data_Type) a_data
+        FROM attribute a
+        WHERE project_id = '{project_id}'
+    ),
+    label_select AS (	
+        SELECT labeling_Task_id, jsonb_build_object('edges',array_agg(jsonb_build_object('node',jsonb_build_object('id',id,'name', NAME,'color', color, 'hotkey', hotkey)))) l_data
+        FROM labeling_task_label ltl
+        WHERE project_id = '{project_id}'
+        GROUP BY 1
+    ), 
+    is_select AS (
+        SELECT labeling_task_id, jsonb_build_object('edges',array_agg(jsonb_build_object('node',jsonb_build_object('id',id,'type', type,'return_type', return_type, 'description', description,'name',NAME)))) i_data
+        FROM information_source _is
+        WHERE project_id = '{project_id}'
+        GROUP BY 1
+    )
+
+    SELECT 
+        '{project_id}' id,
+        jsonb_build_object('edges',array_agg(jsonb_build_object('node', lt_data))) labeling_tasks
+    FROM (
+        SELECT 
+            jsonb_build_object(
+                'id',lt.id,
+                'name', NAME,
+                'task_target', task_target, 
+                'task_type', task_type, 
+                'attribute',a.a_data,
+                'labels',COALESCE(l.l_data,jsonb_build_object('edges',ARRAY[]::jsonb[])),
+                'information_sources',COALESCE(i.i_data,jsonb_build_object('edges',ARRAY[]::jsonb[]))
+            ) lt_data
+        FROM labeling_task lt
+        LEFT JOIN attribute_select a
+            ON lt.attribute_id = a.id
+        LEFT JOIN label_select l
+            ON l.labeling_Task_id = lt.id
+        LEFT JOIN is_select i
+            ON i.labeling_task_id = lt.id
+        WHERE project_id = '{project_id}'
+    ) x """
+    return general.execute_first(query)
+
+
 def get_task_name_id_dict(project_id: str) -> Dict[str, str]:
     labeling_tasks = get_all(project_id)
     return {labeling_task.name: labeling_task.id for labeling_task in labeling_tasks}
@@ -283,9 +332,11 @@ def create_multiple(
             name=task_name,
             project_id=project_id,
             attribute_id=attribute_id or None,
-            task_target=enums.LabelingTaskTarget.ON_WHOLE_RECORD.value
-            if not attribute_id
-            else enums.LabelingTaskTarget.ON_ATTRIBUTE.value,
+            task_target=(
+                enums.LabelingTaskTarget.ON_WHOLE_RECORD.value
+                if not attribute_id
+                else enums.LabelingTaskTarget.ON_ATTRIBUTE.value
+            ),
             task_type=enums.LabelingTaskType.CLASSIFICATION.value,
         )
         tasks.append(labeling_task)

diff --git a/business_objects/payload.py b/business_objects/payload.py
@@ -328,3 +328,14 @@ def remove(
         general.flush_or_commit(with_commit)
     else:
         raise ValueError("Payload does not belong to source")
+
+
+def get_payload_with_heuristic_type(project_id: str, payload_id: str):
+    query = f"""
+    SELECT isp.id,isp.created_at,isp.state,isp.logs,isp.iteration, json_build_object('type', is2."type") as information_source
+    FROM information_source_payload isp 
+    LEFT JOIN information_source is2 
+        ON is2.id = isp.source_id 
+    WHERE isp.project_id = '{project_id}' AND isp.id = '{payload_id}'
+    """
+    return general.execute_first(query)
diff --git a/business_objects/project.py b/business_objects/project.py
@@ -25,6 +25,104 @@ def get(project_id: str) -> Project:
     return session.query(Project).filter(Project.id == project_id).first()
 
 
+def get_with_labling_tasks_info_attributes(project_id: str) -> Project:
+    project_id = prevent_sql_injection(project_id, isinstance(project_id, str))
+    labeling_task_query = __build_sql_labeling_tasks_by_project(project_id)
+    information_sources_query = __build_sql_information_sources_by_project(project_id)
+    attributes_query = __build_sql_attributes_by_project(project_id)
+    data_slice_query = __build_sql_data_slices_by_project(project_id)
+
+    return {
+        "project_id": project_id,
+        "labeling_tasks": general.execute_first(labeling_task_query)[0],
+        "information_sources": general.execute_first(information_sources_query)[0],
+        "attributes": general.execute_first(attributes_query)[0],
+        "data_slices": general.execute_first(data_slice_query)[0],
+    }
+
+
+def __build_sql_labeling_tasks_by_project(project_id: str) -> str:
+    """Build the SQL that aggregates a project's labeling tasks into JSON.
+
+    The statement returns one ``labeling_task`` column: a JSON array with one
+    object per task (``id``, ``name``, ``attribute``). ``attribute`` is NULL
+    when the task has no attribute, otherwise an object holding the
+    attribute's ``relative_position``.
+
+    ``project_id`` is interpolated as-is and cast to UUID; it is not sanitized
+    here, so the caller has to do that.
+    """
+    return f"""
+    SELECT
+        json_agg(json_build_object(
+            'id', labeling_task.id,
+            'name', labeling_task.name,
+
+            'attribute', 
+            CASE
+                WHEN attribute.id IS NULL THEN NULL
+                ELSE json_build_object(
+                    'relative_position', attribute.relative_position
+                    )
+            END
+        )) AS labeling_task
+    FROM
+        project
+    LEFT JOIN
+        labeling_task
+            ON project.id = labeling_task.project_id
+    LEFT JOIN
+        attribute
+            ON labeling_task.attribute_id = attribute.id
+    WHERE
+        project.id = '{project_id}'::UUID;
+            """
+
+
+def __build_sql_information_sources_by_project(project_id: str) -> str:
+    """Build the SQL that aggregates a project's information sources into JSON.
+
+    The statement returns one ``information_sources`` column: a JSON array of
+    objects with the keys ``id`` and ``name``.
+
+    ``project_id`` is interpolated as-is and cast to UUID; it is not sanitized
+    here, so the caller has to do that.
+    """
+    return f"""
+    SELECT
+         json_agg(json_build_object(
+            'id', information_source.id,
+            'name', information_source.name
+        )) AS information_sources
+    FROM
+        project
+    LEFT JOIN
+        information_source
+            ON project.id = information_source.project_id
+    WHERE
+        project.id = '{project_id}'::UUID;
+            """
+
+
+def __build_sql_attributes_by_project(project_id: str) -> str:
+    """Build the SQL that aggregates a project's attributes into JSON.
+
+    The statement returns one ``attributes`` column: a JSON array of objects
+    with the keys ``id``, ``name`` and ``state``.
+
+    ``project_id`` is interpolated as-is and cast to UUID; it is not sanitized
+    here, so the caller has to do that.
+    """
+    return f"""
+    SELECT
+         json_agg(json_build_object(
+            'id', attribute.id,
+            'name', attribute.name,
+            'state', attribute.state
+        )) AS attributes
+    FROM
+        project
+    LEFT JOIN
+        attribute
+            ON project.id = attribute.project_id
+    WHERE
+        project.id = '{project_id}'::UUID;
+            """
+
+
+def __build_sql_data_slices_by_project(project_id: str) -> str:
+    """Build the SQL that aggregates a project's data slices into JSON.
+
+    The statement returns one ``data_slices`` column: a JSON array of objects
+    with the keys ``id``, ``name``, ``slice_type`` and ``created_at``.
+
+    ``project_id`` is interpolated as-is and cast to UUID; it is not sanitized
+    here, so the caller has to do that.
+    """
+    return f"""
+    SELECT
+         json_agg(json_build_object(
+            'id', data_slice.id,
+            'name', data_slice.name,
+            'slice_type', data_slice.slice_type,
+            'created_at', data_slice.created_at
+        )) AS data_slices
+    FROM
+        project
+    LEFT JOIN
+        data_slice
+            ON project.id = data_slice.project_id
+    WHERE
+        project.id = '{project_id}'::UUID;
+            """
+
+
 def get_org_id(project_id: str) -> str:
     if p := get(project_id):
         return str(p.organization_id)
@@ -48,6 +146,13 @@ def get_all(organization_id: str) -> List[Project]:
     )
 
 
+def get_all_by_user_organization_id(organization_id: str) -> List[Project]:
+    projects = (
+        session.query(Project).filter(Project.organization_id == organization_id).all()
+    )
+    return projects
+
+
 def get_all_all() -> List[Project]:
     return session.query(Project).all()
 
@@ -594,6 +699,7 @@ def __build_sql_project_stats(
     slice_id: Optional[str] = None,
 ) -> str:
     labeling_task_filter = ""
+    labeling_task_filter_is = ""
     if labeling_task_id:
         labeling_task_filter_is = f"AND _is.labeling_task_id = '{labeling_task_id}'"
         labeling_task_filter = f"""
@@ -648,8 +754,7 @@ def __build_sql_project_stats(
             WHERE rla.source_type = '{enums.LabelSource.INFORMATION_SOURCE.value}'
             GROUP BY rla.source_id
         )y
-    )y)x 
-
+    )y)x    
     """
 
 
@@ -738,3 +843,32 @@ def __get_project_size_sql(project_id: str) -> str:
         ) x
         ORDER BY order_
     """
+
+
+def get_project_by_project_id_sql(project_id: str) -> Dict[str, Any]:
+    project_id = prevent_sql_injection(project_id, isinstance(project_id, str))
+
+    query = f"""
+    SELECT row_to_json(y)
+    FROM (
+        SELECT 
+            id,
+            NAME,
+            description,
+            NULL AS project_type,
+            tokenizer,
+            CASE 
+                WHEN status = 'IN_DELETION' THEN -1
+                ELSE r_count
+            END num_data_scale_uploaded
+        FROM project p,
+        (
+            SELECT COUNT(*) r_count FROM record WHERE project_id = '{project_id}' 
+        )x
+        WHERE p.id = '{project_id}' )y
+    """
+    value = general.execute_first(query)
+    if value:
+        return value[0]
+    else:
+        return None
diff --git a/business_objects/record_label_association.py b/business_objects/record_label_association.py
@@ -819,7 +819,7 @@ def check_label_duplication_classification(
     project_id = prevent_sql_injection(project_id, isinstance(project_id, str))
     record_id = prevent_sql_injection(record_id, isinstance(record_id, str))
     user_id = prevent_sql_injection(user_id, isinstance(user_id, str))
-    label_ids = [prevent_sql_injection(li) for li in label_ids]
+    label_ids = [prevent_sql_injection(li, isinstance(li, str)) for li in label_ids]
     label_id_str = "'" + "', '".join(label_ids) + "'"
     # sleep a bit to ensure requests went through
     time.sleep(0.5)