2 changes: 2 additions & 0 deletions app/data/RiskAssessment.py
@@ -1,3 +1,5 @@
# Class used to create Risk Assessment examples with methods to create LLM prompts specific to the risk assessment example from LLM prompt templates

from typing import Type

from ..utils.LLMCaller import *
4 changes: 1 addition & 3 deletions app/data/example_risk_assessments.py
@@ -1,6 +1,4 @@
# Learnings:
# 1. Keeping a safe distance away from a possible projectile is a prevention measure.
# The hazard event is therefore the projectile hitting someone, not the projectile being released.
# Risk Assessments used to test the accuracy of LLM prompts

import numpy as np

113 changes: 0 additions & 113 deletions app/evaluation.py
@@ -26,7 +26,6 @@ class Params(TypedDict):
is_feedback_text: bool
is_risk_matrix: bool
is_risk_assessment: bool
are_all_input_fields_entered_manually: bool
LLM: str

def provide_feedback_on_risk_matrix(response):
@@ -237,7 +236,6 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
LLM_name = params["LLM"]
LLM = LLM_dictionary[LLM_name]

if params['are_all_input_fields_entered_manually'] == True:
activity, hazard, how_it_harms, who_it_harms, uncontrolled_likelihood, uncontrolled_severity, uncontrolled_risk, prevention, mitigation, controlled_likelihood, controlled_severity, controlled_risk = np.array(response).flatten()

RA = RiskAssessment(activity=activity, hazard=hazard, who_it_harms=who_it_harms, how_it_harms=how_it_harms,
@@ -393,115 +391,4 @@ def evaluation_function(response: Any, answer: Any, params: Params) -> Result:
{feedback_for_correct_answers} \n\n\n\n\n
{no_information_provided_message}'''

return Result(is_correct=is_everything_correct, feedback=feedback)

if params['are_all_input_fields_entered_manually'] == False:

prevention, mitigation = np.array(response).flatten()

activity = 'Heat transfer lab'
hazard = 'Boiling hot water'
who_it_harms = 'Students'
how_it_harms = 'Burns'

hazard_event = 'Boiling hot water split on student'
harm_caused = 'Burns'

RA = RiskAssessment(activity=activity, hazard=hazard, who_it_harms=who_it_harms, how_it_harms=how_it_harms,
uncontrolled_likelihood=1, uncontrolled_severity=1,
uncontrolled_risk=1, prevention=prevention, mitigation=mitigation,
controlled_likelihood=1, controlled_severity=1, controlled_risk=1,
prevention_prompt_expected_class='prevention', mitigation_prompt_expected_class='mitigation', risk_domain='')

input_check_feedback_message = RA.get_input_check_feedback_message()

if input_check_feedback_message != True:
return Result(is_correct=False,
feedback=f'''\n\n\n\n\n # Feedback:\n\n\n\n\n
\n\n\n\n\n## {input_check_feedback_message}\n\n\n\n\n''')

feedback_for_incorrect_answers = '\n\n\n\n# Feedback for Incorrect Answers\n\n\n\n'
feedback_for_correct_answers = '\n\n\n\n# Feedback for Correct Answers\n\n\n\n'

fields_for_which_no_information_provided = []

is_everything_correct = True

# PREVENTION CHECKS
no_information_provided_for_prevention_prompt_input = RA.get_no_information_provided_for_prevention_input()
no_information_provided_for_prevention_prompt_output, no_information_provided_for_prevention_pattern = RA.get_prompt_output_and_pattern_matched(prompt_input_object=no_information_provided_for_prevention_prompt_input, LLM_caller=LLM)

if no_information_provided_for_prevention_pattern == 'no information provided' or RA.prevention == '':
fields_for_which_no_information_provided.append('Prevention')

else:

control_measure_prompt_with_prevention_input = RA.get_control_measure_prompt_with_prevention_input()
control_measure_prompt_with_prevention_output, control_measure_prompt_with_prevention_pattern = RA.get_prompt_output_and_pattern_matched(prompt_input_object=control_measure_prompt_with_prevention_input,
LLM_caller=LLM,
harm_caused=harm_caused,
hazard_event=hazard_event)

feedback_for_correct_answers, feedback_for_incorrect_answers, is_everything_correct = provide_feedback_on_control_measure_input(
control_measure_input_field='prevention',
control_measure_prompt_input=control_measure_prompt_with_prevention_input,
control_measure_prompt_output=control_measure_prompt_with_prevention_output,
control_measure_prompt_pattern=control_measure_prompt_with_prevention_pattern,
feedback_for_correct_answers=feedback_for_correct_answers,
feedback_for_incorrect_answers=feedback_for_incorrect_answers,
is_everything_correct=is_everything_correct,
risk_assessment=RA,
LLM_caller=LLM
)

# MITIGATION CHECKS
no_information_provided_for_mitigation_prompt_input = RA.get_no_information_provided_for_mitigation_input()
no_information_provided_for_mitigation_prompt_output, no_information_provided_for_mitigation_pattern = RA.get_prompt_output_and_pattern_matched(prompt_input_object=no_information_provided_for_mitigation_prompt_input, LLM_caller=LLM)

if no_information_provided_for_mitigation_pattern == 'no information provided' or RA.mitigation == '':
fields_for_which_no_information_provided.append('Mitigation')
else:

control_measure_prompt_with_mitigation_input = RA.get_control_measure_prompt_with_mitigation_input()
control_measure_prompt_with_mitigation_output, control_measure_prompt_with_mitigation_pattern = RA.get_prompt_output_and_pattern_matched(prompt_input_object=control_measure_prompt_with_mitigation_input,
LLM_caller=LLM,
harm_caused=harm_caused,
hazard_event=hazard_event)

feedback_for_correct_answers, feedback_for_incorrect_answers, is_everything_correct = provide_feedback_on_control_measure_input(
control_measure_input_field='mitigation',
control_measure_prompt_input=control_measure_prompt_with_mitigation_input,
control_measure_prompt_output=control_measure_prompt_with_mitigation_output,
control_measure_prompt_pattern=control_measure_prompt_with_mitigation_pattern,
feedback_for_correct_answers=feedback_for_correct_answers,
feedback_for_incorrect_answers=feedback_for_incorrect_answers,
is_everything_correct=is_everything_correct,
risk_assessment=RA,
LLM_caller=LLM
)

if is_everything_correct == True:
feedback_for_incorrect_answers = '# Congratulations! All your answers are correct!'

if fields_for_which_no_information_provided == []:
no_information_provided_message = ''
else:
no_information_provided_message = f'\n\n\n\n\n## Fields for which no information is provided and hence no feedback given: {", ".join(fields_for_which_no_information_provided)}\n\n\n\n\n'

if fields_for_which_no_information_provided != ['Prevention', 'Mitigation']:
hazard_event_and_harm_caused_inferred_message = f'''## The following were inferred from your answers: \n\n\n\n\n
\n\n\n\n\n### Event that leads to harm: "{hazard_event}"\n\n\n\n\n
\n\n\n\n\n### Harm caused to '{RA.who_it_harms}': "{harm_caused}".\n\n\n\n
\n\n\n\n\n### If they are incorrect, please make these more explicit in the "Hazard" and "How it harms" fields.\n\n\n\n\n'''
else:
hazard_event_and_harm_caused_inferred_message = ''

feedback_for_correct_answers += f'''
\n\n\n\n### There are no errors in your likelihood, severity, and risk values.\n\n\n\n'''

feedback=f'''{hazard_event_and_harm_caused_inferred_message} \n\n\n\n\n
{feedback_for_incorrect_answers} \n\n\n\n\n
{feedback_for_correct_answers} \n\n\n\n\n
{no_information_provided_message}'''

return Result(is_correct=is_everything_correct, feedback=feedback)
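With the `are_all_input_fields_entered_manually` branch removed, the evaluation function is left with a single path that always expects the full set of twelve risk-assessment fields. The snippet below is a minimal, hypothetical sketch of that unpacking step — the helper name `unpack_response` and the size check are illustrative additions, not the repository's code; the field order is taken from the unpacking line retained in `evaluation.py`.

```python
import numpy as np

# Field order taken from the unpacking line retained in evaluation.py.
FIELD_NAMES = [
    "activity", "hazard", "how_it_harms", "who_it_harms",
    "uncontrolled_likelihood", "uncontrolled_severity", "uncontrolled_risk",
    "prevention", "mitigation",
    "controlled_likelihood", "controlled_severity", "controlled_risk",
]

def unpack_response(response):
    """Flatten a (possibly nested) response into the twelve expected fields."""
    values = np.array(response).flatten()
    if values.size != len(FIELD_NAMES):
        raise ValueError(f"Expected {len(FIELD_NAMES)} fields, got {values.size}")
    return dict(zip(FIELD_NAMES, values))
```

A `RiskAssessment` can then be constructed from this mapping, mirroring the constructor call that remains in `evaluation.py`.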
2 changes: 2 additions & 0 deletions app/prompts/BasePromptInput.py
@@ -1,3 +1,5 @@
# Base class that other PromptInput classes inherit from.

try:
from utils.RegexPatternMatcher import RegexPatternMatcher
except:
2 changes: 2 additions & 0 deletions app/prompts/ControlMeasureClassification.py
@@ -1,3 +1,5 @@
# PromptInput class used to classify a control measure as either a prevention, mitigation, both or neither. This prompt takes the "event that leads to harm" and the "harm caused" as input.

from ..prompts.BasePromptInput import BasePromptInput
from ..utils.RegexPatternMatcher import RegexPatternMatcher

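As a rough illustration of the classification described in the comment above — hypothetical helper functions, not the repository's `ControlMeasureClassification` prompt or its `RegexPatternMatcher` — a control measure that acts before the hazard event is a prevention, one that reduces the harm caused is a mitigation, and the model's free-text answer is reduced to one of four labels:

```python
def build_classification_prompt(control_measure: str, hazard_event: str, harm_caused: str) -> str:
    """Assemble a classification prompt from the inferred event and harm (illustrative only)."""
    return (
        f"Event that leads to harm: {hazard_event}\n"
        f"Harm caused: {harm_caused}\n"
        f"Control measure: {control_measure}\n"
        "Answer with exactly one of: prevention, mitigation, both, neither."
    )

def match_classification(llm_output: str) -> str:
    """Reduce free-text model output to one of the four labels."""
    text = llm_output.lower()
    if "both" in text or ("prevention" in text and "mitigation" in text):
        return "both"
    if "prevention" in text:
        return "prevention"
    if "mitigation" in text:
        return "mitigation"
    return "neither"
```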
2 changes: 2 additions & 0 deletions app/prompts/HarmCausedAndHazardEvent.py
@@ -1,3 +1,5 @@
# PromptInput class used to infer the "event that leads to harm" and the "harm caused" from the student's risk assessment inputs.

from .BasePromptInput import BasePromptInput

class HarmCausedAndHazardEvent(BasePromptInput):
2 changes: 2 additions & 0 deletions app/prompts/HowItHarmsInContext.py
@@ -1,3 +1,5 @@
# PromptInput class that checks whether the "How it harms" input matches the "activity" and "hazard" inputs.

from ..prompts.BasePromptInput import BasePromptInput
from ..utils.RegexPatternMatcher import RegexPatternMatcher

2 changes: 2 additions & 0 deletions app/prompts/NoInformationProvided.py
@@ -1,3 +1,5 @@
# PromptInput class that checks whether no information is provided in the "prevention" or "mitigation" input fields.

from .BasePromptInput import BasePromptInput

class NoInformationProvided(BasePromptInput):
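The deleted `evaluation.py` branch above shows how this prompt's output is consumed: when the matched pattern is "no information provided" (or the field is empty), feedback for that field is skipped. A condensed, illustrative restatement of that check — the function name is hypothetical — is:

```python
def fields_without_information(prevention: str, mitigation: str, patterns: dict) -> list:
    """Collect input fields for which the prompt matched 'no information provided'
    (or the field itself is empty), so feedback for them can be skipped."""
    skipped = []
    if patterns.get("prevention") == "no information provided" or prevention == "":
        skipped.append("Prevention")
    if patterns.get("mitigation") == "no information provided" or mitigation == "":
        skipped.append("Mitigation")
    return skipped
```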
2 changes: 2 additions & 0 deletions app/prompts/SummarizeControlMeasureFeedback.py
@@ -1,3 +1,5 @@
# PromptInput class that takes in the output from the ControlMeasureClassification prompt and shortens it to 3 sentences.

from ..prompts.BasePromptInput import BasePromptInput

class SummarizeControlMeasureFeedback(BasePromptInput):
2 changes: 2 additions & 0 deletions app/prompts/WhoItHarmsInContext.py
@@ -1,3 +1,5 @@
# PromptInput class that checks whether the "Who it harms" input matches the "activity", "hazard" and "how it harms" inputs.

from ..prompts.BasePromptInput import BasePromptInput
from ..utils.RegexPatternMatcher import RegexPatternMatcher

2 changes: 2 additions & 0 deletions app/test_classes/BaseTestClass.py
@@ -1,3 +1,5 @@
# Base class used to test the accuracy of different prompts

from ..utils.LLMCaller import LLMCaller
from ..utils.RegexPatternMatcher import RegexPatternMatcher

2 changes: 2 additions & 0 deletions app/test_classes/TestBothPreventionAndMitigationInput.py
@@ -1,3 +1,5 @@
# Builds on TestControlMeasureClassificationPrompt.py to test % of times both prevention and mitigation correctly classified.

from ..test_classes.TestControlMeasureClassificationPrompt import TestControlMeasureClassificationPrompt
from ..utils.LLMCaller import LLMCaller
import numpy as np
2 changes: 2 additions & 0 deletions app/test_classes/TestControlMeasureClassificationPrompt.py
@@ -1,3 +1,5 @@
# Builds on TestModelAccuracyForCombinationOfPrompts.py to test accuracy of control measure classification.

from ..test_classes.TestModelAccuracyForCombinationOfPrompts import TestModelAccuracyForCombinationOfPrompts
from ..utils.LLMCaller import LLMCaller

2 changes: 2 additions & 0 deletions app/test_classes/TestModelAccuracy.py
@@ -1,3 +1,5 @@
# Builds on BaseTestClass to allow testing of multiple risk assessment examples (from data/example_risk_assessments.py)

from ..test_classes.BaseTestClass import BaseTestClass
from ..utils.LLMCaller import LLMCaller
import pandas as pd
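The accuracy measurement these test classes describe can be sketched as follows — the `Example` container and `measure_accuracy` names are assumptions for illustration, not the repository's `TestModelAccuracy` implementation: run every example through the prompt-and-match pipeline and report the fraction whose matched pattern equals the expected label.

```python
from dataclasses import dataclass

@dataclass
class Example:
    control_measure: str
    hazard_event: str
    harm_caused: str
    expected: str  # e.g. "prevention" or "mitigation"

def measure_accuracy(examples, classify):
    """Fraction of examples whose matched pattern equals the expected label."""
    if not examples:
        return 0.0
    correct = sum(
        classify(ex.control_measure, ex.hazard_event, ex.harm_caused) == ex.expected
        for ex in examples
    )
    return correct / len(examples)
```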
2 changes: 2 additions & 0 deletions app/test_classes/TestModelAccuracyForCombinationOfPrompts.py
@@ -1,3 +1,5 @@
# Builds on TestModelAccuracy class to test the accuracy of multiple prompts used in sequence, e.g. the HarmCausedAndHazardEvent and ControlMeasureClassification prompts.

from ..test_classes.TestModelAccuracy import TestModelAccuracy
from ..utils.LLMCaller import LLMCaller

2 changes: 2 additions & 0 deletions app/test_classes/TestPromptOnSingleExample.py
@@ -1,3 +1,5 @@
# Builds on BaseTestClass.py to test prompt on single risk assessment example (this is used in unit tests)

from BaseTestClass import BaseTestClass
from ..utils.LLMCaller import LLMCaller

3 changes: 2 additions & 1 deletion app/test_scripts/risk_domain_test_for_how_it_harms_prompt.py
@@ -1,4 +1,5 @@
# python -m app.test_scripts.risk_domain_test_for_how_it_harms_prompt
# Script that tests whether the "how it harms" input is from the same risk domain as the "activity" and "hazard" inputs.
# To run, enter in terminal: python -m app.test_scripts.risk_domain_test_for_how_it_harms_prompt

from ..test_classes.TestModelAccuracy import TestModelAccuracy
from ..test_utils.ExamplesGenerator import ExamplesGeneratorFromCorrectExamples
4 changes: 3 additions & 1 deletion app/test_scripts/risk_domain_test_for_who_it_harms_prompt.py
@@ -1,4 +1,6 @@
# python -m app.test_scripts.risk_domain_test_for_who_it_harms_prompt
# Script that tests whether the "who it harms" input is from the same risk domain as the "activity" and "hazard" inputs.

# To run, enter in terminal: python -m app.test_scripts.risk_domain_test_for_who_it_harms_prompt

from ..test_classes.TestModelAccuracy import TestModelAccuracy
from ..test_utils.ExamplesGenerator import ExamplesGeneratorFromCorrectExamples
app/test_scripts/test_control_measure_classification_prompts.py
@@ -1,4 +1,6 @@
# python -m app.test_scripts.test_control_measure_classification_prompts
# To run, enter in terminal: python -m app.test_scripts.test_control_measure_classification_prompts

# Script that tests the accuracy of control measure classification prompt and performs ablation study to see impact of few-shot and chain-of-thought prompting on accuracy.

from ..test_classes.TestBothPreventionAndMitigationInput import TestBothPreventionAndMitigationInput
from ..test_classes.TestPreventionInput__ControlMeasureClassifiationPrompt import TestPreventionInput__ControlMeasureClassifiationPrompt
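The ablation study mentioned in the comment above could, in spirit, look like the sketch below — the `classify_with_options` callable and its `few_shot`/`chain_of_thought` flags are assumptions for illustration, not the script's actual interface. Each prompting variant is evaluated on the same examples so their accuracies can be compared directly.

```python
from functools import partial
from itertools import product

def run_ablation(examples, classify_with_options, measure_accuracy):
    """Compare accuracy with few-shot examples and chain-of-thought toggled independently."""
    results = {}
    for few_shot, chain_of_thought in product([False, True], repeat=2):
        classify = partial(classify_with_options,
                           few_shot=few_shot,
                           chain_of_thought=chain_of_thought)
        results[(few_shot, chain_of_thought)] = measure_accuracy(examples, classify)
    return results  # maps (few_shot, chain_of_thought) -> accuracy
```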

This file was deleted.

4 changes: 3 additions & 1 deletion app/test_scripts/test_latency.py
@@ -1,4 +1,6 @@
# python -m app.test_scripts.test_latency
# Script which tests the latency of different LLMs

# To run, enter in terminal: python -m app.test_scripts.test_latency

from ..utils.LLMCaller import *
from ..evaluation import evaluation_function, Params
4 changes: 3 additions & 1 deletion app/test_scripts/test_no_information_provided.py
@@ -1,4 +1,6 @@
# python -m app.test_scripts.test_no_information_provided
# Script which tests the accuracy of the NoInformationProvided.py prompt

# To run, enter in terminal: python -m app.test_scripts.test_no_information_provided

from ..test_utils.InputAndExpectedOutput import InputAndExpectedOutputForSinglePrompt
from ..utils.LLMCaller import *
app/test_scripts/test_summarize_control_measure_feedback_prompt.py
@@ -1,4 +1,4 @@
# python -m app.test_scripts.test_summarize_control_measure_feedback_prompt
# To run, enter in terminal: python -m app.test_scripts.test_summarize_control_measure_feedback_prompt

from ..utils.LLMCaller import *
from ..test_classes.TestSummarizeControlMeasureFeedback import TestSummarizePreventionFeedback, TestSummarizeMitigationFeedback
2 changes: 2 additions & 0 deletions app/test_utils/ExamplesGenerator.py
@@ -1,3 +1,5 @@
# Series of classes used to create InputAndExpectedOutput objects for different tests.

from .InputAndExpectedOutput import InputAndExpectedOutputForSinglePrompt, InputAndExpectedOutputForCombinedPrompts

class ExamplesGenerator:
2 changes: 2 additions & 0 deletions app/test_utils/InputAndExpectedOutput.py
@@ -1,3 +1,5 @@
# Class which contains a prompt input object and the expected output for the prompt.

try:
from ..prompts.BasePromptInput import BasePromptInput
from ..data.RiskAssessment import RiskAssessment