Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add XPIA simulator and evaluator #3703

Closed
wants to merge 52 commits into from
Closed
Show file tree
Hide file tree
Changes from 46 commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
3366f03
preemptive ECI and IP simulator support
MilesHolland Aug 12, 2024
8e65d72
make ECI sim enum private
MilesHolland Aug 12, 2024
cccc2e7
Merge branch 'main' into feature/add-eci-and-ip-simulators
MilesHolland Aug 16, 2024
70843d3
add eci/ip evals v1
MilesHolland Aug 20, 2024
40384fa
changes
MilesHolland Aug 23, 2024
2388488
docstrings and rename ECI
MilesHolland Aug 23, 2024
2a7085d
update eci in api call, and record eval tests
MilesHolland Aug 23, 2024
9589ab0
update test runnage
MilesHolland Aug 23, 2024
41241dc
Merge branch 'main' into feature/add-eci-and-ip-simulators
MilesHolland Aug 23, 2024
fd2d292
rename package and fix examples
MilesHolland Aug 26, 2024
c4f73aa
update CL
MilesHolland Aug 26, 2024
3a8b5bf
more pr comments
MilesHolland Aug 27, 2024
1bf15bf
update recording
MilesHolland Aug 27, 2024
7f8a487
update eci test with recording
MilesHolland Aug 27, 2024
985202b
update sim tests
MilesHolland Aug 27, 2024
d8d2ebd
Merge branch 'main' into feature/add-eci-and-ip-simulators
MilesHolland Aug 27, 2024
a247f7a
xpia eval
MilesHolland Aug 28, 2024
0b48b02
remove docstring todo
MilesHolland Aug 28, 2024
bfca026
Merge branch 'feature/add-eci-and-ip-simulators' into feature/xpia-si…
MilesHolland Aug 28, 2024
157d764
Resolve merge conflicts
diondrapeck Aug 28, 2024
aee2f70
Rename XPIA to IndrectAttack
diondrapeck Aug 28, 2024
3f3e8d0
Add XPIA Simulator
diondrapeck Aug 29, 2024
854b952
Remove content safety reference
diondrapeck Aug 30, 2024
2677576
Add skip label to test
diondrapeck Aug 30, 2024
21844e4
Parse xpia response
diondrapeck Aug 30, 2024
285d214
Add context to evaluator
diondrapeck Aug 30, 2024
7f32bb7
revert adding demo
diondrapeck Aug 30, 2024
97e7d40
Change evaluator to follow chat protocol and accept conversation
diondrapeck Aug 30, 2024
e92adbc
Update docstring with example
diondrapeck Aug 30, 2024
6883f05
Update evaluation parsing and aggregation
diondrapeck Sep 2, 2024
735734c
Hide and update jailbreak param on adversarial simulator
diondrapeck Sep 2, 2024
3044851
Update IndirectAttackSimulator and IndirectAttackEvaluator docstring
diondrapeck Sep 2, 2024
acdf191
Update CHANGELOG
diondrapeck Sep 2, 2024
087d976
Add Q/A functionality
diondrapeck Sep 3, 2024
ed1f319
Fix evaluator docstring
diondrapeck Sep 3, 2024
19754e2
Merge branch 'main' into feature/xpia-sim-and-eval
diondrapeck Sep 3, 2024
0dc7275
Merge branch 'main' into feature/xpia-sim-and-eval
diondrapeck Sep 3, 2024
1eee767
Add xpia scenario
diondrapeck Sep 3, 2024
536bb8e
Update tests
diondrapeck Sep 3, 2024
6675a45
Merge branch 'feature/xpia-sim-and-eval' of https://github.com/micros…
diondrapeck Sep 3, 2024
cc596a0
Fix logging error
diondrapeck Sep 3, 2024
e33ec78
Ignore flake8 suggestion
diondrapeck Sep 3, 2024
aec16db
Updated tests to use new _jailbreak_type param
diondrapeck Sep 3, 2024
7c3a8c5
Update evaluator test
diondrapeck Sep 3, 2024
9889519
Record tests
diondrapeck Sep 4, 2024
52c8756
Update xpia simulator to return only one dataset
diondrapeck Sep 4, 2024
7c7f7f5
Add exception for conversation + q/a and add breaking change warning …
diondrapeck Sep 4, 2024
b4fdc3a
Add exception for conversation + q/a and add breaking change warning …
diondrapeck Sep 4, 2024
4506c49
Resolve merge conflicts
diondrapeck Sep 4, 2024
6ecc214
Update test
diondrapeck Sep 4, 2024
c37f570
Replace jailbreak param
diondrapeck Sep 4, 2024
70fc768
Resolve merge conflicts
diondrapeck Sep 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 50 additions & 47 deletions .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,65 +54,66 @@
"benchmark/promptflow-serve/result-archive/**"
],
"words": [
"aoai",
"amlignore",
"mldesigner",
"faiss",
"serp",
"aoai",
"Apim",
"astext",
"attribited",
"azureai",
"azurecr",
"azureml",
"mlflow",
"vnet",
"openai",
"pfazure",
"azuremlsdktestpypi",
"Bhavik",
"centralus",
"chatml",
"cref",
"e2etest",
"e2etests",
"eastus",
"azureai",
"vectordb",
"Qdrant",
"Weaviate",
"Entra",
"env",
"e2etests",
"e2etest",
"tablefmt",
"logprobs",
"logit",
"faiss",
"geval",
"hnsw",
"chatml",
"UNLCK",
"junit",
"KHTML",
"Likert",
"llmlingua",
"logit",
"logprobs",
"meid",
"mgmt",
"MistralAI",
"mldesigner",
"mlflow",
"msal",
"msrest",
"myconn",
"numlines",
"azurecr",
"centralus",
"nunit",
"openai",
"pfazure",
"pfbytes",
"pfcli",
"pfutil",
"Policheck",
"azuremlsdktestpypi",
"rediraffe",
"pydata",
"ROBOCOPY",
"undoc",
"Qdrant",
"rediraffe",
"retriable",
"pfcli",
"pfutil",
"mgmt",
"wsid",
"westus",
"msrest",
"cref",
"msal",
"pfbytes",
"Apim",
"junit",
"nunit",
"astext",
"Likert",
"geval",
"ROBOCOPY",
"serp",
"Summ",
"Bhavik",
"meid",
"Entra",
"tablefmt",
"undoc",
"UNLCK",
"uvicorn",
"attribited",
"MistralAI",
"llmlingua",
"myconn"
"vectordb",
"vnet",
"Weaviate",
"westus",
"wsid",
"Xpia"
],
"ignoreWords": [
"openmpi",
Expand Down Expand Up @@ -242,6 +243,8 @@
"azureopenaimodelconfiguration",
"openaimodelconfiguration",
"usecwd",
"upia",
"xpia",
"locustio",
"euap",
"Rerank",
Expand Down
2 changes: 2 additions & 0 deletions src/promptflow-evals/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
## v0.3.3 (Upcoming)
### Features Added
- Add a new evaluator (ProtectedMaterialsEvaluator) and associated adversarial content simulator enum type (AdversarialScenario.ADVERSARIAL_CONTENT_PROTECTED_MATERIAL) for protected materials, which determines if given inputs contain materials protected by IP laws.
- Introduced `IndirectAttackSimulator` to simulate XPIA (cross-domain prompt injected attack) jailbreaks against your AI system.
- Introduced `IndirectAttackEvaluator` to evaluate content for the presence of XPIA (cross-domain prompt injected attacks) injected into conversation or Q/A context to interrupt normal expected functionality by eliciting manipulated content, intrusion, and attempts to gather information outside the scope of your AI system.

### Bugs Fixed
- Fixed evaluators to accept (non-Azure) Open AI Configs.
Expand Down
2 changes: 2 additions & 0 deletions src/promptflow-evals/promptflow/evals/_common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class Tasks:

CONTENT_HARM = "content harm"
PROTECTED_MATERIAL = "protected material"
XPIA = "xpia"


class _InternalAnnotationTasks:
Expand All @@ -52,6 +53,7 @@ class EvaluationMetrics:
SELF_HARM = "self_harm"
SEXUAL = "sexual"
PROTECTED_MATERIAL = "protected_material"
XPIA = "xpia"


class _InternalEvaluationMetrics:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ def generate_payload(normalized_user_text: str, metric: str) -> Dict:
elif metric == _InternalEvaluationMetrics.ECI:
task = _InternalAnnotationTasks.ECI
include_metric = False
elif metric == EvaluationMetrics.XPIA:
task = Tasks.XPIA
include_metric = False
return (
{
"UserTextList": [normalized_user_text],
Expand Down Expand Up @@ -208,7 +211,7 @@ def parse_response( # pylint: disable=too-many-branches,too-many-statements
:rtype: List[List[Dict]]
"""

if metric_name in {EvaluationMetrics.PROTECTED_MATERIAL, _InternalEvaluationMetrics.ECI}:
if metric_name in {EvaluationMetrics.PROTECTED_MATERIAL, _InternalEvaluationMetrics.ECI, EvaluationMetrics.XPIA}:
if not batch_response or len(batch_response[0]) == 0 or metric_name not in batch_response[0]:
return {}
response = batch_response[0][metric_name]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ._qa import QAEvaluator
from ._relevance import RelevanceEvaluator
from ._similarity import SimilarityEvaluator
from ._xpia import IndirectAttackEvaluator

__all__ = [
"CoherenceEvaluator",
Expand All @@ -36,4 +37,5 @@
"ContentSafetyEvaluator",
"ContentSafetyChatEvaluator",
"ProtectedMaterialsEvaluator",
"IndirectAttackEvaluator",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from ._xpia import IndirectAttackEvaluator

__all__ = [
"IndirectAttackEvaluator",
]
Loading
Loading