outlier detection - check whether attack happened

tony92151 · Nov 7, 2022 · 2cc2849 · 2cc2849
1 parent cf9d239
commit 2cc2849
Show file tree

Hide file tree

Showing 6 changed files with 38 additions and 30 deletions.
diff --git a/python/fedml/core/security/constants.py b/python/fedml/core/security/constants.py
@@ -14,6 +14,7 @@
 DEFENSE_THREESIGMA_GEOMEDIAN = "3sigma_geo"
 DEFENSE_MULTIKRUM = "multikrum"
 DEFENSE_TRIMMED_MEAN = "trimmed_mean"
+ANOMALY_DETECTION = "anomaly_detection"
 
 ATTACK_METHOD_BYZANTINE_ATTACK = "byzantine"
 ATTACK_METHOD_DLG = "dlg"

diff --git a/python/fedml/core/security/defense/cross_round_defense.py b/python/fedml/core/security/defense/cross_round_defense.py
@@ -13,13 +13,15 @@
 # todo: pretraining round?
 class CrossRoundDefense(BaseDefenseMethod):
     def __init__(self, config):
+        self.potentially_poisoned_worker_list = None
         self.lazy_worker_list = None
         self.potential_malicious_client_idxs = []
-        self.upperbound = 0.95  # cosine similarity > upperbound is defined as ``very limited difference''-> lazy worker
-        self.lowerbound = 0.8  # cosine similarity < lowerbound: attack may happen; need further defense
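+        # NOTE: "cosine similarity" here appears to mean cosine distance (1 - cosine similarity): range [0, 2], where 0 means the two vectors point in the same direction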
+        self.upperbound = 0.3  # cosine similarity > upperbound: attack may happen; need further defense
+        self.lowerbound = 0.0000001  # cosine similarity < lowerbound is defined as ``very limited difference''-> lazy worker
         self.client_cache = None
-        self.pretraining_round = 2
-        self.potentially_poisoned_worker_list = None
+        self.training_round = 1
+        self.is_attack_existing = True  # for the first round, true
 
     def run(
             self,
@@ -39,41 +41,47 @@ def defend_before_aggregation(
             raw_client_grad_list: List[Tuple[float, Dict]],
             extra_auxiliary_info: Any = None,
     ):
+        self.is_attack_existing = False
+        client_features = self._get_importance_feature(raw_client_grad_list)
+        if self.training_round == 1:
+            self.training_round += 1
+            self.client_cache = client_features
+            return raw_client_grad_list
         self.lazy_worker_list = []
         self.potentially_poisoned_worker_list = []
         # extra_auxiliary_info: global model
         global_model_feature = self._get_importance_feature_of_a_model(
             extra_auxiliary_info
         )
-        client_features = self._get_importance_feature(raw_client_grad_list)
-        if self.client_cache is None:
-            self.client_cache = client_features
         client_wise_scores, global_wise_scores = self.compute_client_cosine_scores(
             client_features, global_model_feature
         )
+        print(f"client_wise_scores = {client_wise_scores}")
+        print(f"global_wise_scores = {global_wise_scores}")
 
         for i in range(len(client_wise_scores)):
             if (
-                    client_wise_scores[i] > self.upperbound
-                    or global_wise_scores[i] > self.upperbound
+                    client_wise_scores[i] < self.lowerbound
+                    or global_wise_scores[i] < self.lowerbound
             ):
                 self.lazy_worker_list.append(i)  # will be directly kicked out later
             elif (
-                    client_wise_scores[i] < self.lowerbound
-                    or global_wise_scores[i] < self.upperbound
+                    client_wise_scores[i] > self.upperbound
+                    or global_wise_scores[i] > self.upperbound
             ):
+                self.is_attack_existing = True
                 self.potentially_poisoned_worker_list.append(i)
 
         for i in range(len(client_features) - 1, -1, -1):
-            if i in self.lazy_worker_list:
-                raw_client_grad_list.pop(i)
-            elif i not in self.potentially_poisoned_worker_list:
+            # if i in self.lazy_worker_list:
+            #     raw_client_grad_list.pop(i)
+            if i not in self.potentially_poisoned_worker_list:
                 self.client_cache[i] = client_features[i]
+        self.training_round += 1
+        print(f"self.potentially_poisoned_worker_list = {self.potentially_poisoned_worker_list}")
+        print(f"self.lazy_worker_list = {self.lazy_worker_list}")
         return raw_client_grad_list
 
-    def is_attack_existing(self):
-        return self.potentially_poisoned_worker_list.size() > 0
-
     def compute_client_cosine_scores(self, client_features, global_model_feature):
         client_wise_scores = []
         global_wise_scores = []

diff --git a/python/fedml/core/security/defense/foolsgold_defense.py b/python/fedml/core/security/defense/foolsgold_defense.py
@@ -1,6 +1,5 @@
 from typing import Callable, List, Tuple, Dict, Any
 import numpy as np
-from scipy import spatial
 from .defense_base import BaseDefenseMethod
 
 """
@@ -56,13 +55,9 @@ def defend_before_aggregation(
     # Takes in grad, compute similarity, get weightings
     @classmethod
     def fools_gold_score(cls, feature_vec_list):
+        import sklearn.metrics.pairwise as smp
         n_clients = len(feature_vec_list)
-        cs = np.zeros((n_clients, n_clients))
-        for i in range(n_clients):
-            for j in range(n_clients):
-                cs[i][j] = 1 - spatial.distance.cosine(feature_vec_list[i], feature_vec_list[j])
-        cs -= np.eye(n_clients)
-        # cs = smp.cosine_similarity(feature_vec_list) - np.eye(n_clients)
+        cs = smp.cosine_similarity(feature_vec_list) - np.eye(n_clients)
         maxcs = np.max(cs, axis=1)
         # pardoning
         for i in range(n_clients):

diff --git a/python/fedml/core/security/defense/outlier_detection.py b/python/fedml/core/security/defense/outlier_detection.py
@@ -28,6 +28,6 @@ def defend_before_aggregation(
             extra_auxiliary_info: Any = None,
     ):
         client_grad_list = self.cross_round_check.defend_before_aggregation(raw_client_grad_list, extra_auxiliary_info)
-        if self.cross_round_check.is_attack_existing():
-            client_grad_list = self.cross_round_check.defend_before_aggregation(client_grad_list, extra_auxiliary_info)
+        if self.cross_round_check.is_attack_existing:
+            client_grad_list = self.three_sigma_check.defend_before_aggregation(client_grad_list, extra_auxiliary_info)
         return client_grad_list
diff --git a/python/fedml/core/security/defense/three_sigma_krum_defense.py b/python/fedml/core/security/defense/three_sigma_krum_defense.py
@@ -119,12 +119,12 @@ def compute_avg_with_krum(self, raw_client_grad_list):
         importance_feature_list = self._get_importance_feature(raw_client_grad_list)
         krum_scores = compute_krum_score(
             importance_feature_list,
-            client_num_after_trim=math.floor(len(raw_client_grad_list) / 2),
+            client_num_after_trim=math.ceil(len(raw_client_grad_list) / 2) - 1,
         )
         score_index = torch.argsort(
             torch.Tensor(krum_scores)
         ).tolist()  # indices; ascending
-        score_index = score_index[0 : math.floor(len(raw_client_grad_list) / 2)]
+        score_index = score_index[0: math.ceil(len(raw_client_grad_list) / 2) - 1]
         honest_importance_feature_list = [
             importance_feature_list[i] for i in score_index
         ]

diff --git a/python/fedml/core/security/fedml_defender.py b/python/fedml/core/security/fedml_defender.py
@@ -3,6 +3,7 @@
 from .defense.RFA_defense import RFADefense
 from .defense.coordinate_wise_trimmed_mean_defense import CoordinateWiseTrimmedMeanDefense
 from .defense.crfl_defense import CRFLDefense
+from .defense.outlier_detection import OutlierDetection
 from .defense.three_sigma_defense import ThreeSigmaDefense
 from .defense.three_sigma_geomedian_defense import ThreeSigmaGeoMedianDefense
 from .defense.three_sigma_krum_defense import ThreeSigmaKrumDefense
@@ -30,7 +31,7 @@
     DEFENSE_MULTIKRUM,
     DEFENSE_TRIMMED_MEAN,
     DEFENSE_THREESIGMA_GEOMEDIAN,
-    DEFENSE_THREESIGMA_KRUM,
+    DEFENSE_THREESIGMA_KRUM, ANOMALY_DETECTION,
 )
 
 
@@ -85,6 +86,8 @@ def init(self, args):
                 self.defender = CRFLDefense(args)
             elif self.defense_type == DEFENSE_TRIMMED_MEAN:
                 self.defender = CoordinateWiseTrimmedMeanDefense(args)
+            elif self.defense_type == ANOMALY_DETECTION:
+                self.defender = OutlierDetection(args)
             else:
                 raise Exception("args.defense_type is not defined!")
         else:
@@ -137,7 +140,8 @@ def is_defense_before_aggregation(self):
             DEFENSE_THREESIGMA_KRUM,
             DEFENSE_KRUM,
             DEFENSE_MULTIKRUM,
-            DEFENSE_TRIMMED_MEAN
+            DEFENSE_TRIMMED_MEAN,
+            ANOMALY_DETECTION
         ]
 
     def is_defense_after_aggregation(self):