Skip to content

Commit

Permalink
Fixing Pylint errors.
Browse files Browse the repository at this point in the history
  • Loading branch information
trekhleb committed Dec 24, 2018
1 parent d727046 commit 3b89d57
Show file tree
Hide file tree
Showing 10 changed files with 71 additions and 111 deletions.
28 changes: 14 additions & 14 deletions homemade/anomaly_detection/gaussian_anomaly_detection.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Anomaly Detection Module"""

import numpy as np
import math
import numpy as np


class GaussianAnomalyDetection:
Expand All @@ -11,15 +11,15 @@ def __init__(self, data):
"""GaussianAnomalyDetection constructor"""

# Estimate Gaussian distribution.
(self.mu, self.sigma_squared) = GaussianAnomalyDetection.estimate_gaussian(data)
(self.mu_param, self.sigma_squared) = GaussianAnomalyDetection.estimate_gaussian(data)

# Save training data.
self.data = data

def multivariate_gaussian(self, data):
"""Computes the probability density function of the multivariate gaussian distribution"""

mu = self.mu
mu_param = self.mu_param
sigma_squared = self.sigma_squared

# Get number of training examples and features.
Expand All @@ -32,9 +32,9 @@ def multivariate_gaussian(self, data):
for example_index in range(num_examples):
for feature_index in range(num_features):
# Calculate the power of e.
e_power_dividend = (data[example_index, feature_index] - mu[feature_index]) ** 2
e_power_divider = 2 * sigma_squared[feature_index]
e_power = -1 * e_power_dividend / e_power_divider
power_dividend = (data[example_index, feature_index] - mu_param[feature_index]) ** 2
power_divider = 2 * sigma_squared[feature_index]
e_power = -1 * power_dividend / power_divider

# Calculate the prefix multiplier.
probability_prefix = 1 / math.sqrt(2 * math.pi * sigma_squared[feature_index])
Expand All @@ -51,14 +51,14 @@ def estimate_gaussian(data):
"""This function estimates the parameters of a Gaussian distribution using the given data set."""

# Get number of features and number of examples.
(num_examples, num_features) = data.shape
num_examples = data.shape[0]

# Estimate Gaussian parameters mu and sigma_squared for every feature.
mu = (1 / num_examples) * np.sum(data, axis=0)
sigma_squared = (1 / num_examples) * np.sum((data - mu) ** 2, axis=0)
mu_param = (1 / num_examples) * np.sum(data, axis=0)
sigma_squared = (1 / num_examples) * np.sum((data - mu_param) ** 2, axis=0)

# Return Gaussian parameters.
return mu, sigma_squared
return mu_param, sigma_squared

@staticmethod
def select_threshold(labels, probabilities):
Expand Down Expand Up @@ -104,15 +104,15 @@ def select_threshold(labels, probabilities):
recall = true_positives / (true_positives + false_negatives)

# F1.
f1 = 2 * precision * recall / (precision + recall)
f1_score = 2 * precision * recall / (precision + recall)

# Save history data.
precision_history.append(precision)
recall_history.append(recall)
f1_history.append(f1)
f1_history.append(f1_score)

if f1 > best_f1:
if f1_score > best_f1:
best_epsilon = epsilon
best_f1 = f1
best_f1 = f1_score

return best_epsilon, best_f1, precision_history, recall_history, f1_history
6 changes: 3 additions & 3 deletions homemade/k_means/k_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def train(self, max_iterations):
closest_centroids_ids = np.empty((num_examples, 1))

# Run K-Means.
for iteration_index in range(max_iterations):
for _ in range(max_iterations):
# Find the closest centroids for training examples.
closest_centroids_ids = KMeans.centroids_find_closest(self.data, centroids)

Expand Down Expand Up @@ -108,8 +108,8 @@ def centroids_compute(data, closest_centroids_ids, num_clusters):
:param num_clusters: number of clusters.
"""

# Get number of training examples and features.
(num_examples, num_features) = data.shape
# Get number of features.
num_features = data.shape[1]

# We need to return the following variables correctly.
centroids = np.zeros((num_clusters, num_features))
Expand Down
2 changes: 1 addition & 1 deletion homemade/neural_network/multilayer_perceptron.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def gradient_descent(
# Initialize cost history list.
cost_history = []

for iteration_index in range(max_iteration):
for _ in range(max_iteration):
# Get current cost.
cost = MultilayerPerceptron.cost_function(
data,
Expand Down
12 changes: 7 additions & 5 deletions homemade/utils/features/add_polynomials.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,24 @@
"""Add polynomial features to the features set"""

import numpy as np


def add_polynomials(x1, x2, polynomial_degree):
def add_polynomials(dataset_1, dataset_2, polynomial_degree):
"""Extends data set with polynomial features of certain degree.
Returns a new feature array with more features, comprising
x1, x2, x1^2, x2^2, x1*x2, x1*x2^2, etc.
:param x1: first data set.
:param x2: second data set.
:param dataset_1: first data set.
:param dataset_2: second data set.
:param polynomial_degree: the max power of new features.
"""

polynomials = np.empty((x1.shape[0], 0))
polynomials = np.empty((dataset_1.shape[0], 0))

for i in range(1, polynomial_degree + 1):
for j in range(i + 1):
polynomial_feature = (x1 ** (i - j)) * (x2 ** j)
polynomial_feature = (dataset_1 ** (i - j)) * (dataset_2 ** j)
polynomials = np.concatenate((polynomials, polynomial_feature), axis=1)

return polynomials
10 changes: 6 additions & 4 deletions homemade/utils/features/add_sinusoids.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
"""Add sinusoid features to the features set"""

import numpy as np


def add_sinusoids(x, sinusoid_degree):
def add_sinusoids(dataset, sinusoid_degree):
"""Extends data set with sinusoid features.
Returns a new feature array with more features, comprising
sin(x).
:param x: data set.
:param dataset: data set.
:param sinusoid_degree: multiplier for sinusoid parameter multiplications
"""

sinusoids = np.empty((x.shape[0], 0))
sinusoids = np.empty((dataset.shape[0], 0))

for degree in range(1, sinusoid_degree):
sinusoid_features = np.sin(degree * x)
sinusoid_features = np.sin(degree * dataset)
sinusoids = np.concatenate((sinusoids, sinusoid_features), axis=1)

return sinusoids
2 changes: 2 additions & 0 deletions homemade/utils/features/normalize.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Normalize features"""

import numpy as np


Expand Down
11 changes: 8 additions & 3 deletions homemade/utils/features/prepare_for_training.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import numpy as np
"""Prepares the dataset for training"""

import math
import numpy as np
from .normalize import normalize
from .add_sinusoids import add_sinusoids
from .add_polynomials import add_polynomials
Expand All @@ -9,7 +11,7 @@ def prepare_for_training(data, polynomial_degree=0, sinusoid_degree=0, normalize
"""Prepares data set for training or prediction"""

# Calculate the number of examples.
(num_examples, num_features) = data.shape
num_examples = data.shape[0]

# Prevent original data from being modified.
data_processed = np.copy(data)
Expand All @@ -34,7 +36,10 @@ def prepare_for_training(data, polynomial_degree=0, sinusoid_degree=0, normalize
middle_feature_index = math.floor(current_features_num / 2)

# Split features on halves.
(first_half, second_half) = np.split(data_processed, [middle_feature_index], axis=1)
features_split = np.split(data_processed, [middle_feature_index], axis=1)
first_half = features_split[0]
second_half = features_split[1]

# Generate polynomials.
data_processed = add_polynomials(first_half, second_half, polynomial_degree)

Expand Down
7 changes: 5 additions & 2 deletions homemade/utils/hypothesis/sigmoid.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Sigmoid function"""

import numpy as np


def sigmoid(z):
def sigmoid(matrix):
"""Applies sigmoid function to NumPy matrix"""
return 1 / (1 + np.exp(-z))

return 1 / (1 + np.exp(-matrix))
6 changes: 4 additions & 2 deletions homemade/utils/hypothesis/sigmoid_gradient.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""Sigmoid gradient function"""

from .sigmoid import sigmoid


def sigmoid_gradient(z):
def sigmoid_gradient(matrix):
"""Computes the gradient of the sigmoid function evaluated at the given input."""

return sigmoid(z) * (1 - sigmoid(z))
return sigmoid(matrix) * (1 - sigmoid(matrix))
98 changes: 21 additions & 77 deletions notebooks/anomaly_detection/anomaly_detection_gaussian_demo.ipynb

Large diffs are not rendered by default.

0 comments on commit 3b89d57

Please sign in to comment.