Skip to content

Commit

Permalink
Merge pull request #11 from Materials-Data-Science-and-Informatics/speed_up_defect_calculation
Browse files Browse the repository at this point in the history

Speed up defect detection calculation using Approximate Nearest Neighbor
  • Loading branch information
NinadBhat authored May 7, 2024
2 parents b3bf736 + 8bbdd4c commit 360ceeb
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 64 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Changelog

Here we provide notes that summarize the most important changes in each released version.

## v0.3.3

* Added a feature to use approximate nearest neighbors search for point defect detection.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "atomID"
version = "0.3.2"
version = "0.3.3"
description = "Python package to identify and annotate crystal structure data files"
authors = ["Ninad Bhat"]
readme = "README.md"
Expand Down
17 changes: 12 additions & 5 deletions src/atomid/annotate.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Annotate crystal class."""

from typing import Optional

import atomrdf as ardf
from ase.io import read as ase_read

Expand Down Expand Up @@ -57,7 +59,9 @@ def annotate_crystal_structure(self) -> None:
lattice_constant=lattice_constants,
)

def identify_defects(self, reference_data_file: str, ref_format: str) -> dict:
def identify_defects(
self, reference_data_file: str, ref_format: str, method: Optional[str] = None
) -> dict:
"""Identify defects in the crystal structure using the reference data file.
Parameters
Expand All @@ -77,12 +81,15 @@ def identify_defects(self, reference_data_file: str, ref_format: str) -> dict:
ref_positions = ref_ase.positions

defects: dict[str, dict[str, float]] = analyze_defects(
reference_positions_list=ref_positions,
actual_positions_list=actual_positions,
reference_positions=ref_positions,
actual_positions=actual_positions,
method=method,
)
return defects

def annotate_defects(self, reference_data_file: str, ref_format: str) -> None:
def annotate_defects(
self, reference_data_file: str, ref_format: str, method: Optional[str] = None
) -> None:
"""Annotate defects in the crystal structure using the reference data file.
Parameters
Expand All @@ -93,7 +100,7 @@ def annotate_defects(self, reference_data_file: str, ref_format: str) -> None:
The format of the file. If None, the format is guessed from the file extension
"""
defects = self.identify_defects(reference_data_file, ref_format)
defects = self.identify_defects(reference_data_file, ref_format, method)

vacancies = defects.get("Vacancies", {"count": 0, "fraction": 0})

Expand Down
2 changes: 1 addition & 1 deletion src/atomid/atomid.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def identify_defects_in_crystal_structure(
ref_positions = ref_ase.positions

defects: dict[str, dict[str, float]] = analyze_defects(
reference_positions_list=ref_positions, actual_positions_list=actual_positions
reference_positions=ref_positions, actual_positions=actual_positions
)

return defects
Expand Down
146 changes: 89 additions & 57 deletions src/atomid/point_defect_analysis/wigner_seitz_method.py
Original file line number Diff line number Diff line change
@@ -1,105 +1,137 @@
"""Point defect identification using the Wigner-Seitz method."""
"""Wigner-Seitz method for point defect analysis."""

from typing import Optional, Tuple
from typing import Callable, Dict, List, Optional, Tuple

import numpy as np


def find_nearest_atom(
atom: tuple, atom_positions: np.ndarray
) -> Tuple[np.signedinteger, list]:
"""Find the nearest atom to a given defect position.
def analyze_defects(
reference_positions: List[Tuple[float, float, float]],
actual_positions: List[Tuple[float, float, float]],
species_ref: Optional[List[str]] = None,
species_actual: Optional[List[str]] = None,
method: Optional[str] = None,
) -> Dict[str, Dict[str, float]]:
"""Analyze the lattice for vacancy, interstitial, and substitution defects.
Parameters
----------
atom : tuple
The position of the defect atom.
atom_positions : np.ndarray
The positions of the atoms in the lattice.
reference_positions : list of tuples
The expected positions of the atoms in the lattice.
actual_positions : list of tuples
The actual positions of the atoms in the lattice.
species_ref : list of str, optional
Species at each reference position.
species_actual : list of str, optional
Species at each actual position.
method : str, optional
The method to find nearest positions ('annoy' for using AnnoyIndex).
Returns
-------
nearest_index : int
The index of the nearest atom.
distance : float
The distance between the defect and the nearest atom.
dict
A dictionary containing the counts and fractions of vacancies, interstitials, and substitutions.
"""
distances = np.linalg.norm(atom_positions - atom, axis=1)
nearest_index: np.signedinteger = np.argmin(distances)
return nearest_index, distances[nearest_index]
reference_array = np.array(reference_positions)
actual_array = np.array(actual_positions)

atom_position_count = np.zeros(len(reference_array))
substitution_count = np.zeros(len(reference_array))
index_finder = create_index_finder(reference_array, method)

def analyze_defects(
reference_positions_list: list,
actual_positions_list: list,
species_ref: Optional[list] = None,
species_actual: Optional[list] = None,
) -> dict[str, dict[str, float]]:
"""Analyze the lattice for vacancy and interstitial defects.
for i, actual in enumerate(actual_array):
nearest_index = index_finder(actual)
atom_position_count[nearest_index] += 1
if (
species_ref
and species_actual
and species_actual[i] != species_ref[nearest_index]
):
substitution_count[nearest_index] += 1

defects: dict = calculate_defects(
reference_array, atom_position_count, substitution_count
)
return defects


def create_index_finder(
reference_array: np.ndarray, method: Optional[str] = None
) -> Callable:
"""Create a function to find the index of the nearest reference position.
Parameters
----------
reference_positions : list of tuples
The expected positions of the atoms in the lattice.
actual_positions : list of tuples
The actual positions of the atoms in the lattice.
reference_array : np.ndarray
The reference positions of the atoms.
method : str, optional
The method to find nearest positions ('annoy' for using AnnoyIndex).
Returns
-------
defect_analysis : dict
A dictionary containing the vacancy and interstitial defects.
function
A function that takes an actual position and returns the index of the nearest reference position.
"""
reference_positions: np.ndarray = np.array(reference_positions_list)
actual_positions: np.ndarray = np.array(actual_positions_list)
atom_position_count = np.zeros(len(reference_positions))
substitution_count = np.zeros(len(reference_positions))

# Process actual positions and compare with reference to identify defects
for i, actual in enumerate(actual_positions):
nearest_index, _ = find_nearest_atom(actual, reference_positions)
atom_position_count[nearest_index] += 1
if method == "annoy":
from annoy import AnnoyIndex

# Check for substitutions if species information is provided
if species_actual and species_ref:
if species_actual[i] != species_ref[nearest_index]:
substitution_count[nearest_index] += 1
t = AnnoyIndex(len(reference_array[0]), "euclidean")
for i, ref in enumerate(reference_array):
t.add_item(i, ref)
t.build(10)
return lambda x: t.get_nns_by_vector(x, 1)[0]
else:
return lambda x: np.argmin(np.sum((reference_array - x) ** 2, axis=1))

# Determine vacancies taking into account both atom positions and substitutions
vacancies = [
(i, tuple(pos))
for i, pos in enumerate(reference_positions)
if atom_position_count[i] == 0
and (not species_actual or substitution_count[i] == 0)
]

def calculate_defects(
reference_array: np.ndarray,
atom_position_count: np.ndarray,
substitution_count: np.ndarray,
) -> Dict:
"""Calculate the number and fraction of vacancies, interstitials, and substitutions.
Parameters
----------
reference_array : np.ndarray
The reference positions of the atoms.
atom_position_count : np.ndarray
The number of atoms at each reference position.
substitution_count : np.ndarray
The number of substitutions at each reference position.
Returns
-------
dict
A dictionary containing the counts and fractions of vacancies, interstitials, and substitutions.
"""
vacancies = [
(i, tuple(pos))
for i, pos in enumerate(reference_positions)
for i, pos in enumerate(reference_array)
if atom_position_count[i] == 0
]
interstitials = [
(i, tuple(pos))
for i, pos in enumerate(actual_positions)
for i, pos in enumerate(reference_array)
if atom_position_count[i] > 1
]
substitutions = [
(i, tuple(pos))
for i, pos in enumerate(reference_positions)
for i, pos in enumerate(reference_array)
if substitution_count[i] > 0
]

return {
"Vacancies": {
"count": len(vacancies),
"fraction": len(vacancies) / len(reference_positions),
"fraction": len(vacancies) / len(reference_array),
},
"Interstitials": {
"count": len(interstitials),
"fraction": len(interstitials) / len(actual_positions),
"fraction": len(interstitials) / len(reference_array),
},
"Substitutions": {
"count": len(substitutions),
"fraction": len(substitutions) / len(reference_positions),
"fraction": len(substitutions) / len(reference_array),
},
}

0 comments on commit 360ceeb

Please sign in to comment.