Skip to content

Commit 4e0372d

Browse files
authored
Adding Longest Common Subsequence (#315)
1 parent e639bc6 commit 4e0372d

File tree

3 files changed

+95
-3
lines changed

3 files changed

+95
-3
lines changed

pydatastructs/linear_data_structures/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
counting_sort,
3131
bucket_sort,
3232
cocktail_shaker_sort,
33-
quick_sort
33+
quick_sort,
34+
longest_common_subsequence
3435
)
3536
__all__.extend(algorithms.__all__)

pydatastructs/linear_data_structures/algorithms.py

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@
1313
'counting_sort',
1414
'bucket_sort',
1515
'cocktail_shaker_sort',
16-
'quick_sort'
16+
'quick_sort',
17+
'longest_common_subsequence'
1718
]
1819

1920
def _merge(array, sl, el, sr, er, end, comp):
@@ -722,3 +723,67 @@ def partition(low, high, pick_pivot_element):
722723
array._modify(force=True)
723724

724725
return array
726+
727+
def longest_common_subsequence(seq1: OneDimensionalArray, seq2: OneDimensionalArray) -> OneDimensionalArray:
    """
    Finds the longest common subsequence between the
    two given sequences.

    Parameters
    ==========

    seq1: OneDimensionalArray
        The first sequence.
    seq2: OneDimensionalArray
        The second sequence.

    Returns
    =======

    output: OneDimensionalArray
        The longest common subsequence. It is built with
        the data type of ``seq1`` and is empty when the
        sequences share no element or either one is empty.

    Examples
    ========

    >>> from pydatastructs import longest_common_subsequence as LCS, OneDimensionalArray as ODA
    >>> arr1 = ODA(str, ['A', 'B', 'C', 'D', 'E'])
    >>> arr2 = ODA(str, ['A', 'B', 'C', 'G' ,'D', 'E', 'F'])
    >>> lcs = LCS(arr1, arr2)
    >>> str(lcs)
    "['A', 'B', 'C', 'D', 'E']"
    >>> arr1 = ODA(str, ['A', 'P', 'P'])
    >>> arr2 = ODA(str, ['A', 'p', 'P', 'S', 'P'])
    >>> lcs = LCS(arr1, arr2)
    >>> str(lcs)
    "['A', 'P', 'P']"

    References
    ==========

    .. [1] https://en.wikipedia.org/wiki/Longest_common_subsequence_problem

    Note
    ====

    The data types of elements across both the sequences
    should be same and should be comparable.

    The table of lengths holds one integer per pair of
    prefixes, so both time and space are proportional to
    ``len(seq1) * len(seq2)``.
    """
    row = len(seq1)
    col = len(seq2)

    # lengths[i][j] is the LCS length of the first i elements of seq1 and
    # the first j elements of seq2. Row 0 and column 0 stay 0 (empty prefix).
    lengths = [[0] * (col + 1) for _ in range(row + 1)]

    for i in range(1, row + 1):
        current, previous = lengths[i], lengths[i-1]
        for j in range(1, col + 1):
            if seq1[i-1] == seq2[j-1]:
                current[j] = previous[j-1] + 1
            else:
                current[j] = max(previous[j], current[j-1])

    # Walk back from the bottom-right corner instead of storing a partial
    # subsequence in every cell. On a mismatch, move up only when that
    # neighbour is strictly longer and otherwise move left, so ties are
    # resolved towards the left neighbour.
    common = []
    i, j = row, col
    while i > 0 and j > 0:
        if seq1[i-1] == seq2[j-1]:
            common.append(seq1[i-1])
            i -= 1
            j -= 1
        elif lengths[i-1][j] > lengths[i][j-1]:
            i -= 1
        else:
            j -= 1

    # Elements were collected from last to first.
    common.reverse()

    return OneDimensionalArray(seq1._dtype, common)

pydatastructs/linear_data_structures/tests/test_algorithms.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from pydatastructs import (
22
merge_sort_parallel, DynamicOneDimensionalArray,
33
OneDimensionalArray, brick_sort, brick_sort_parallel,
4-
heapsort, matrix_multiply_parallel, counting_sort, bucket_sort, cocktail_shaker_sort, quick_sort)
4+
heapsort, matrix_multiply_parallel, counting_sort, bucket_sort, cocktail_shaker_sort, quick_sort, longest_common_subsequence)
5+
56

67
from pydatastructs.utils.raises_util import raises
78
import random
@@ -100,3 +101,28 @@ def test_matrix_multiply_parallel():
100101
J = [[2, 1, 2], [1, 2, 1], [2, 2, 2]]
101102
output = matrix_multiply_parallel(I, J, num_threads=1)
102103
assert expected_result == output
104+
105+
def test_longest_common_sequence():
    """
    Checks longest_common_subsequence on string and integer
    arrays, including the case where the second sequence is empty.
    """
    make_array = OneDimensionalArray

    # Each case: (element type, first sequence, second sequence,
    #             expected str() of the returned array).
    cases = (
        (
            str,
            ['A', 'A', 'S', 'C', 'C', 'I', 'I'],
            ['A', 'S', 'S', 'C', 'I', 'I', 'I', 'I'],
            "['A', 'S', 'C', 'I', 'I']",
        ),
        (
            str,
            ['O', 'V', 'A', 'L'],
            ['F', 'O', 'R', 'V', 'A', 'E', 'W'],
            "['O', 'V', 'A']",
        ),
        (
            int,
            [1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1],
            [1, 2, 3, 4, 4, 3, 2, 1],
            '[1, 2, 3, 4, 4, 3, 2, 1]',
        ),
        (
            int,
            [1, 2, 3, 4, 4, 3, 2, 1],
            [],
            '[]',
        ),
    )

    for dtype, first, second, expected_result in cases:
        output = longest_common_subsequence(
            make_array(dtype, first), make_array(dtype, second))
        assert str(output) == expected_result

0 commit comments

Comments
 (0)