Adding Longest Common Subsequence (#315)

Arvind-raj06 · web-flow · commit 4e0372def48a · 2021-01-19T13:16:54.000+05:30
diff --git a/pydatastructs/linear_data_structures/__init__.py b/pydatastructs/linear_data_structures/__init__.py
@@ -30,6 +30,7 @@
     counting_sort,
     bucket_sort,
     cocktail_shaker_sort,
-    quick_sort
+    quick_sort,
+    longest_common_subsequence
 )
 __all__.extend(algorithms.__all__)
diff --git a/pydatastructs/linear_data_structures/algorithms.py b/pydatastructs/linear_data_structures/algorithms.py
@@ -13,7 +13,8 @@
     'counting_sort',
     'bucket_sort',
     'cocktail_shaker_sort',
-    'quick_sort'
+    'quick_sort',
+    'longest_common_subsequence'
 ]
 
 def _merge(array, sl, el, sr, er, end, comp):
@@ -722,3 +723,67 @@ def partition(low, high, pick_pivot_element):
         array._modify(force=True)
 
     return array
+
+def longest_common_subsequence(seq1: OneDimensionalArray, seq2: OneDimensionalArray) -> OneDimensionalArray:
+    """
+    Finds the longest common subsequence between the
+    two given sequences.
+
+    Parameters
+    ==========
+
+    seq1: OneDimensionalArray
+        The first sequence.
+    seq2: OneDimensionalArray
+        The second sequence.
+
+    Returns
+    =======
+
+    output: OneDimensionalArray
+        The longest common subsequence, returned as a new
+        array created with the data type of ``seq1``. It is
+        empty when either sequence is empty or when the
+        sequences share no element.
+
+    Examples
+    ========
+
+    >>> from pydatastructs import longest_common_subsequence as LCS, OneDimensionalArray as ODA
+    >>> arr1 = ODA(str, ['A', 'B', 'C', 'D', 'E'])
+    >>> arr2 = ODA(str, ['A', 'B', 'C', 'G' ,'D', 'E', 'F'])
+    >>> lcs = LCS(arr1, arr2)
+    >>> str(lcs)
+    "['A', 'B', 'C', 'D', 'E']"
+    >>> arr1 = ODA(str, ['A', 'P', 'P'])
+    >>> arr2 = ODA(str, ['A', 'p', 'P', 'S', 'P'])
+    >>> lcs = LCS(arr1, arr2)
+    >>> str(lcs)
+    "['A', 'P', 'P']"
+
+    References
+    ==========
+
+    .. [1] https://en.wikipedia.org/wiki/Longest_common_subsequence_problem
+
+    Note
+    ====
+
+    The data types of elements across both the sequences
+    should be same and should be comparable.
+    """
+    row = len(seq1)
+    col = len(seq2)
+
+    # lengths[i][j] holds the LCS length of the first i elements of
+    # seq1 and the first j elements of seq2. Row 0 and column 0 stand
+    # for an empty prefix and therefore stay 0.
+    lengths = [[0] * (col + 1) for _ in range(row + 1)]
+
+    for i in range(1, row + 1):
+        for j in range(1, col + 1):
+            if seq1[i-1] == seq2[j-1]:
+                lengths[i][j] = lengths[i-1][j-1] + 1
+            else:
+                lengths[i][j] = max(lengths[i-1][j], lengths[i][j-1])
+
+    # Rebuild one subsequence by walking back from the bottom-right
+    # cell instead of storing a copy of the partial answer in every
+    # cell. On a mismatch we move up only when that side is strictly
+    # longer and otherwise move left, so ties resolve to the same
+    # subsequence every time.
+    common = []
+    i, j = row, col
+    while i > 0 and j > 0:
+        if seq1[i-1] == seq2[j-1]:
+            common.append(seq1[i-1])
+            i -= 1
+            j -= 1
+        elif lengths[i-1][j] > lengths[i][j-1]:
+            i -= 1
+        else:
+            j -= 1
+    # Elements were collected from last to first.
+    common.reverse()
+
+    return OneDimensionalArray(seq1._dtype, common)
diff --git a/pydatastructs/linear_data_structures/tests/test_algorithms.py b/pydatastructs/linear_data_structures/tests/test_algorithms.py
@@ -1,7 +1,8 @@
 from pydatastructs import (
     merge_sort_parallel, DynamicOneDimensionalArray,
     OneDimensionalArray, brick_sort, brick_sort_parallel,
-    heapsort, matrix_multiply_parallel, counting_sort, bucket_sort, cocktail_shaker_sort, quick_sort)
+    heapsort, matrix_multiply_parallel, counting_sort, bucket_sort, cocktail_shaker_sort, quick_sort, longest_common_subsequence)
+
 
 from pydatastructs.utils.raises_util import raises
 import random
@@ -100,3 +101,28 @@ def test_matrix_multiply_parallel():
     J = [[2, 1, 2], [1, 2, 1], [2, 2, 2]]
     output = matrix_multiply_parallel(I, J, num_threads=1)
     assert expected_result == output
+
+def test_longest_common_sequence():
+    # Checks longest_common_subsequence on string and integer arrays,
+    # comparing the string form of each result with the expected text.
+    make_array = OneDimensionalArray
+
+    # Repeated characters on both sides.
+    left_chars = make_array(str, ['A', 'A', 'S', 'C', 'C', 'I', 'I'])
+    right_chars = make_array(str, ['A', 'S', 'S', 'C', 'I', 'I', 'I', 'I'])
+    result = longest_common_subsequence(left_chars, right_chars)
+    assert str(result) == "['A', 'S', 'C', 'I', 'I']"
+
+    # Common elements interleaved with unrelated ones.
+    short_word = make_array(str, ['O', 'V', 'A', 'L'])
+    long_word = make_array(str, ['F', 'O', 'R', 'V', 'A', 'E', 'W'])
+    result = longest_common_subsequence(short_word, long_word)
+    assert str(result) == "['O', 'V', 'A']"
+
+    # Integer arrays where the second is entirely contained in the first.
+    long_numbers = make_array(int, [1, 2, 3, 4, 5, 6, 6, 5, 4, 3, 2, 1])
+    short_numbers = make_array(int, [1, 2, 3, 4, 4, 3, 2, 1])
+    result = longest_common_subsequence(long_numbers, short_numbers)
+    assert str(result) == '[1, 2, 3, 4, 4, 3, 2, 1]'
+
+    # An empty second sequence yields an empty result.
+    no_numbers = make_array(int, [])
+    result = longest_common_subsequence(short_numbers, no_numbers)
+    assert str(result) == '[]'

Original file line number	Diff line number	Diff line change
`@@ -30,6 +30,7 @@`
`30`	`30`	`counting_sort,`
`31`	`31`	`bucket_sort,`
`32`	`32`	`cocktail_shaker_sort,`
`33`		`- quick_sort`
	`33`	`+ quick_sort,`
	`34`	`+ longest_common_subsequence`
`34`	`35`	`)`
`35`	`36`	`__all__.extend(algorithms.__all__)`