|
| 1 | +# https://leetcode.com/problems/select-k-disjoint-special-substrings |
| 2 | +# Tarjan's algorithm for SCC |
| 3 | +# Construct the following graph: |
| 4 | +# - graph[x] = letters covered by [first x, last x] |
| 5 | +# |
| 6 | +# Compress the SCCs to get a DAG; taking only its leaves guarantees that you take the maximum number of nodes |
| 7 | +# Corner case: |
| 8 | +# - K = 1, valid if there are at least 2 nodes in SCC graph (if there's only 1, that's the whole string) |
| 9 | + |
| 10 | +from collections import defaultdict |
| 11 | +import sys |
| 12 | +from bisect import bisect_left, bisect_right |
| 13 | + |
| 14 | +sys.setrecursionlimit(200000) |
| 15 | + |
| 16 | + |
def _scc_ids(graph):
    """Label every node of *graph* with the id of its strongly connected component.

    Iterative form of Tarjan's algorithm, so the traversal depth is not
    bounded by the interpreter's recursion limit.

    Args:
        graph: adjacency lists; ``graph[u]`` holds the nodes ``u`` points to.

    Returns:
        ``(comp, comp_count)`` where ``comp[u]`` is the component id of node
        ``u`` (ids are ``0 .. comp_count - 1``).
    """
    n = len(graph)
    order = [-1] * n  # discovery time of each node, -1 while unvisited
    low = [0] * n  # smallest discovery time reachable from the node's DFS subtree
    comp = [-1] * n
    on_stack = [False] * n
    stack = []  # nodes whose component has not been closed yet
    clock = 0
    comp_count = 0

    for start in range(n):
        if order[start] != -1:
            continue
        order[start] = low[start] = clock
        clock += 1
        stack.append(start)
        on_stack[start] = True
        work = [(start, 0)]  # explicit DFS stack: (node, index of next edge to try)

        while work:
            node, edge_i = work[-1]
            if edge_i < len(graph[node]):
                work[-1] = (node, edge_i + 1)
                nxt = graph[node][edge_i]
                if order[nxt] == -1:  # tree edge: descend
                    order[nxt] = low[nxt] = clock
                    clock += 1
                    stack.append(nxt)
                    on_stack[nxt] = True
                    work.append((nxt, 0))
                elif on_stack[nxt]:  # back edge into the current path
                    low[node] = min(low[node], order[nxt])
                # otherwise: edge into an already closed component, ignore
                continue

            # every edge of `node` has been explored
            work.pop()
            if low[node] == order[node]:  # `node` is the root of a component
                while True:
                    member = stack.pop()
                    on_stack[member] = False
                    comp[member] = comp_count
                    if member == node:
                        break
                comp_count += 1
            if work:  # propagate the low-link to the DFS parent
                parent = work[-1][0]
                low[parent] = min(low[parent], low[node])

    return comp, comp_count


def maxSubstringLength(s: str, K: int) -> bool:
    """Return whether K disjoint special substrings can be selected from *s*.

    A special substring is one whose characters never occur outside of it and
    which is not the whole string.

    Approach: build a graph over the distinct characters of ``s`` with an edge
    ``x -> y`` when ``y`` occurs inside ``[first x, last x]`` (taking ``x``
    forces taking ``y``). After compressing strongly connected components the
    graph is a DAG; each component without outgoing edges (a leaf) yields one
    minimal special substring, and the leaves are pairwise disjoint, so the
    number of leaves is the maximum that can be selected. If there is only one
    component it spans the whole string, which is not allowed.

    Characters are indexed by occurrence rather than by ``ord(c) - ord("a")``,
    so any character (including ``{``) is handled.

    Args:
        s: the input string.
        K: number of disjoint special substrings requested.

    Returns:
        True if at least K disjoint special substrings exist, else False.
        ``K <= 0`` is always satisfiable.
    """
    if K <= 0:
        return True

    positions = defaultdict(list)  # sorted occurrence indices of each character
    for idx, ch in enumerate(s):
        positions[ch].append(idx)

    chars = list(positions)
    node_of = {ch: i for i, ch in enumerate(chars)}

    graph = [[] for _ in chars]  # taking x forces you to take all of graph[x]
    for outer in chars:
        start, end = positions[outer][0], positions[outer][-1]
        for inner in chars:
            if inner == outer:
                continue
            occ = positions[inner]
            # does `inner` occur anywhere inside [start, end]?
            if bisect_right(occ, end) > bisect_left(occ, start):
                graph[node_of[outer]].append(node_of[inner])

    comp, comp_count = _scc_ids(graph)
    if comp_count <= 1:
        # no characters at all, or a single component covering the whole string
        return False

    has_outgoing = [False] * comp_count
    for u, neighbours in enumerate(graph):
        for v in neighbours:
            if comp[u] != comp[v]:
                has_outgoing[comp[u]] = True

    leaf_count = has_outgoing.count(False)
    return leaf_count >= K
| 102 | + |
| 103 | + |
# Ad-hoc demo runs: print the answer for a handful of (s, K) inputs, in order.
_DEMO_CASES = (
    ("awqbfnikfyzseidnttitakhofkzeqqnegixcndrsrelyiarmoyphbscfwjqxomjqlucqmubypcfdoxlm", 1),
    (
        "nbuirvanjiccnsyyyoirleqsrwrvxepaglcidqplyryujytzqoncxjgwdmatytgwhzyhlsodrbzrpbbitovtdasazjtoyyfhowqqrzuvjveydceouscrfazzoblqhalhfybwheybkpcroijxvarrtqrqnmwslkpdducfeblvfecyjyulxgahxlzlyztssfzwvfujrriryslkvdwhmkcyebfhkadrahunvxivkwitilyzknwyujtylahgmlddymlbrbrniomepbmdieasuvdcqnzfwspxewbbpruxrznjxwnjjxvblxyrgv",
        1,
    ),
    ("apqd", 1),
    ("abcdbaefghghfab", 2),
    ("cdefdc", 3),  # annotated "0" in the original notes
    ("abeabe", 0),
)

for _demo_s, _demo_k in _DEMO_CASES:
    print(maxSubstringLength(s=_demo_s, K=_demo_k))
0 commit comments