Skip to content

Commit f4bd7bb

Browse files
committed
fix(v1.1.0): Output filepath didn't use relative locations.
1 parent 065ffc3 commit f4bd7bb

File tree

1 file changed

+61
-62
lines changed

1 file changed

+61
-62
lines changed

main.py

Lines changed: 61 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -36,14 +36,14 @@
3636
def prepare_seq(seqs: dict, output_file_name: str):
3737
"""
3838
Prepare DNA sequences for alignment using Clustal Omega.
39-
39+
4040
Converts dictionary of sequences to FASTA format and runs Clustal Omega
4141
to create a multiple sequence alignment file.
42-
42+
4343
Args:
4444
seqs: Dictionary with sequence names as keys and sequences as values
4545
output_file_name: Base name for output alignment file
46-
46+
4747
Raises:
4848
SystemExit: If Clustal Omega execution fails
4949
"""
@@ -68,13 +68,13 @@ def prepare_seq(seqs: dict, output_file_name: str):
6868
def prepare_formatted_seq(aln_file_name: str) -> str:
6969
"""
7070
Format aligned sequences in triplet notation.
71-
72-
Reads a Clustal alignment file and reformats it to add spaces
71+
72+
Reads a Clustal alignment file and reformats it to add spaces
7373
after every three nucleotides for better readability.
74-
74+
7575
Args:
7676
aln_file_name: Path to the Clustal alignment file
77-
77+
7878
Returns:
7979
Formatted alignment string with triplet notation
8080
"""
@@ -114,10 +114,10 @@ def prepare_formatted_seq(aln_file_name: str) -> str:
114114
def split_line(line: str):
115115
"""
116116
Split an alignment line into sequence name and actual sequence.
117-
117+
118118
Args:
119119
line: A line from the alignment file
120-
120+
121121
Returns:
122122
Tuple of (name, sequence) or None if parsing fails
123123
"""
@@ -129,25 +129,25 @@ def split_line(line: str):
129129
return None
130130

131131

132-
def mark_sequence_line(sequence_line: str, search_sequence: str, pattern_index: int = None,
132+
def mark_sequence_line(sequence_line: str, search_sequence: str, pattern_index: int = None,
133133
preliminary_start: int = None, ignore_spaces: bool = False):
134134
"""
135135
Find all occurrences of a pattern in a sequence line.
136-
136+
137137
This function implements the core sequence search algorithm with two modes:
138138
1. Exact matching - spaces are significant
139139
2. Spaced matching - spaces are ignored during comparison
140-
140+
141141
The function also handles partial matches that might continue on the next line
142142
by tracking preliminary match state.
143-
143+
144144
Args:
145145
sequence_line: The text line containing the sequence to search
146146
search_sequence: The pattern to search for
147147
pattern_index: Index in search_sequence for continuing a previous match
148148
preliminary_start: Starting position of a preliminary match from previous line
149149
ignore_spaces: Whether to ignore spaces when matching
150-
150+
151151
Returns:
152152
Tuple of (matches, match_count, preliminary_state, preliminary_completed)
153153
- matches: List of (start, end) positions of matches
@@ -160,10 +160,10 @@ def mark_sequence_line(sequence_line: str, search_sequence: str, pattern_index:
160160
len_search = len(search_sequence)
161161
len_seq = len(sequence_line)
162162
preliminary = () # Stores state for potential match continuation
163-
163+
164164
# Track if a preliminary match was completed
165165
# None = preliminary match in progress
166-
# True = preliminary match successful
166+
# True = preliminary match successful
167167
# False = preliminary match failed or no preliminary match was attempted
168168
preliminary_completed = False if pattern_index is None else None
169169

@@ -189,30 +189,30 @@ def mark_sequence_line(sequence_line: str, search_sequence: str, pattern_index:
189189
pattern_i = 0 # Current position in pattern
190190
sequence_i = current_pos # Current position in sequence
191191
match_start_i = -1 # Starting position of potential match
192-
192+
193193
# Continue a match from previous line if pattern_index is provided
194194
if pattern_index is not None and pattern_index != -1:
195195
pattern_i = pattern_index # Resume pattern matching from this position
196196
if preliminary_start is not None:
197197
match_start_i = preliminary_start # Use provided start position
198198
pattern_index = -1 # Reset to avoid using this value again
199-
199+
200200
# Character-by-character comparison loop
201201
while pattern_i < len_search and sequence_i < len_seq:
202202
seq_char = sequence_line[sequence_i]
203-
203+
204204
# Skip spaces in the sequence
205205
if seq_char == ' ':
206206
sequence_i += 1
207207
# If we haven't started a match yet, update current position too
208208
if match_start_i == -1 and pattern_i == 0:
209209
current_pos = sequence_i
210210
continue
211-
211+
212212
# Mark the start of a potential match
213213
if match_start_i == -1:
214214
match_start_i = sequence_i
215-
215+
216216
# Character matches
217217
if seq_char == search_sequence[pattern_i]:
218218
pattern_i += 1
@@ -223,14 +223,14 @@ def mark_sequence_line(sequence_line: str, search_sequence: str, pattern_index:
223223
if preliminary_completed is None:
224224
preliminary_completed = False
225225
break
226-
226+
227227
# Complete match found
228228
if pattern_i == len_search:
229229
match_end_i = sequence_i
230230
matches.append((match_start_i, match_end_i))
231231
matches_num += 1
232232
current_pos = match_end_i # Continue search after this match
233-
233+
234234
# If this was a preliminary match continuation, mark it complete
235235
if preliminary_completed is None:
236236
preliminary_completed = True
@@ -239,34 +239,34 @@ def mark_sequence_line(sequence_line: str, search_sequence: str, pattern_index:
239239
# Track partial match at end of line for potential continuation
240240
if pattern_i > 0 and sequence_i == len_seq:
241241
preliminary = (match_start_i, sequence_i, pattern_i)
242-
242+
243243
current_pos += 1 # Try next position
244-
244+
245245
# Store partial match data if at end of sequence
246246
if current_pos == len_seq and match_start_i != -1 and pattern_i > 0:
247247
preliminary = (match_start_i, sequence_i, pattern_i)
248-
248+
249249
# Return results
250250
if len(matches) != 0:
251251
return matches, matches_num, preliminary, preliminary_completed
252252
else:
253253
return None, 0, preliminary, preliminary_completed if preliminary_completed is not None else False
254254

255255

256-
def process_lines(lines: list, search_sequence: str, sequences_num: int,
256+
def process_lines(lines: list, search_sequence: str, sequences_num: int,
257257
ignore_spaces: bool = False) -> tuple:
258258
"""
259259
Process all lines of the alignment to find pattern matches.
260-
260+
261261
This function handles matches that continue across multiple lines by tracking
262262
preliminary (partial) matches for each sequence name.
263-
263+
264264
Args:
265265
lines: All lines from the alignment file
266266
search_sequence: Pattern to search for
267267
sequences_num: Number of sequences in the alignment
268268
ignore_spaces: Whether to ignore spaces during matching
269-
269+
270270
Returns:
271271
Tuple of (results, matches_num):
272272
- results: List of matches for each line (None or list of (start, end) tuples)
@@ -276,48 +276,48 @@ def process_lines(lines: list, search_sequence: str, sequences_num: int,
276276
preliminary = {}
277277
matches_num = 0
278278
results = []
279-
279+
280280
# Number of lines between consecutive occurrences of the same sequence
281281
# (includes sequences + blank/marker lines)
282282
lines_until_next_sequence = sequences_num + 2
283-
283+
284284
# Process each line of the alignment
285285
for i, line in enumerate(lines):
286286
# Skip header lines (CLUSTAL format has 3 header lines)
287287
if i < 3:
288288
results.append(None)
289289
continue
290-
290+
291291
# Extract sequence name and content
292292
name_sequence_line = split_line(line)
293293
if name_sequence_line:
294294
name, sequence_line = name_sequence_line
295-
295+
296296
# Check if we have a partial match from previous occurrence of this sequence
297297
if name in preliminary and preliminary.get(name) != ():
298298
prev_start, _, prev_pattern_i = preliminary.get(name)
299-
299+
300300
# Continue matching from where we left off
301301
# If this is the immediate next line, use preliminary_start
302302
matches_compound = mark_sequence_line(
303-
sequence_line,
304-
search_sequence,
305-
pattern_index=prev_pattern_i,
303+
sequence_line,
304+
search_sequence,
305+
pattern_index=prev_pattern_i,
306306
preliminary_start=prev_start if i - lines_until_next_sequence < 0 else None,
307307
ignore_spaces=ignore_spaces
308308
)
309309
else:
310310
# Start fresh match for this sequence
311311
matches_compound = mark_sequence_line(sequence_line, search_sequence, ignore_spaces=ignore_spaces)
312-
312+
313313
# Process match results
314314
if matches_compound is not None:
315315
matches, matches_count, preliminary_value, preliminary_completed = matches_compound
316-
316+
317317
# Handle case where a preliminary match was completed
318318
# This means we need to mark the end of the previous line too
319-
if (preliminary_completed and
320-
name in preliminary and
319+
if (preliminary_completed and
320+
name in preliminary and
321321
preliminary.get(name) != ()):
322322
# Find previous line with this sequence
323323
prev_line_index = i - lines_until_next_sequence
@@ -330,10 +330,10 @@ def process_lines(lines: list, search_sequence: str, sequences_num: int,
330330
match_end = len(lines[prev_line_index].split(' ', 1)[1]) if ' ' in lines[prev_line_index] else 0
331331
results[prev_line_index].append((match_start, match_end))
332332
matches_num += 1
333-
333+
334334
# Update preliminary match state for this sequence
335335
preliminary[name] = preliminary_value
336-
336+
337337
# Add matches for current line
338338
if matches:
339339
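matches_num += matches_count # NOTE(review): adds this line's match count to the running total; verify that a match completed across two lines is not also counted by the separate `matches_num += 1` in the preliminary-completion branch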
@@ -344,7 +344,7 @@ def process_lines(lines: list, search_sequence: str, sequences_num: int,
344344
results.append(None)
345345
else:
346346
results.append(None)
347-
347+
348348
return results, matches_num
349349

350350

@@ -356,18 +356,18 @@ def save_matches_html(
356356
):
357357
"""
358358
Generate an HTML document with highlighted sequence matches.
359-
359+
360360
Creates an HTML file with the alignment where matched patterns
361361
are highlighted with yellow background.
362-
362+
363363
Args:
364364
file_path: Output HTML file path
365365
lines: Alignment lines to display
366366
matches_results: Match positions for each line
367367
page_title: HTML page title
368368
font_name: Font to use for sequence display
369369
max_width_percent: Maximum width of the sequence display area
370-
370+
371371
Returns:
372372
Boolean indicating success or failure
373373
"""
@@ -469,10 +469,10 @@ def save_matches_word(
469469
):
470470
"""
471471
Generate a Word document with highlighted sequence matches.
472-
472+
473473
Creates a Word document with the alignment where matched patterns
474474
are highlighted with the specified color.
475-
475+
476476
Args:
477477
file_path: Output DOCX file path
478478
lines: Alignment lines to display
@@ -481,7 +481,7 @@ def save_matches_word(
481481
font_size: Font size in points
482482
margin_inches: Document margins in inches
483483
highlight_color: Color to use for highlighting matches
484-
484+
485485
Returns:
486486
Boolean indicating success or failure
487487
"""
@@ -578,7 +578,7 @@ def save_matches_word(
578578
def main():
579579
"""
580580
Main program execution flow.
581-
581+
582582
Steps:
583583
1. Load sequence data from JSON
584584
2. Generate or reuse sequence alignment
@@ -595,8 +595,8 @@ def main():
595595
under certain conditions; refer to the LICENSE file for details.
596596
""")
597597

598-
output_path = os.path.join(os.path.dirname(__file__), "output")
599-
input_path = os.path.join(os.path.dirname(__file__), "input")
598+
output_path = "output"
599+
input_path = "input"
600600

601601
os.makedirs(output_path, exist_ok=True)
602602
os.makedirs(input_path, exist_ok=True)
@@ -619,17 +619,16 @@ def main():
619619
if os.path.exists(path_hash) and os.path.isfile(path_hash):
620620
with open(path_hash, "r", encoding="utf-8") as file:
621621
last_hash = file.read()
622-
622+
623623
# Calculate hash of current sequences
624624
new_hash = hashlib.md5(str(sequences).encode("utf-8"), usedforsecurity=False).hexdigest()
625-
625+
626626
# Generate alignment if hash changed or alignment file doesn't exist
627627
if last_hash != new_hash or not os.path.exists(os.path.join(output_path, "sequences.aln")) or not os.path.isfile(os.path.join(output_path, "sequences.aln")):
628628
with open(path_hash, "w", encoding="utf-8") as file:
629629
file.write(new_hash)
630-
print("Computing DNA sequence alignment...", end='')
631630
prepare_seq(sequences, os.path.join(output_path, "sequences.aln"))
632-
print("\rComputed DNA sequence alignment. \n")
631+
print("Computed DNA sequence alignment.\n")
633632
else:
634633
print("Reusing unchanged DNA alignment file.\n")
635634

@@ -642,11 +641,11 @@ def main():
642641

643642
# Initialize match results list
644643
match_results = []
645-
644+
646645
# Get search pattern from user
647646
search_word = input("Input DNA sequence to search for (e.g. \"ACC\" or \"\" to disable marking)\n > ").strip()
648647
print()
649-
648+
650649
if len(search_word) != 0:
651650
# Determine search mode: exact or ignoring spaces
652651
skip_spaces = True if "space" in input("Search mode (exact/spaced)\n > ") else False
@@ -659,13 +658,13 @@ def main():
659658
print(f"{match_num} matches found.\n")
660659
else:
661660
print("Entered empty search phrase.\n")
662-
661+
663662
# Create output documents with highlighted matches
664663
if save_matches_html(html_output_filename, text_lines, match_results):
665664
print(f"Saved HTML to '{html_output_filename}'")
666665
if save_matches_word(word_output_filename, text_lines, match_results):
667666
print(f"Saved Word document to '{word_output_filename}'")
668-
667+
669668
print("\nExiting...")
670669

671670

0 commit comments

Comments
 (0)