From 08d83ca0b49d24ef51308aff8810ee4710101152 Mon Sep 17 00:00:00 2001
From: suprajamannava17
Date: Sun, 8 Dec 2024 01:46:44 -0600
Subject: [PATCH] #113 search multiple terms

---
 .../src/components/OrcaDashboardComponent.js  |  12 +-
 server/services/file_search_operations.py     | 135 +++++++++---------
 server/usecases/search_orca_data.py           |   4 +-
 3 files changed, 78 insertions(+), 73 deletions(-)

diff --git a/client-app/src/components/OrcaDashboardComponent.js b/client-app/src/components/OrcaDashboardComponent.js
index b1ff31de..a3324a43 100644
--- a/client-app/src/components/OrcaDashboardComponent.js
+++ b/client-app/src/components/OrcaDashboardComponent.js
@@ -43,10 +43,12 @@ const OrcaDashboardComponent = () => {
       return;
     }
 
+    const searchTermsArray = searchTerms.split(",").map((term) => term.trim().toUpperCase());
+
     const data = {
       file_path: filePath.toString(),
-      search_terms: searchTerms.split(","),
-      sections: sections.split(","),
+      search_terms: searchTermsArray,
+      sections: sections.split(",").map((section) => section.trim()),
       specify_lines: specifyLines.toString(),
     };
 
@@ -81,10 +83,12 @@ const OrcaDashboardComponent = () => {
       return;
     }
 
+    const searchTermsArray = searchTerms.split(",").map((term) => term.trim().toUpperCase()); // Split on commas, then trim and uppercase each term
+
     const data = {
       file_path: filePath.toString(),
-      search_terms: searchTerms.split(","),
-      sections: sections.split(","),
+      search_terms: searchTermsArray, // Send the array of search terms
+      sections: sections.split(",").map((section) => section.trim()), // Trim each section input
       specify_lines: specifyLines.toString(),
     };
 
diff --git a/server/services/file_search_operations.py b/server/services/file_search_operations.py
index 9d823038..0e281d40 100644
--- a/server/services/file_search_operations.py
+++ b/server/services/file_search_operations.py
@@ -2,9 +2,9 @@
 import re
 
 def extract_sections(file_path, search_terms, sections, specify_lines,
                      use_total_lines, total_lines):
-    '''
-    Extracts the data from orca log file based on search terms and sections.
-    '''
+    """
+    Extracts the data from the ORCA log file based on search terms and sections.
+    """
     with open(file_path, 'r', encoding='utf-8') as f:
         lines = f.readlines()
 
@@ -27,10 +27,8 @@ def is_content_line(line, term, header_pattern=None):
 
     def is_end_pattern(lines, index):
         """
-        Check if the current line and subsequent lines
-        form an end pattern.
-        Returns:
-            True if an end pattern is detected, False otherwise.
+        Check if the current line and subsequent lines form an end pattern.
+        Returns True if an end pattern is detected, False otherwise.
""" if index + 3 >= len(lines): return False @@ -43,85 +41,88 @@ def is_end_pattern(lines, index): starts_with_delimiter = (current_line.startswith('-') or current_line.startswith('*')) repeated_in_following_lines = (current_line in two_lines_after or - current_line in three_lines_after) + current_line in three_lines_after) next_line_not_delimiter = not (next_line.startswith('---') or next_line.startswith('***')) is_end_pattern_flag = ((starts_with_delimiter and - repeated_in_following_lines and - next_line_not_delimiter) or is_next_two_lines_empty) + repeated_in_following_lines and + next_line_not_delimiter) or is_next_two_lines_empty) return is_end_pattern_flag + # Iterate over each term to search for matches for term in search_terms: line_num = 0 term_line_num = [] - terms_num = 0 for line in lines: if term in line: term_line_num.append(line_num) - terms_num += 1 line_num += 1 - for i in sections: - section_lines = specify_lines[i-1].split() - start_line = term_line_num[i-1] - line_empty = 0 - search_term = search_terms[0] # Update it when supporting multiple search terms - document_content += lines[start_line] - - if section_lines[0].upper() == 'WHOLE' and not use_total_lines: - while line_empty == 0: - if search_term not in lines[start_line].strip() and is_content_line( - lines[start_line], search_term, header_pattern): - document_content += lines[start_line] - if is_end_pattern(lines, start_line): - break - start_line += 1 - - if section_lines[0].upper() == 'WHOLE' and use_total_lines: - for _ in range(total_lines - start_line + term_line_num[i-1]): - if search_term not in lines[start_line].strip() and is_content_line( - lines[start_line], search_term, header_pattern): - document_content += lines[start_line] - if is_end_pattern(lines, start_line): - break - start_line += 1 - line_empty = 1 - - elif section_lines[0].upper() == 'FIRST': - line_count = 0 - while line_count < int(section_lines[1]): - if search_term not in lines[start_line].strip() and is_content_line( - lines[start_line], search_term, header_pattern): - document_content += lines[start_line] - line_count += 1 - if is_end_pattern(lines, start_line): - break - start_line += 1 - - elif section_lines[0].upper() == 'LAST': - temp_content = [] - while start_line < len(lines): - if is_end_pattern(lines, start_line): - break - if is_content_line(lines[start_line], search_term, header_pattern): - temp_content.append(lines[start_line]) - start_line += 1 - document_content += ''.join(temp_content[-int(section_lines[1]):]) - - elif section_lines[0].upper() == 'SPECIFIC': - specific_lines = [int(l) for l in section_lines[1].split(",")] - for l in specific_lines: - if start_line + l < len(lines) and not is_end_pattern(lines, start_line + l): - if is_content_line(lines[start_line + l], search_term, header_pattern): - document_content += lines[start_line + l] + # Process each section for the current search term + for i in range(len(sections)): + section_lines = specify_lines[i].split() + start_line = term_line_num[i] if i < len(term_line_num) else None + if start_line is None: + continue # Skip this section if the term is not found + + line_empty = 0 + document_content += lines[start_line] + + if section_lines[0].upper() == 'WHOLE' and not use_total_lines: + while line_empty == 0: + if term not in lines[start_line].strip() and is_content_line( + lines[start_line], term, header_pattern): + document_content += lines[start_line] + if is_end_pattern(lines, start_line): + break + start_line += 1 + + if section_lines[0].upper() == 'WHOLE' and 
+                for _ in range(total_lines - start_line + term_line_num[i]):
+                    if term not in lines[start_line].strip() and is_content_line(
+                        lines[start_line], term, header_pattern):
+                        document_content += lines[start_line]
+                    if is_end_pattern(lines, start_line):
+                        break
+                    start_line += 1
+                line_empty = 1
+
+            elif section_lines[0].upper() == 'FIRST':
+                line_count = 0
+                while line_count < int(section_lines[1]):
+                    if term not in lines[start_line].strip() and is_content_line(
+                        lines[start_line], term, header_pattern):
+                        document_content += lines[start_line]
+                        line_count += 1
+                    if is_end_pattern(lines, start_line):
+                        break
+                    start_line += 1
+
+            elif section_lines[0].upper() == 'LAST':
+                temp_content = []
+                while start_line < len(lines):
+                    if is_end_pattern(lines, start_line):
+                        break
+                    if is_content_line(lines[start_line], term, header_pattern):
+                        temp_content.append(lines[start_line])
+                    start_line += 1
+                document_content += ''.join(temp_content[-int(section_lines[1]):])
+
+            elif section_lines[0].upper() == 'SPECIFIC':
+                specific_lines = [int(l) for l in section_lines[1].split(",")]
+                for l in specific_lines:
+                    if start_line + l < len(lines) and not is_end_pattern(lines, start_line + l):
+                        if is_content_line(lines[start_line + l], term, header_pattern):
+                            document_content += lines[start_line + l]
 
     return document_content
 
+
 def save_document_to_bytes(document):
-    '''
+    """
     Save the Word document to a byte string
-    '''
+    """
     file_stream = BytesIO()
     document.save(file_stream)
     return file_stream.getvalue()
diff --git a/server/usecases/search_orca_data.py b/server/usecases/search_orca_data.py
index 765ef0b0..50e3e8e1 100644
--- a/server/usecases/search_orca_data.py
+++ b/server/usecases/search_orca_data.py
@@ -9,7 +9,7 @@ def preview_document_use_case(data):
     based on the provided data.
     '''
     file_path = data.get('file_path')
-    search_terms = data.get('search_terms')
+    search_terms = data.get('search_terms', [])
     sections = data.get('sections')
     temp_specify_lines = data.get('specify_lines')
     use_total_lines = data.get('use_total_lines', False)
@@ -38,7 +38,7 @@ def find_sections_use_case(data):
     provided search query.
    '''
     file_path = data.get('file_path')
-    search_terms = data.get('search_terms')
+    search_terms = data.get('search_terms', [])
     sections = data.get('sections')
     temp_specify_lines = data.get('specify_lines')
     use_total_lines = data.get('use_total_lines', False)
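
A minimal sketch (not part of the patch) of how the reworked multi-term path can be exercised end to end. The import path follows the repo layout shown in this diff; the sample log text, temp-file handling, and the chosen section/line specifiers are illustrative assumptions, not values from the patch:

# exercise_multi_term.py - a sketch, assuming the repo root is on PYTHONPATH
# so that server.services.file_search_operations imports as below.
import os
import tempfile

from server.services.file_search_operations import extract_sections

# Fabricated ORCA-style snippet: two headers a user might search for, each
# followed by content lines and a blank-line run that acts as an end pattern.
SAMPLE_LOG = (
    "CARTESIAN COORDINATES (ANGSTROEM)\n"
    "---------------------------------\n"
    "C      0.000000    0.000000    0.000000\n"
    "H      0.000000    0.000000    1.089000\n"
    "\n"
    "\n"
    "FINAL SINGLE POINT ENERGY       -40.123456789\n"
    "\n"
    "\n"
    "\n"
)

with tempfile.NamedTemporaryFile("w", suffix=".out", delete=False) as tmp:
    tmp.write(SAMPLE_LOG)
    log_path = tmp.name

# Mirrors what the client now sends after
# searchTerms.split(",").map((term) => term.trim().toUpperCase()).
search_terms = ["CARTESIAN COORDINATES", "FINAL SINGLE POINT ENERGY"]
sections = ["1"]             # the patched loop indexes by position, so only len(sections) is used here
specify_lines = ["FIRST 3"]  # capture up to the first 3 content lines per match

try:
    print(extract_sections(log_path, search_terms, sections,
                           specify_lines, use_total_lines=False,
                           total_lines=0))
finally:
    os.unlink(log_path)

The sketch leans on the new `start_line is None` guard: a term with no occurrence now skips its section instead of raising an IndexError, so a mixed list of matching and non-matching terms still produces output.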