Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#113 search multiple terms #122

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions client-app/src/components/OrcaDashboardComponent.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,12 @@ const OrcaDashboardComponent = () => {
return;
}

const searchTermsArray = searchTerms.split(",").map((term) => term.trim().toUpperCase());

const data = {
file_path: filePath.toString(),
search_terms: searchTerms.split(","),
sections: sections.split(","),
search_terms: searchTermsArray,
sections: sections.split(",").map((section) => section.trim()),
specify_lines: specifyLines.toString(),
};

Expand Down Expand Up @@ -81,10 +83,12 @@ const OrcaDashboardComponent = () => {
return;
}

const searchTermsArray = searchTerms.split(",").map((term) => term.trim().toUpperCase()); // Clean and split terms

const data = {
file_path: filePath.toString(),
search_terms: searchTerms.split(","),
sections: sections.split(","),
search_terms: searchTermsArray, // Send the array of search terms
sections: sections.split(",").map((section) => section.trim()), // Clean section inputs
specify_lines: specifyLines.toString(),
};

Expand Down
135 changes: 68 additions & 67 deletions server/services/file_search_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
import re

def extract_sections(file_path, search_terms, sections, specify_lines, use_total_lines, total_lines):
'''
Extracts the data from orca log file based on search terms and sections.
'''
"""
Extracts the data from the ORCA log file based on search terms and sections.
"""
with open(file_path, 'r', encoding='utf-8') as f:
lines = f.readlines()

Expand All @@ -27,10 +27,8 @@ def is_content_line(line, term, header_pattern=None):

def is_end_pattern(lines, index):
"""
Check if the current line and subsequent lines
form an end pattern.
Returns:
True if an end pattern is detected, False otherwise.
Check if the current line and subsequent lines form an end pattern.
Returns True if an end pattern is detected, False otherwise.
"""
if index + 3 >= len(lines):
return False
Expand All @@ -43,85 +41,88 @@ def is_end_pattern(lines, index):

starts_with_delimiter = (current_line.startswith('-') or current_line.startswith('*'))
repeated_in_following_lines = (current_line in two_lines_after or
current_line in three_lines_after)
current_line in three_lines_after)
next_line_not_delimiter = not (next_line.startswith('---') or next_line.startswith('***'))

is_end_pattern_flag = ((starts_with_delimiter and
repeated_in_following_lines and
next_line_not_delimiter) or is_next_two_lines_empty)
repeated_in_following_lines and
next_line_not_delimiter) or is_next_two_lines_empty)

return is_end_pattern_flag

# Iterate over each term to search for matches
for term in search_terms:
line_num = 0
term_line_num = []
terms_num = 0
for line in lines:
if term in line:
term_line_num.append(line_num)
terms_num += 1
line_num += 1

for i in sections:
section_lines = specify_lines[i-1].split()
start_line = term_line_num[i-1]
line_empty = 0
search_term = search_terms[0] # Update it when supporting multiple search terms
document_content += lines[start_line]

if section_lines[0].upper() == 'WHOLE' and not use_total_lines:
while line_empty == 0:
if search_term not in lines[start_line].strip() and is_content_line(
lines[start_line], search_term, header_pattern):
document_content += lines[start_line]
if is_end_pattern(lines, start_line):
break
start_line += 1

if section_lines[0].upper() == 'WHOLE' and use_total_lines:
for _ in range(total_lines - start_line + term_line_num[i-1]):
if search_term not in lines[start_line].strip() and is_content_line(
lines[start_line], search_term, header_pattern):
document_content += lines[start_line]
if is_end_pattern(lines, start_line):
break
start_line += 1
line_empty = 1

elif section_lines[0].upper() == 'FIRST':
line_count = 0
while line_count < int(section_lines[1]):
if search_term not in lines[start_line].strip() and is_content_line(
lines[start_line], search_term, header_pattern):
document_content += lines[start_line]
line_count += 1
if is_end_pattern(lines, start_line):
break
start_line += 1

elif section_lines[0].upper() == 'LAST':
temp_content = []
while start_line < len(lines):
if is_end_pattern(lines, start_line):
break
if is_content_line(lines[start_line], search_term, header_pattern):
temp_content.append(lines[start_line])
start_line += 1
document_content += ''.join(temp_content[-int(section_lines[1]):])

elif section_lines[0].upper() == 'SPECIFIC':
specific_lines = [int(l) for l in section_lines[1].split(",")]
for l in specific_lines:
if start_line + l < len(lines) and not is_end_pattern(lines, start_line + l):
if is_content_line(lines[start_line + l], search_term, header_pattern):
document_content += lines[start_line + l]
# Process each section for the current search term
for i in range(len(sections)):
section_lines = specify_lines[i].split()
start_line = term_line_num[i] if i < len(term_line_num) else None
if start_line is None:
continue # Skip this section if the term is not found

line_empty = 0
document_content += lines[start_line]

if section_lines[0].upper() == 'WHOLE' and not use_total_lines:
while line_empty == 0:
if term not in lines[start_line].strip() and is_content_line(
lines[start_line], term, header_pattern):
document_content += lines[start_line]
if is_end_pattern(lines, start_line):
break
start_line += 1

if section_lines[0].upper() == 'WHOLE' and use_total_lines:
for _ in range(total_lines - start_line + term_line_num[i]):
if term not in lines[start_line].strip() and is_content_line(
lines[start_line], term, header_pattern):
document_content += lines[start_line]
if is_end_pattern(lines, start_line):
break
start_line += 1
line_empty = 1

elif section_lines[0].upper() == 'FIRST':
line_count = 0
while line_count < int(section_lines[1]):
if term not in lines[start_line].strip() and is_content_line(
lines[start_line], term, header_pattern):
document_content += lines[start_line]
line_count += 1
if is_end_pattern(lines, start_line):
break
start_line += 1

elif section_lines[0].upper() == 'LAST':
temp_content = []
while start_line < len(lines):
if is_end_pattern(lines, start_line):
break
if is_content_line(lines[start_line], term, header_pattern):
temp_content.append(lines[start_line])
start_line += 1
document_content += ''.join(temp_content[-int(section_lines[1]):])

elif section_lines[0].upper() == 'SPECIFIC':
specific_lines = [int(l) for l in section_lines[1].split(",")]
for l in specific_lines:
if start_line + l < len(lines) and not is_end_pattern(lines, start_line + l):
if is_content_line(lines[start_line + l], term, header_pattern):
document_content += lines[start_line + l]

return document_content


def save_document_to_bytes(document):
'''
"""
Save the Word document to a byte string
'''
"""
file_stream = BytesIO()
document.save(file_stream)
return file_stream.getvalue()
4 changes: 2 additions & 2 deletions server/usecases/search_orca_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ def preview_document_use_case(data):
based on the provided data.
'''
file_path = data.get('file_path')
search_terms = data.get('search_terms')
search_terms = data.get('search_terms', [])
sections = data.get('sections')
temp_specify_lines = data.get('specify_lines')
use_total_lines = data.get('use_total_lines', False)
Expand Down Expand Up @@ -38,7 +38,7 @@ def find_sections_use_case(data):
provided search query.
'''
file_path = data.get('file_path')
search_terms = data.get('search_terms')
search_terms = data.get('search_terms', [])
sections = data.get('sections')
temp_specify_lines = data.get('specify_lines')
use_total_lines = data.get('use_total_lines', False)
Expand Down
Loading