diff --git a/pyresparser/resume_parser.py b/pyresparser/resume_parser.py
index 8817057..8a6117b 100644
--- a/pyresparser/resume_parser.py
+++ b/pyresparser/resume_parser.py
@@ -36,10 +36,17 @@ def __init__(
             'total_experience': None,
         }
         self.__resume = resume
-        if not isinstance(self.__resume, io.BytesIO):
-            ext = os.path.splitext(self.__resume)[1].split('.')[1]
+
+        if isinstance(self.__resume, io.BytesIO):
+            # In-memory upload: take the extension from the attached name.
+            ext = os.path.splitext(self.__resume.name)[1].lstrip('.')
+        elif os.path.isfile(self.__resume):
+            # A path to an existing file keeps the extension-based dispatch.
+            ext = os.path.splitext(self.__resume)[1].lstrip('.')
         else:
-            ext = self.__resume.name.split('.')[1]
+            # Anything else is treated as raw resume text.
+            ext = ''
+
         self.__text_raw = utils.extract_text(self.__resume, '.' + ext)
         self.__text = ' '.join(self.__text_raw.split())
         self.__nlp = nlp(self.__text)
diff --git a/pyresparser/utils.py b/pyresparser/utils.py
index 94ea373..31a8679 100644
--- a/pyresparser/utils.py
+++ b/pyresparser/utils.py
@@ -153,6 +153,10 @@ def extract_text_from_doc(doc_path):
         return ' '
 
 
+def extract_text_from_string(string):
+    ''' Return the input unchanged; used when the resume is given as raw text '''
+    return string
+
 def extract_text(file_path, extension):
     '''
     Wrapper function to detect the file extension and call text
@@ -162,6 +166,7 @@ def extract_text(file_path, extension):
     :param extension: extension of file `file_name`
     '''
     text = ''
+
     if extension == '.pdf':
         for page in extract_text_from_pdf(file_path):
             text += ' ' + page
@@ -169,6 +174,9 @@ def extract_text(file_path, extension):
         text = extract_text_from_docx(file_path)
     elif extension == '.doc':
         text = extract_text_from_doc(file_path)
+    else:
+        text = extract_text_from_string(file_path)
+
     return text
 
 
diff --git a/OmkarResume.pdf b/test/fixtures/OmkarResume.pdf
similarity index 100%
rename from OmkarResume.pdf
rename to test/fixtures/OmkarResume.pdf
diff --git a/test_name.py b/test/test_pyresparser.py
similarity index 58%
rename from test_name.py
rename to test/test_pyresparser.py
index e3fc245..d10c306 100644
--- a/test_name.py
+++ b/test/test_pyresparser.py
@@ -6,6 +6,7 @@ import urllib
 
 from urllib.request import Request, urlopen
 from pyresparser import ResumeParser
+from pathlib import Path
 
 def get_remote_data():
     try:
@@ -21,9 +22,9 @@ def get_remote_data():
         return 'File not found. Please provide correct URL for resume file.'
 
 def get_local_data():
-    data = ResumeParser('OmkarResume.pdf').get_extracted_data()
+    data = ResumeParser(str(Path(__file__).parent.resolve() / 'fixtures/OmkarResume.pdf')).get_extracted_data()
     return data
-    
+
 def test_remote_name():
     data = get_remote_data()
     assert 'Omkar Pathak' == data[0]['name']
@@ -32,10 +33,25 @@ def test_remote_phone_number():
     data = get_remote_data()
     assert '8087996634' == data[0]['mobile_number']
 
-def test_local_name():
+def test_local_skills():
     data = get_local_data()
-    assert 'Omkar Pathak' == data['name']
+    assert 'C++' in data['skills']
 
 def test_local_phone_number():
     data = get_local_data()
     assert '8087996634' == data['mobile_number']
+
+def test_extract_string():
+
+    string = ("Joe Bloggs email: joe.bloggs@test.com \n"
+              "Professional Experience \n"
+              "Microsoft \n Jan 2017 - Mar 2020 \n"
+              "Analyst \n"
+              "Created monthly Excel and Powerpoint reports highlighting KPIs in a clear and simple format. \n"
+              "Used predictive modelling to detect patterns in customer behaviour using Python. \n"
+              "Education \n"
+              "University of Oxford \n"
+              "BSc in Computer Science \n")
+
+    data = ResumeParser(string).get_extracted_data()
+    assert 'Excel' in data['skills']