Skip to content

Commit

Permalink
Multiple files added
Browse files Browse the repository at this point in the history
  • Loading branch information
SOUMEE2000 committed Feb 4, 2023
1 parent 1a90231 commit f1bca79
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 20 deletions.
Binary file modified Demo/Interface_Results.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 14 additions & 8 deletions Models.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,29 @@ def get_HF_embeddings(sentences):
def get_doc2vec_embeddings(JD, text_resume):
    """Train a Doc2Vec model on the job description and embed each resume.

    Args:
        JD: Job description text. It is the only document the model is
            trained on.
        text_resume: Iterable of resume texts. A single string is also
            accepted and treated as one resume.

    Returns:
        Tuple ``(JD_embeddings, resume_embeddings)``. ``JD_embeddings`` is the
        trained vector of the job description reshaped to a single row, and
        ``resume_embeddings`` is a list holding one single-row inferred vector
        per resume, in input order.
    """
    # A bare string would otherwise be iterated character by character below.
    if isinstance(text_resume, str):
        text_resume = [text_resume]

    # Fetches the "punkt" resource (presumably what word_tokenize relies on).
    nltk.download("punkt")
    data = [JD]
    tagged_data = [
        TaggedDocument(words=word_tokenize(_d.lower()), tags=[str(i)])
        for i, _d in enumerate(data)
    ]

    model = gensim.models.doc2vec.Doc2Vec(vector_size=512, min_count=3, epochs=80)
    model.build_vocab(tagged_data)
    # Reuse the epoch count configured on the model instead of repeating it.
    model.train(tagged_data, total_examples=model.corpus_count, epochs=model.epochs)
    # Tag '0' is the job description, the only tagged document.
    JD_embeddings = np.transpose(model.docvecs['0'].reshape(-1, 1))

    resume_embeddings = []
    for resume in text_resume:
        tokens = word_tokenize(resume.lower())
        vector = model.infer_vector(tokens)
        # Column vector transposed into a single row, matching JD_embeddings.
        resume_embeddings.append(np.transpose(vector.reshape(-1, 1)))
    return (JD_embeddings, resume_embeddings)


def cosine(embeddings1, embeddings2):
    """Score every embedding in ``embeddings1`` against ``embeddings2``.

    Args:
        embeddings1: Iterable of embeddings, one per resume. Each entry is
            passed to ``cosine_similarity`` as its first argument.
        embeddings2: Embedding of the job description, compared against
            every entry of ``embeddings1``.

    Returns:
        List of match percentages as strings, rounded to two decimals, in the
        same order as ``embeddings1``. Empty when ``embeddings1`` is empty.
    """
    reference = np.array(embeddings2)  # loop-invariant, convert once
    score_list = []
    for embedding in embeddings1:
        similarity = cosine_similarity(np.array(embedding), reference)
        # Scale to a percentage first and round last, so the multiplication
        # cannot reintroduce floating-point noise into the reported value.
        percentage = float(np.round(similarity[0][0] * 100, 2))
        print(f"Your resume matches about {percentage}% of the job description.")
        score_list.append(str(percentage))
    return score_list
21 changes: 13 additions & 8 deletions Resume_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,31 @@ def extract_data(feed):
return data # build more code to return a dataframe


def compare(uploaded_files, JD, flag='HuggingFace-BERT'):
    """Score every uploaded resume against the job description.

    Args:
        uploaded_files: Iterable of uploaded resume files; each one is passed
            to ``extract_data``.
        JD: Job description text.
        flag: ``'HuggingFace-BERT'`` embeds with ``get_HF_embeddings``; any
            other value embeds with ``get_doc2vec_embeddings``.

    Returns:
        Whatever ``cosine`` returns for the resume embeddings and the job
        description embedding, or an empty list when the job description or
        the resumes are missing.
    """
    # Without both inputs there is nothing to compare; returning early keeps
    # the result defined instead of failing on an unbound local.
    if JD is None or not uploaded_files:
        return []

    resume_texts = [extract_data(uploaded) for uploaded in uploaded_files]

    if flag == 'HuggingFace-BERT':
        JD_embeddings = get_HF_embeddings(JD)
        resume_embeddings = [get_HF_embeddings(text) for text in resume_texts]
    else:
        JD_embeddings, resume_embeddings = get_doc2vec_embeddings(JD, resume_texts)

    if JD_embeddings is None:
        return []
    return cosine(resume_embeddings, JD_embeddings)
Binary file modified __pycache__/Models.cpython-37.pyc
Binary file not shown.
Binary file modified __pycache__/Resume_Scanner.cpython-37.pyc
Binary file not shown.
17 changes: 13 additions & 4 deletions application.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,26 @@

# Tab 1: collect the resumes and the job description, then run the comparison.
with tab1:
    st.title("Applicant Tracking System")
    # accept_multiple_files=True lets the user upload several resumes at once.
    uploaded_files = st.file_uploader(
        '**Choose your resume.pdf file:** ',
        type="pdf",
        accept_multiple_files=True,
    )
    st.write("")
    JD = st.text_area("**Enter the job description:**")
    comp_pressed = st.button("Compare!")
    if comp_pressed:
        score = compare(uploaded_files, JD, flag)

# Tab 2: show one expander per resume, ordered by file name.
with tab2:
    st.header("Results")
    my_dict = {}
    if comp_pressed:
        # Pair each uploaded file with its score by position.
        my_dict = {
            uploaded.name: value
            for uploaded, value in zip(uploaded_files, score)
        }
        sorted_dict = dict(sorted(my_dict.items()))
        for name, value in sorted_dict.items():
            with st.expander(str(name)):
                st.write("Score is: ", value)
    else:
        st.write("#### Throw in some Resumes to see the score :)")

0 comments on commit f1bca79

Please sign in to comment.