-
Notifications
You must be signed in to change notification settings - Fork 0
/
simpletex.py
169 lines (148 loc) · 5.38 KB
/
simpletex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from PIL import Image
import base64
import toml
# Parse the project's TOML configuration file into a dictionary.
config = toml.load("config.toml")

# Keep the "theme" entry at hand; a config without that key raises KeyError here.
value = config["theme"]

# Give the app its page title.
# NOTE(review): Streamlit documents that set_page_config has to be the first
# Streamlit command on a page — keep it ahead of every other st.* call.
st.set_page_config(page_title="SimpleTex: Your Simple Keyword Extractor")
def extract_keywords(text, top_n=10):
    """Return the highest-scoring TF-IDF terms of *text*.

    The text is vectorized as a single document with English stop words
    removed, and the resulting terms are ranked by their TF-IDF score.

    Args:
        text: Raw input text to analyse.
        top_n: Maximum number of keywords to return. Defaults to 10, the
            limit that was previously hard-coded.

    Returns:
        A list of ``(term, score)`` tuples ordered by score, highest first.
        The list is empty when *text* is empty or whitespace-only, when
        *top_n* is not positive, or when no term survives stop-word removal.
    """
    # Short-circuit inputs that cannot yield keywords; the vectorizer would
    # otherwise raise on a document without any usable token.
    if not text or not text.strip() or top_n <= 0:
        return []

    tfidf = TfidfVectorizer(stop_words='english')
    try:
        # One-document corpus: the matrix has a single row of term scores.
        tfidf_matrix = tfidf.fit_transform([text])
    except ValueError:
        # scikit-learn signals an empty vocabulary (e.g. text made only of
        # stop words or punctuation) with ValueError; report "no keywords".
        return []

    feature_names = tfidf.get_feature_names_out()
    tfidf_scores = tfidf_matrix.toarray().flatten()

    # sorted() is stable even with reverse=True, so equally scored terms keep
    # the order in which the vectorizer lists them.
    ranked = sorted(
        zip(feature_names, tfidf_scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return ranked[:top_n]
def load_image(image_path):
    """Read the file at *image_path* and return its bytes base64-encoded as a str."""
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()
# Read the logo file from disk and base64-encode it so it can be inlined
# below as a data: URI instead of being served as a separate file.
logo_path = "simpletex_logo.png"
logo_base64 = load_image(logo_path)
# Inject the app's custom CSS together with the centered logo and page title
# as one raw HTML snippet. The string is an f-string (only {logo_base64} is
# interpolated), so every literal CSS brace is doubled as {{ and }}.
st.markdown(f"""
<style>
.block-container {{
padding-top: 2rem;
padding-bottom: 2rem;
padding-left: 2rem;
padding-right: 2rem;
background-color: #b6a081;
border-radius: 10px;
border: 1px solid #F0C775;
background: #B6A081;
backdrop-filter: blur(2px);
width: 80%;
margin: auto;
}}
.stApp {{
background-color: #284867;
display: flex;
justify-content: center;
align-items: center;
}}
.stTextArea textarea {{
background-color: #f5f5dc;
color: #333333;
font-size: 16px;
border: 2px solid #cccccc;
border-radius: 10px;
padding: 10px;
transition: border-color 0.3s;
}}
.stTextArea textarea:focus {{
border-color: #F0C775;
}}
.stButton button {{
background-color: #284867;
color: white;
font-size: 16px;
border-radius: 5px;
padding: 10px 20px;
transition: background-color 0.3s;
}}
.stButton button:active {{
background-color: #f0c775;
}}
.css-1aumxhk, .css-1v0mbdj, .css-1d391kg {{
margin-bottom: 20px;
}}
.center-logo {{
display: flex;
justify-content: center;
align-items: center;
margin-bottom: 20px;
}}
.logo-img {{
max-width: 200px;
}}
h1.title {{
margin-top: -30px;
text-align: center;
color: #4a4a4a;
}}
.expander-header {{
color: #4a4a4a !important;
}}
.expander-content {{
display: none; /* Hide expander content by default */
}}
</style>
<div class="center-logo">
<img class="logo-img" src="data:image/png;base64,{logo_base64}" />
</div>
<h1 class="title">Simple Keyword Extractor</h1>
""", unsafe_allow_html=True)
# Split the page into two side-by-side columns of equal width.
col1, col2 = st.columns(2)

# Left-hand column: text entry, submit button, and usage notes.
with col1:
    user_input = st.text_area("Enter text here", height=250)
    submit_button = st.button("Submit")

    # Acknowledge the click only when there is non-blank text to process.
    if submit_button:
        if user_input.strip():
            st.write("You have submitted the text. Please wait for the results...")

    # Help panel, collapsed until the user opens it.
    with st.expander("How to Use This Tool", expanded=False):
        st.markdown("""
- **Step 1:** Paste or type your text into the text area above.
- **Step 2:** Press Enter or click outside the text box to process the text.
- **Step 3:** View the extracted keywords and their importance scores below the text box.
- **Step 4:** If installed, view the optional word cloud visualizing keyword importance.
""", unsafe_allow_html=True)
# Right column: ranked keywords and, when the optional packages are
# installed, a word cloud built from the same scores.
with col2:
    # Ignore whitespace-only input, matching the check used for the submit
    # message; blank text has nothing to extract.
    if user_input.strip():
        try:
            keywords = extract_keywords(user_input)
        except ValueError:
            # The TF-IDF vectorizer raises ValueError when the text contains
            # no usable terms (e.g. only stop words); show "no keywords"
            # instead of crashing the page.
            keywords = []

        # Display the keywords
        st.subheader('Extracted Keywords:')
        if not keywords:
            st.write("No keywords could be extracted from the text.")
        else:
            # Distinct names so the page-level col1/col2 are not shadowed.
            first_half_col, second_half_col = st.columns(2)
            # Round up so that an odd keyword count (including a single
            # keyword) fills the first column before the second.
            half = (len(keywords) + 1) // 2
            for i, (word, score) in enumerate(keywords):
                target = first_half_col if i < half else second_half_col
                target.write(f"{word} (Score: {score:.2f})")

            # Optional: Display a word cloud. Only the imports are guarded,
            # so an ImportError cannot mask a failure while rendering.
            try:
                from wordcloud import WordCloud
                import matplotlib.pyplot as plt
            except ImportError:
                st.error("WordCloud module is not installed. Please run 'pip install wordcloud' to enable this feature. Thanks!")
            else:
                wordcloud = WordCloud(
                    width=800,
                    height=400,
                    background_color='white',
                ).generate_from_frequencies(dict(keywords))
                # Draw on an explicit figure and hand that figure to Streamlit
                # rather than the pyplot module itself.
                fig, ax = plt.subplots(figsize=(10, 5))
                ax.imshow(wordcloud, interpolation='bilinear')
                ax.axis("off")
                st.pyplot(fig)
                # Streamlit re-executes the script on every interaction;
                # close the figure so they do not pile up in memory.
                plt.close(fig)