-
Notifications
You must be signed in to change notification settings - Fork 0
/
Trending.py
179 lines (149 loc) · 5.19 KB
/
Trending.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import streamlit as st
import pandas as pd
import numpy as np
from collections import Counter
import re
# from nltk.util import ngrams
# import nltk
import os
# ---------------------------------------------------------------------------
# Load the selected CSV, derive trend columns, and filter rows by keyword.
# ---------------------------------------------------------------------------

# List the CSV files in the data folder (same job as glob.glob would do).
csv_folder_path = "CSV Files"
csv_files = []
if os.path.isdir(csv_folder_path):
    csv_files = [
        name for name in os.listdir(csv_folder_path) if name.endswith(".csv")
    ]

# With no CSV available st.selectbox returns None and os.path.join would raise
# a TypeError, so end this script run early with a readable message instead.
if not csv_files:
    st.warning(f"No CSV files found in '{csv_folder_path}'.")
    st.stop()

# Display a dropdown to select a CSV file.
selected_csv = st.selectbox("Select a CSV file", csv_files)

# df2 keeps the untouched CSV contents; df is a copy that receives the derived
# columns (one disk read instead of two).
df2 = pd.read_csv(os.path.join(csv_folder_path, selected_csv))
df = df2.copy()

# Trend analysis. The last 12 columns of the CSV are used as the value columns
# (NOTE(review): presumably one column per month -- confirm against the data).
# Each "N-Months" value is the sum of the most recent N columns minus the sum
# of the N columns before them.
month_columns = list(df2.columns[-12:])
df["Sum"] = df2.iloc[:, -12:].sum(axis=1).round(2)
df["6-Months"] = (
    df2.iloc[:, -6:].sum(axis=1) - df2.iloc[:, -12:-6].sum(axis=1)
).round(2)
df["3-Months"] = (
    df2.iloc[:, -3:].sum(axis=1) - df2.iloc[:, -6:-3].sum(axis=1)
).round(2)
df["1-Month"] = (
    df2.iloc[:, -1:].sum(axis=1) - df2.iloc[:, -2:-1].sum(axis=1)
).round(2)

# The "Month" column holds the keyword text. Missing cells are skipped so the
# join below cannot fail on NaN.
keywords = df["Month"].dropna().astype(str)
all_keywords = " ".join(keywords)

# \w+ matches runs of alphanumerics/underscores; every word is lower-cased so
# counting is case-insensitive.
words = [word.lower() for word in re.findall(r"\w+", all_keywords)]

# Frequency of each word, expressed as a percentage of all words. most_common()
# orders by descending count and keeps first-seen order for ties, which is the
# same order a stable descending sort on the percentage produces.
word_counts = Counter(words)
total_words = sum(word_counts.values())
sorted_word_freq_percentage = {
    word: (count / total_words) * 100
    for word, count in word_counts.most_common()
}

# The 20 most frequent words are offered as filter checkboxes.
top_keywords = list(sorted_word_freq_percentage)[:20]

st.header("Keyword Filter")
selected_keyword2 = st.text_input("Enter Keyword")

# One sidebar checkbox per top keyword, labelled with its frequency.
st.sidebar.header("Keyword Filter Checkbox")
checkboxes = {}
for keyword in top_keywords:
    checkboxes[keyword] = st.sidebar.checkbox(
        f"{keyword} ({sorted_word_freq_percentage[keyword]:.2f}%)", value=False
    )

# Keywords whose checkbox is ticked.
selected_keyword = [keyword for keyword, ticked in checkboxes.items() if ticked]

# Display the selected keywords.
st.write(f"Displaying data for keyword: {', '.join(selected_keyword)}")

# Lower-cased keyword text used by both filters. Rows without keyword text are
# excluded up front; they can never match a keyword.
has_month = df["Month"].notna()
month_text = df["Month"].fillna("").astype(str).str.lower()

# Checkbox filter: keep rows containing *every* ticked keyword. With nothing
# ticked the mask stays all-True, so all rows pass. Checkbox keywords are
# always lower-case, so an "All" entry can never occur and needs no branch.
row_mask = has_month.copy()
for keyword in selected_keyword:
    row_mask &= month_text.str.contains(keyword, regex=False)
filtered_data = df[row_mask]
df = filtered_data

# Search-box filter: case-insensitive *literal* substring match. regex=False
# matters: user input such as "c++" or "(" is not a valid regular expression
# and would otherwise raise re.error.
search_mask = month_text.loc[df.index].str.contains(
    selected_keyword2.lower(), regex=False
)
df = df[search_mask]

# Column slices consumed by the table below, selected by name rather than by
# position so they do not depend on the order the derived columns were added.
df_12M = df[month_columns]
# NOTE(review): the keyword column is taken by position (second column);
# confirm this is the intended text column for every CSV.
df_Keywords = df.iloc[:, 1:2]
df_1M = df[["1-Month"]]
df_Sum = df[["Sum"]]
df_6M = df[["6-Months"]]
df_3M = df[["3-Months"]]
def df2arr2(df):
    """Return the rows of *df* as a list of NumPy arrays, one array per row."""
    # Iterating a 2-D array yields its rows, so list() collects them directly.
    return list(df.to_numpy())
def df2arr(df):
    """Flatten *df* row by row into a single list of its cell values."""
    return [cell for row in df.to_numpy() for cell in row]
# ---------------------------------------------------------------------------
# Assemble the display table and render it.
# ---------------------------------------------------------------------------

# Turn each column slice into a list: "12M" keeps one array of values per row
# (the bar-chart cell), every other column is flattened to one value per row.
arr_12M = df2arr2(df_12M)
arr_6M = df2arr(df_6M)
arr_3M = df2arr(df_3M)
arr_1M = df2arr(df_1M)
arr_Sum = df2arr(df_Sum)
arr_Keywords = df2arr(df_Keywords)

data_df = pd.DataFrame(
    {
        "Keywords": arr_Keywords,
        "12M": arr_12M,
        "6M": arr_6M,
        "3M": arr_3M,
        "1M": arr_1M,
        "Sum": arr_Sum,
    }
)

# Handle the no-data case explicitly instead of relying on the editor to fail.
if data_df.empty:
    st.warning("No data available.")
else:
    try:
        st.data_editor(
            data_df,
            column_config={
                "Keywords": st.column_config.TextColumn(
                    "Keywords", help="Related Keywords", max_chars=100
                ),
                "12M": st.column_config.BarChartColumn(
                    "12M",
                    help="Trending in the last 12 months",
                    y_min=0,
                    y_max=100,
                ),
                "6M": st.column_config.NumberColumn(
                    "6M",
                    help="Trending in the last 6 months",
                    format="%.2f",
                ),
                "3M": st.column_config.NumberColumn(
                    "3M",
                    help="Trending in the last 3 months",
                    format="%.2f",
                ),
                "1M": st.column_config.NumberColumn(
                    "1M",
                    help="Trending in the last 1 month",
                    format="%.2f",
                ),
                "Sum": st.column_config.NumberColumn(
                    "Sum", help="Most Searched Trend", format="%.2f"
                ),
            },
            hide_index=True,
            width=1000, height=1000,
        )
    # Rendering stays best-effort, but only ordinary errors are caught: a bare
    # "except:" would also trap KeyboardInterrupt and SystemExit.
    except Exception:
        st.warning("No data available.")