-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit ce4ca83
Showing
22 changed files
with
1,537,042 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# This workflow will install Python dependencies, run tests and lint with a single version of Python | ||
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python | ||
|
||
name: Python application

# Run on pushes to main and on pull requests that target main.
on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v4
    - name: Set up Python 3.10
      # v5 runs on a currently supported Node.js runtime (v3 used Node 16).
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install flake8 pytest
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    # The pytest step below is intentionally left disabled.
    #- name: Test with pytest
      #run: |
        #pytest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
files |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# [Tikvah](https://t.me/s/tikvahethiopia) Telegram channel analysis repo | ||
**Note:** the analysis in this repo is done for **learning purposes**. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import json | ||
|
||
# Drop every key listed in stop_words.json from the JSON object stored in
# cleared_unwanted_keys.json and write what remains to final_filtered_data.json.
data_file_path = 'cleared_unwanted_keys.json'
keys_to_remove_file_path = 'stop_words.json'
output_file_path = 'final_filtered_data.json'

with open(data_file_path, 'r', encoding='utf-8') as file:
    key_value_pairs = json.load(file)

with open(keys_to_remove_file_path, 'r', encoding='utf-8') as file:
    keys_to_remove = json.load(file)

# Keys of a decoded JSON object are always strings, so only string entries of
# the stop-word collection can ever match.  Collecting them into a set makes
# each membership test O(1) instead of rescanning a list for every key.
if isinstance(keys_to_remove, (list, dict)):
    keys_to_remove = {key for key in keys_to_remove if isinstance(key, str)}

# Filter the loaded mapping directly; no intermediate copy is needed.
filtered_dict = {k: v for k, v in key_value_pairs.items() if k not in keys_to_remove}

with open(output_file_path, 'w', encoding='utf-8') as file:
    json.dump(filtered_dict, file, ensure_ascii=False, indent=4)

print(f"Filtered data saved to {output_file_path}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import json | ||
import re | ||
|
||
# Input dictionary and the two output files the keys are split into.
file_path = 'word_dictionary.json'
valid_keys_file = 'valid_keys.json'
invalid_keys_file = 'invalid_keys.json'

# Patterns used to classify keys.
symbols_pattern = re.compile(r'^[\W_]+$', flags=re.UNICODE)  # only non-word characters and/or underscores
numbers_pattern = re.compile(r'^\d+$')  # only digits
emoji_pattern = re.compile("[\U00010000-\U0010FFFF]", re.UNICODE)  # one code point in U+10000..U+10FFFF

with open(file_path, 'r', encoding='utf-8') as source:
    data = json.load(source)

# Accumulators filled while classifying the loaded entries.
valid_keys, invalid_keys = {}, {}
|
||
def is_only_emoji(s):
    """Return True when every character of ``s`` matches ``emoji_pattern``.

    An empty string has no character that could fail the check, so it also
    yields True.
    """
    for char in s:
        if emoji_pattern.match(char) is None:
            return False
    return True
|
||
# Route each entry: a key that matches the symbols pattern, the numbers
# pattern, or the emoji-only check is set aside as invalid; all others are kept.
for key, value in data.items():
    rejected = (
        symbols_pattern.match(key)
        or numbers_pattern.match(key)
        or is_only_emoji(key)
    )
    bucket = invalid_keys if rejected else valid_keys
    bucket[key] = value

# Write the valid keys first, then the invalid ones, each to its own JSON file.
for out_path, mapping in ((valid_keys_file, valid_keys), (invalid_keys_file, invalid_keys)):
    with open(out_path, 'w', encoding='utf-8') as handle:
        json.dump(mapping, handle, ensure_ascii=False, indent=4)

print(f"Valid keys saved to {valid_keys_file}")
print(f"Invalid keys saved to {invalid_keys_file}")
Oops, something went wrong.