Skip to content

Commit

Permalink
Pair char preprocessor added (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
hmhard authored Aug 12, 2024
1 parent fe9b844 commit 1741cd2
Show file tree
Hide file tree
Showing 22 changed files with 1,192,158 additions and 1,106,098 deletions.
5 changes: 3 additions & 2 deletions clear-non-alpha.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import re

# file_path = 'outputs/word_pair_dictionary.json'
file_path = 'outputs/word_dictionary.json'

valid_keys_file = 'outputs/valid_keys.json'
Expand All @@ -17,11 +18,11 @@
invalid_keys = {}

def is_only_emoji(s):
return all(emoji_pattern.match(c) for c in s)
return all(emoji_pattern.search(c) for c in s)

# Separate keys based on whether they are only symbols, only numbers, or only emojis
for k, v in data.items():
if symbols_pattern.match(k) or numbers_pattern.match(k) or is_only_emoji(k):
if symbols_pattern.search(k) or numbers_pattern.search(k) or is_only_emoji(k):
invalid_keys[k] = v
else:
valid_keys[k] = v
Expand Down
41 changes: 26 additions & 15 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,32 +4,42 @@
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Tikvah Telegram Channel Too 500 words</title>
<title>Tikvah Telegram Channel Top used words</title>
<script src="https://cdn.tailwindcss.com"></script>

<style>
#chartdiv {
width: 100%;
height: 550px;
height: 580px;
}

#chartdiv-pair {
width: 100%;
height: 580px;
}
</style>
</head>

<body>

<div class=" text-2xl text-center text-pink-500 underline py-3"><a href="https://t.me/s/tikvahethiopia">Tikvah Telegram Channel</a> Most Used 500 Words Until 2024-08-07</div>
<div class=" text-2xl text-center text-pink-500 underline py-3"><a href="https://t.me/s/tikvahethiopia">Tikvah Telegram Channel</a> Most Used 300 Words Until 2024-08-07</div>


<section class="section">

<div id="chartdiv"></div>
<div class=" text-2xl text-center text-pink-500 underline py-7"> Most Used 250 Pair Words Until 2024-08-07</div>

<div id="chartdiv-pair"></div>



<div class="has-text-centered"></div>
</section>
<div class="text-center underline">
<a href="https://github.com/hmhard/tikvah-tg-channel-analysis">View on Github</a>
</div>
<script src="./top-500.js" type="text/javascript"></script>
<script src="./top-300.js" type="text/javascript"></script>
<script src="./top-pair-250.js" type="text/javascript"></script>
<!-- Load jQuery (slim build) -->
<script src="https://code.jquery.com/jquery-3.3.1.slim.min.js" integrity="sha256-3edrmyuQ0w65f8gfBsqowzjJe2iM6n0nKciPUp8y+7E=" crossorigin="anonymous"></script>

Expand All @@ -43,39 +53,40 @@
tag: item,
value: data[item]
}));
var pair_datas = Object.keys(pair_data).map(item => ({
tag: item,
value: pair_data[item]
}));
var myWords = Object.keys(data);
var pairWords = Object.keys(pair_data);
$(window).on('load', function() {
loadText();

$('#redraw').on('click', function() {
loadText();
});
});

function loadText() {
drawWorldCloud(pairWords.join(' '), pair_datas, "chartdiv-pair");


drawWorldCloud(myWords.join(' '));
drawWorldCloud(myWords.join(' '), datas, "chartdiv");
}


function drawWorldCloud(sentence) {
function drawWorldCloud(sentence, datav, div) {

am4core.useTheme(am4themes_animated);

var chart = am4core.create("chartdiv", am4plugins_wordCloud.WordCloud);
var chart = am4core.create(div, am4plugins_wordCloud.WordCloud);
var series = chart.series.push(new am4plugins_wordCloud.WordCloudSeries());

series.accuracy = 4;
series.step = 15;
series.rotationThreshold = 0.7;
series.maxCount = 5000;
series.minWordLength = 2; // 最少頻度
series.minWordLength = 2;
series.labels.template.tooltipText = "{word}: {value}";
series.fontFamily = "'M PLUS 1p', sans-serif";
series.maxFontSize = am4core.percent(50);

series.data = datas
series.data = datav
series.dataFields.word = "tag";
series.dataFields.value = "value";

Expand Down
Loading

0 comments on commit 1741cd2

Please sign in to comment.