Skip to content

Commit

Permalink
Switch to MeCab and fix asyncio usage.
Browse files Browse the repository at this point in the history
Nagisa fails to install because its DyNet dependency does not build
(see clab/dynet#1662), so word segmentation now uses MeCab instead.
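asyncio.wait() no longer accepts bare coroutine objects (deprecated since Python 3.8, removed in 3.11), so each coroutine is now wrapped in a task with asyncio.create_task() before being awaited.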
  • Loading branch information
tlaufkoetter committed May 7, 2023
1 parent 4f68de1 commit 83a580e
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions src/segmentize_and_lookup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import sys
import regex as re
import nagisa
import MeCab
import requests
import asyncio as aio

Expand Down Expand Up @@ -50,18 +50,17 @@ async def get_meaning(dictionary, word, progress_bar):

async def main(meanings):
words = set()

wakati = MeCab.Tagger('-Owakati')
with open(sys.argv[1], 'r') as file:
content = file.read()
for word in filter(
words = {word for word in filter(
validate_word,
nagisa.wakati(content)):
words.add(word)
wakati.parse(content).split())}

print("Extracted {} words".format(len(words)))

progress_bar = ProgressBar(len(words))
coroutines = [get_meaning(meanings, word, progress_bar) for word in words]
coroutines = [aio.create_task(get_meaning(meanings, word, progress_bar)) for word in words]
await aio.wait(coroutines)
print("\nDone.")

Expand Down

0 comments on commit 83a580e

Please sign in to comment.