Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 27 additions & 47 deletions bin/pythainlp
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,51 +1,31 @@
#!python3
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import argparse
from pythainlp import __version__
# Legacy flag-based command line: every feature is selected through options
# on one flat parser rather than through sub-commands.
parser = argparse.ArgumentParser()
parser.add_argument("-t", "--text", default=None, help="text", type=str)
parser.add_argument("-seg", "--segment", help="word segment", action="store_true")
parser.add_argument("-c", "--corpus", help="manage corpus", action="store_true")
parser.add_argument("-pos", "--postag", help="postag", action="store_true")
parser.add_argument("-soundex", "--soundex", help="soundex", default=None)
parser.add_argument("-e", "--engine", default="newmm", help="the engine", type=str)
parser.add_argument("-pos-e", "--postag_engine", default="perceptron", help="the engine for postag", type=str)
parser.add_argument("-pos-c", "--postag_corpus", default="orchid", help="corpus for postag", type=str)
args = parser.parse_args()

if args.corpus:
    # Import only the two helpers the menu calls instead of the whole
    # namespace, so nothing else in this script can be shadowed.
    from pythainlp.corpus import download, remove

    print("PyThaiNLP Corpus")
    temp = ""
    # Interactive menu; ends on choice "4" (break) or when the user types "exit".
    while temp != "exit":
        print("\n1. Install\n2. Remove\n3. Update\n4. Exit\n")
        temp = input("Choose 1, 2, 3, or 4: ")
        if temp == "1":
            name = input("Corpus name:")
            download(name)
        elif temp == "2":
            name = input("Corpus name:")
            remove(name)
        elif temp == "3":
            # "Update" goes through the same download() call as "Install".
            name = input("Corpus name:")
            download(name)
        elif temp == "4":
            break
        else:
            print("Choose 1, 2, 3, or 4:")
elif args.text is not None:
    from pythainlp.tokenize import word_tokenize

    tokens = word_tokenize(args.text, engine=args.engine)
    if args.segment:
        print("|".join(tokens))
    elif args.postag:
        from pythainlp.tag import pos_tag

        # Each tagged item is rendered as "word/tag"; items are tab-separated.
        tagged = pos_tag(tokens, engine=args.postag_engine, corpus=args.postag_corpus)
        print("\t".join([i[0] + "/" + i[1] for i in tagged]))
elif args.soundex is not None:
    from pythainlp.soundex import soundex

    # --engine is shared with tokenization and defaults to "newmm"; when the
    # user left it at that default, fall back to the "lk82" soundex engine.
    if args.engine == "newmm":
        args.engine = "lk82"
    print(soundex(args.soundex, engine=args.engine))
import sys

from pythainlp import cli


# Top-level parser: it only identifies the namespace; everything after it is
# parsed by the namespace's own App.
parser = argparse.ArgumentParser(
    usage="pythainlp namespace command [options]"
)

parser.add_argument(
    "namespace",
    type=str,
    default="",
    nargs="?",
    help="[%s]" % "|".join(cli.available_namespaces)
)

# Only the first command-line token is parsed here so that options meant for
# the namespace/command are not rejected by this parser.
args = parser.parse_args(sys.argv[1:2])

cli.exit_if_empty(args.namespace, parser)

# Check against the declared namespace list rather than hasattr(): the cli
# package also exposes non-namespace attributes (e.g. ``sys``, ``make_usage``)
# that have no ``App`` and would crash with AttributeError.
if args.namespace in cli.available_namespaces:
    namespace = getattr(cli, args.namespace)
    # The full argv is passed on; each App slices off what it needs.
    namespace.App(sys.argv)
else:
    print(f"PyThaiNLP {__version__}")
    print(f"Namespace not available: {args.namespace}\nPlease run with --help for alternatives")

25 changes: 25 additions & 0 deletions pythainlp/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import sys

from . import corpus
from . import tokenization
from . import soundex
from . import tagging

# Names of the CLI namespaces provided by this package, in alphabetical order.
available_namespaces = sorted(("corpus", "tokenization", "soundex", "tagging"))

# Program name used as the prefix of generated usage strings.
cli_name = "pythainlp"


def make_usage(s):
    """Build the ``prog``/``usage`` keyword arguments for an ArgumentParser.

    :param s: text appended to the CLI name to form the program name
    :return: dict with ``prog`` and ``usage`` keys, ready for ``**`` unpacking
    """
    return {
        "prog": "{} {}".format(cli_name, s),
        # argparse substitutes %(prog)s itself when rendering the usage line.
        "usage": "%(prog)s command [options]",
    }


def exit_if_empty(d, parser):
    """Show the parser's help and exit with status 0 when ``d`` is falsy.

    :param d: value to check (e.g. a parsed argument that may be missing)
    :param parser: object providing ``print_help()``
    """
    if d:
        return

    parser.print_help()
    sys.exit(0)
40 changes: 40 additions & 0 deletions pythainlp/cli/corpus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import argparse

from pythainlp import corpus, cli


class App:
    """Command-line handler for the ``corpus`` namespace."""

    # Explicit whitelist of user-invokable commands. Dispatching on
    # hasattr(App, command) would also accept attributes such as "__init__"
    # or "__class__" and then fail with an unrelated error.
    _COMMANDS = ("download", "remove")

    def __init__(self, argv):
        """Parse ``argv`` and run the requested corpus command.

        :param argv: full process argument list; parsing starts at
            ``argv[2:]`` (after the program name and the namespace)
        """
        parser = argparse.ArgumentParser(**cli.make_usage("corpus"))

        parser.add_argument(
            "--name",
            type=str,
            help="corpus's name",
        )

        parser.add_argument(
            "command",
            type=str,
            default="",
            nargs="?",
            help="[download|remove]"
        )

        args = parser.parse_args(argv[2:])

        # No command given: print help and exit.
        cli.exit_if_empty(args.command, parser)
        command = args.command

        if command in App._COMMANDS:
            getattr(App, command)(args)
        else:
            print("No command available: %s" % command)

    @staticmethod
    def download(args):
        """Download the corpus named by ``args.name``."""
        corpus.download(args.name)

    @staticmethod
    def remove(args):
        """Remove the corpus named by ``args.name``."""
        corpus.remove(args.name)
29 changes: 29 additions & 0 deletions pythainlp/cli/soundex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import argparse

from pythainlp import cli
from pythainlp.soundex import soundex


class App:
    """Command-line handler for the ``soundex`` namespace."""

    def __init__(self, argv):
        """Parse ``argv`` and print the soundex code of ``--text``.

        :param argv: full process argument list; parsing starts at
            ``argv[2:]`` (after the program name and the namespace)
        """
        # Program name was misspelled "sounddex"; build it from the CLI name
        # so help output reads "<cli> soundex".
        parser = argparse.ArgumentParser(prog=f"{cli.cli_name} soundex")
        parser.add_argument(
            "--text",
            type=str,
            help="text",
        )

        parser.add_argument(
            "--engine",
            type=str,
            help="[udom83|lk82|metasound] (default: udom83)",
            default="udom83"
        )

        args = parser.parse_args(argv[2:])

        # No text given: print help and exit.
        cli.exit_if_empty(args.text, parser)

        sx = soundex(args.text, engine=args.engine)
        print(sx)
82 changes: 82 additions & 0 deletions pythainlp/cli/tagging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import argparse

from pythainlp import cli
from pythainlp.tag import pos_tag


class SubAppBase:
    """Shared argument handling and output for tagging sub-commands.

    Subclasses must set ``default_engine``, ``default_corpus``,
    ``default_sep`` and ``run`` on the instance before calling this
    ``__init__``.
    """

    def __init__(self, name, argv):
        """Parse ``argv``, run the tagger and print ``word/tag`` pairs.

        :param name: program name passed to ``argparse.ArgumentParser``
        :param argv: option arguments for this sub-command
        """
        parser = argparse.ArgumentParser(name)
        parser.add_argument(
            "--text",
            type=str,
            help="input text",
        )

        parser.add_argument(
            "--engine",
            type=str,
            help="default: %s" % self.default_engine,
            default=self.default_engine
        )

        # The help text advertises a default, so it must actually be applied;
        # without ``default=`` the tagger received corpus=None.
        parser.add_argument(
            "--corpus",
            type=str,
            help="default: %s" % self.default_corpus,
            default=self.default_corpus
        )

        parser.add_argument(
            '--sep',
            type=str,
            help="default: %s" % self.default_sep,
            default=self.default_sep
        )

        args = parser.parse_args(argv)

        self.args = args

        # Without --text, args.text is None and .split() below would raise
        # AttributeError; print help and exit instead.
        cli.exit_if_empty(args.text, parser)

        print(f"Using engine={args.engine}")

        # The input is expected to be pre-tokenized, with tokens joined by --sep.
        result = self.run(
            args.text.split(args.sep), engine=args.engine, corpus=args.corpus
        )

        result_str = map(lambda x: "%s/%s" % x, result)

        print(" ".join(result_str))


class POSTaggingApp(SubAppBase):
    """Part-of-speech tagging sub-command built on ``pos_tag``."""

    def __init__(self, *args, **kwargs):
        # These must be in place before the base initializer runs, because it
        # reads them while building the parser and executing the command.
        self.run = pos_tag
        self.default_engine, self.default_corpus = "perceptron", "orchid"
        self.default_sep = "|"

        super().__init__(*args, **kwargs)


class App:
    """Command-line handler for the ``tagging`` namespace."""

    def __init__(self, argv):
        """Parse the command from ``argv`` and dispatch to its sub-app.

        :param argv: full process argument list; ``argv[2]`` is the command
            and ``argv[3:]`` are handed to the sub-app
        :raises ValueError: if the command is not recognized
        """
        parser = argparse.ArgumentParser(**cli.make_usage("tagging"))
        parser.add_argument(
            "command",
            type=str,
            nargs="?",
            help="[pos]"
        )

        # Parse only the command token; its options belong to the sub-app.
        args = parser.parse_args(argv[2:3])
        command = args.command

        cli.exit_if_empty(args.command, parser)

        argv = argv[3:]

        if command == "pos":
            POSTaggingApp("Part-of-Speech tagging", argv)
        else:
            # Previously referenced an undefined name (``subcommand``), which
            # raised NameError instead of the intended ValueError.
            raise ValueError(f"no command:{command}")
76 changes: 76 additions & 0 deletions pythainlp/cli/tokenization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import argparse

from pythainlp import cli
from pythainlp.tokenize import word_tokenize, syllable_tokenize


class SubAppBase:
    """Shared argument handling and output for tokenization sub-commands.

    Subclasses must set ``default_engine``, ``separator`` and ``run`` on the
    instance before calling this ``__init__``.
    """

    def __init__(self, name, argv):
        """Parse ``argv``, tokenize ``--text`` and print the joined tokens.

        :param name: sub-command name, appended to the usage string
        :param argv: option arguments for this sub-command
        """
        usage_kwargs = cli.make_usage("tokenization " + name)
        parser = argparse.ArgumentParser(**usage_kwargs)

        parser.add_argument("--text", type=str, help="input text")

        engine_help = "default: %s" % self.default_engine
        parser.add_argument(
            "--engine",
            type=str,
            default=self.default_engine,
            help=engine_help,
        )

        self.args = parser.parse_args(argv)

        # No text given: print help and exit.
        cli.exit_if_empty(self.args.text, parser)

        chosen_engine = self.args.engine
        print(f"Using engine={chosen_engine}")

        tokens = self.run(self.args.text, engine=chosen_engine)
        print(self.separator.join(tokens))


class WordTokenizationApp(SubAppBase):
    """Word tokenization sub-command built on ``word_tokenize``."""

    def __init__(self, *args, **kwargs):
        # Set before the base initializer runs; it reads these attributes.
        self.run = word_tokenize
        self.default_engine, self.separator = "newmm", "|"

        super().__init__(*args, **kwargs)


class SyllableTokenizationApp(SubAppBase):
    """Syllable tokenization sub-command built on ``syllable_tokenize``."""

    def __init__(self, *args, **kwargs):
        # Set before the base initializer runs; it reads these attributes.
        self.run = syllable_tokenize
        self.default_engine, self.separator = "ssg", "~"

        super().__init__(*args, **kwargs)


class App:
    """Command-line handler for the ``tokenization`` namespace."""

    def __init__(self, argv):
        """Parse the command from ``argv`` and dispatch to its sub-app.

        :param argv: full process argument list; ``argv[2]`` is the command
            and ``argv[3:]`` are handed to the sub-app
        """
        parser = argparse.ArgumentParser(**cli.make_usage("tokenization"))
        parser.add_argument(
            "command",
            type=str,
            nargs="?",
            help="[word|syllable]"
        )

        # Parse only the command token; its options belong to the sub-app.
        args = parser.parse_args(argv[2:3])
        command = args.command

        cli.exit_if_empty(command, parser)

        argv = argv[3:]

        if command == "word":
            WordTokenizationApp("word", argv)
        elif command == "syllable":
            SyllableTokenizationApp("syllable", argv)
        else:
            # An unrecognized command used to be ignored silently; report it
            # so a typo does not look like a successful run with no output.
            print("No command available: %s" % command)