Skip to content

Commit 58d4d00

Browse files
authored
Merge pull request #251 from PyThaiNLP/cli-improvement
Better CLI
2 parents 365f6f8 + 35e5766 commit 58d4d00

File tree

6 files changed

+279
-47
lines changed

6 files changed

+279
-47
lines changed

bin/pythainlp

100644 → 100755
Lines changed: 27 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,31 @@
1-
#!python3
1+
#!/usr/bin/env python
22
# -*- coding: utf-8 -*-
33

44
import argparse
5-
from pythainlp import __version__
6-
# Legacy flag-based command line: each feature is selected through an option
# on a single parser, and the first matching branch below is executed.
parser = argparse.ArgumentParser()
parser.add_argument("-t", "--text", default=None, help="text", type=str)
parser.add_argument("-seg", "--segment", help="word segment", action="store_true")
parser.add_argument("-c", "--corpus", help="mange corpus", action="store_true")
parser.add_argument("-pos", "--postag", help="postag", action="store_true")
parser.add_argument("-soundex", "--soundex", help="soundex", default=None)
parser.add_argument("-e", "--engine", default="newmm", help="the engine", type=str)
parser.add_argument(
    "-pos-e",
    "--postag_engine",
    default="perceptron",
    help="the engine for word tokenize",
    type=str,
)
parser.add_argument(
    "-pos-c",
    "--postag_corpus",
    default="orchid",
    help="corpus for postag",
    type=str,
)
args = parser.parse_args()

if args.corpus:
    # Import exactly the two helpers used below; a wildcard import would
    # dump every public name of pythainlp.corpus into the script namespace.
    from pythainlp.corpus import download, remove

    print("PyThaiNLP Corpus")
    temp = ""
    # The loop ends on menu choice "4" (break) or when "exit" is typed.
    while temp != "exit":
        print("\n1. Install\n2. Remove\n3. Update\n4. Exit\n")
        temp = input("Choose 1, 2, 3, or 4: ")
        if temp in ("1", "3"):
            # "Install" and "Update" both call download().
            download(input("Corpus name:"))
        elif temp == "2":
            remove(input("Corpus name:"))
        elif temp == "4":
            break
        else:
            print("Choose 1, 2, 3, or 4:")
elif args.text is not None:
    from pythainlp.tokenize import word_tokenize

    tokens = word_tokenize(args.text, engine=args.engine)
    if args.segment:
        print("|".join(tokens))
    elif args.postag:
        from pythainlp.tag import pos_tag

        tagged = pos_tag(
            tokens, engine=args.postag_engine, corpus=args.postag_corpus
        )
        # One "word/tag" item per token, tab separated.
        print("\t".join(item[0] + "/" + item[1] for item in tagged))
elif args.soundex is not None:
    from pythainlp.soundex import soundex

    # --engine defaults to "newmm" (the tokenizer default); when it was left
    # at that value, fall back to the "lk82" soundex engine instead.
    engine = "lk82" if args.engine == "newmm" else args.engine
    print(soundex(args.soundex, engine=engine))
5+
import sys
6+
7+
from pythainlp import cli
8+
9+
10+
# Top-level entry point: the first positional argument selects a "namespace"
# (a sub-module of pythainlp.cli); that namespace's App parses the rest.
parser = argparse.ArgumentParser(
    usage="pythainlp namespace command [options]"
)

parser.add_argument(
    "namespace",
    type=str,
    default="",
    nargs="?",
    help="[%s]" % "|".join(cli.available_namespaces),
)

# Only argv[1] is parsed here; the remaining arguments belong to the
# namespace's own parser, which receives the full argv below.
args = parser.parse_args(sys.argv[1:2])

cli.exit_if_empty(args.namespace, parser)

# Dispatch on the advertised namespace list. Checking hasattr(cli, ...) would
# also accept unrelated attributes of the package (e.g. "sys", "make_usage",
# "cli_name") and then crash with AttributeError on the missing ".App".
if args.namespace in cli.available_namespaces:
    namespace = getattr(cli, args.namespace)
    namespace.App(sys.argv)
5029
else:
51-
print(f"PyThaiNLP {__version__}")
30+
print(f"Namespace not available: {args.namespace}\nPlease run with --help for alternatives")
31+

pythainlp/cli/__init__.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import sys
2+
3+
from . import corpus
4+
from . import tokenization
5+
from . import soundex
6+
from . import tagging
7+
8+
# Names of the CLI namespaces (sub-modules of this package), sorted
# alphabetically for display in help messages.
available_namespaces = sorted(("corpus", "soundex", "tagging", "tokenization"))

# Program name placed in front of every namespace in generated usage strings.
cli_name = "pythainlp"


def make_usage(s):
    """Return ``prog``/``usage`` keyword arguments for an ArgumentParser.

    :param s: namespace (and optional sub-command) appended to the CLI name
    :return: dict with ``prog`` set to ``"<cli_name> <s>"`` and a ``usage``
        template that argparse expands with that ``prog``
    """
    return {
        "prog": "{} {}".format(cli_name, s),
        "usage": "%(prog)s command [options]",
    }
20+
21+
22+
def exit_if_empty(d, parser):
    """Print the parser's help and exit with status 0 when ``d`` is falsy.

    :param d: value to check (typically a parsed argument)
    :param parser: object providing ``print_help()``, e.g. an ArgumentParser
    :raises SystemExit: with code 0 when ``d`` is empty/None
    """
    if d:
        return

    parser.print_help()
    sys.exit(0)

pythainlp/cli/corpus.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import argparse
2+
3+
from pythainlp import corpus, cli
4+
5+
6+
class App:
    """Command-line handler for the ``corpus`` namespace.

    Instantiating the class parses ``argv`` and immediately runs the
    requested command (``download`` or ``remove``).
    """

    # Commands a user may invoke. Dispatch is restricted to this tuple so that
    # other attributes of the class (e.g. "__init__", "__doc__") can never be
    # called by passing their name on the command line.
    _COMMANDS = ("download", "remove")

    def __init__(self, argv):
        """Parse ``argv[2:]`` and execute the selected corpus command.

        :param argv: full argument vector (``argv[0]`` is the program name,
            ``argv[1]`` the namespace)
        """
        parser = argparse.ArgumentParser(**cli.make_usage("corpus"))

        parser.add_argument(
            "--name",
            type=str,
            help="corpus's name",
        )

        parser.add_argument(
            "command",
            type=str,
            default="",
            nargs="?",
            help="[download|remove]",
        )

        args = parser.parse_args(argv[2:])

        cli.exit_if_empty(args.command, parser)
        command = args.command

        if command not in self._COMMANDS:
            print("No command available: %s" % command)
            return

        # Without --name, args.name is None; show the help instead of
        # forwarding None to the corpus functions.
        cli.exit_if_empty(args.name, parser)

        getattr(App, command)(args)

    @staticmethod
    def download(args):
        """Call ``corpus.download`` with the corpus named by ``args.name``."""
        corpus.download(args.name)

    @staticmethod
    def remove(args):
        """Call ``corpus.remove`` with the corpus named by ``args.name``."""
        corpus.remove(args.name)

pythainlp/cli/soundex.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import argparse
2+
3+
from pythainlp import cli
4+
from pythainlp.soundex import soundex
5+
6+
7+
class App:
    """Command-line handler for the ``soundex`` namespace.

    Instantiating the class parses ``argv``, computes the soundex code of
    ``--text`` with the chosen engine and prints it.
    """

    def __init__(self, argv):
        """Parse ``argv[2:]`` and print the soundex code of ``--text``.

        :param argv: full argument vector (``argv[0]`` is the program name,
            ``argv[1]`` the namespace)
        """
        # The program name was misspelled "sounddex"; build it from the shared
        # CLI name so help output reads "pythainlp soundex". The usage line is
        # left to argparse because this namespace has no sub-command.
        parser = argparse.ArgumentParser(prog=f"{cli.cli_name} soundex")
        parser.add_argument(
            "--text",
            type=str,
            help="text",
        )

        parser.add_argument(
            "--engine",
            type=str,
            help="[udom83|lk82|metasound] (default: udom83)",
            default="udom83",
        )

        args = parser.parse_args(argv[2:])

        # Show the help and exit when no text was supplied.
        cli.exit_if_empty(args.text, parser)

        sx = soundex(args.text, engine=args.engine)
        print(sx)

pythainlp/cli/tagging.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import argparse
2+
3+
from pythainlp import cli
4+
from pythainlp.tag import pos_tag
5+
6+
7+
class SubAppBase:
    """Shared argument handling for tagging sub-commands.

    Subclasses must set ``default_engine``, ``default_corpus``,
    ``default_sep`` and ``run`` before this initializer executes. ``run`` is
    called as ``run(tokens, engine=..., corpus=...)`` and must return an
    iterable of ``(word, tag)`` pairs.
    """

    def __init__(self, name, argv):
        """Parse ``argv``, tag the tokens in ``--text`` and print the result.

        :param name: program name shown in the parser's help
        :param argv: arguments that follow the sub-command
        """
        parser = argparse.ArgumentParser(prog=name)
        parser.add_argument(
            "--text",
            type=str,
            help="input text",
        )

        parser.add_argument(
            "--engine",
            type=str,
            help="default: %s" % self.default_engine,
            default=self.default_engine,
        )

        # The help text advertises a default, so it must actually be applied;
        # previously an omitted --corpus was passed on as None.
        parser.add_argument(
            "--corpus",
            type=str,
            help="default: %s" % self.default_corpus,
            default=self.default_corpus,
        )

        parser.add_argument(
            "--sep",
            type=str,
            help="default: %s" % self.default_sep,
            default=self.default_sep,
        )

        args = parser.parse_args(argv)
        self.args = args

        # Without --text, args.text is None and .split() below would raise
        # AttributeError; show the help and exit instead.
        cli.exit_if_empty(args.text, parser)

        print(f"Using engine={args.engine}")

        # The input is expected to be pre-tokenized, joined by the separator.
        result = self.run(
            args.text.split(args.sep), engine=args.engine, corpus=args.corpus
        )

        print(" ".join(f"{word}/{tag}" for word, tag in result))
49+
50+
51+
class POSTaggingApp(SubAppBase):
    """Part-of-speech tagging sub-command built on ``pos_tag``.

    The settings live on the class so they are available while the inherited
    initializer builds its parser and runs the tagger.
    """

    default_engine = "perceptron"
    default_corpus = "orchid"
    default_sep = "|"

    # staticmethod keeps pos_tag from being bound to the instance when it is
    # looked up as self.run.
    run = staticmethod(pos_tag)
60+
61+
62+
class App:
    """Command-line handler for the ``tagging`` namespace.

    Instantiating the class reads the sub-command from ``argv[2]`` and hands
    the remaining arguments to the matching sub-application.
    """

    def __init__(self, argv):
        """Dispatch to the tagging sub-command named in ``argv[2]``.

        :param argv: full argument vector (``argv[0]`` is the program name,
            ``argv[1]`` the namespace)
        :raises ValueError: if the sub-command is not recognised
        """
        parser = argparse.ArgumentParser(**cli.make_usage("tagging"))
        parser.add_argument(
            "command",
            type=str,
            nargs="?",
            help="[pos]",
        )

        # Only the sub-command is parsed here; its options are parsed by the
        # sub-application from the remaining arguments.
        args = parser.parse_args(argv[2:3])
        command = args.command

        cli.exit_if_empty(command, parser)

        sub_argv = argv[3:]

        if command == "pos":
            POSTaggingApp("Part-of-Speech tagging", sub_argv)
        else:
            # The message previously interpolated the undefined name
            # "subcommand", which raised NameError instead of ValueError.
            raise ValueError(f"no command:{command}")

pythainlp/cli/tokenization.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import argparse
2+
3+
from pythainlp import cli
4+
from pythainlp.tokenize import word_tokenize, syllable_tokenize
5+
6+
7+
class SubAppBase:
    """Shared argument handling for tokenization sub-commands.

    Subclasses provide ``default_engine``, ``separator`` and ``run``; ``run``
    is called as ``run(text, engine=...)`` and its result is joined with
    ``separator`` for printing.
    """

    def __init__(self, name, argv):
        """Parse ``argv``, tokenize ``--text`` and print the joined tokens.

        :param name: sub-command name, appended to "tokenization" in usage
        :param argv: arguments that follow the sub-command
        """
        usage_kwargs = cli.make_usage("tokenization " + name)
        arg_parser = argparse.ArgumentParser(**usage_kwargs)

        arg_parser.add_argument("--text", type=str, help="input text")
        arg_parser.add_argument(
            "--engine",
            type=str,
            default=self.default_engine,
            help="default: %s" % self.default_engine,
        )

        parsed = arg_parser.parse_args(argv)
        self.args = parsed

        # Show the help and exit when no text was supplied.
        cli.exit_if_empty(parsed.text, arg_parser)

        print(f"Using engine={parsed.engine}")
        tokens = self.run(parsed.text, engine=parsed.engine)
        print(self.separator.join(tokens))
34+
35+
36+
class WordTokenizationApp(SubAppBase):
    """Word tokenization sub-command built on ``word_tokenize``.

    The settings live on the class so they are available while the inherited
    initializer builds its parser and runs the tokenizer.
    """

    default_engine = "newmm"
    separator = "|"

    # staticmethod keeps word_tokenize from being bound to the instance when
    # it is looked up as self.run.
    run = staticmethod(word_tokenize)
44+
45+
46+
class SyllableTokenizationApp(SubAppBase):
    """Syllable tokenization sub-command built on ``syllable_tokenize``.

    The settings live on the class so they are available while the inherited
    initializer builds its parser and runs the tokenizer.
    """

    default_engine = "ssg"
    separator = "~"

    # staticmethod keeps syllable_tokenize from being bound to the instance
    # when it is looked up as self.run.
    run = staticmethod(syllable_tokenize)
54+
55+
56+
class App:
    """Command-line handler for the ``tokenization`` namespace.

    Instantiating the class reads the sub-command from ``argv[2]`` and hands
    the remaining arguments to the matching sub-application.
    """

    def __init__(self, argv):
        """Dispatch to the tokenization sub-command named in ``argv[2]``.

        :param argv: full argument vector (``argv[0]`` is the program name,
            ``argv[1]`` the namespace)
        """
        parser = argparse.ArgumentParser(**cli.make_usage("tokenization"))
        parser.add_argument(
            "command",
            type=str,
            nargs="?",
            help="[word|syllable]",
        )

        # Only the sub-command is parsed here; its options are parsed by the
        # sub-application from the remaining arguments.
        args = parser.parse_args(argv[2:3])
        command = args.command

        cli.exit_if_empty(command, parser)

        sub_argv = argv[3:]

        if command == "word":
            WordTokenizationApp("word", sub_argv)
        elif command == "syllable":
            SyllableTokenizationApp("syllable", sub_argv)
        else:
            # An unrecognised command used to be ignored without any output;
            # tell the user what went wrong.
            print("No command available: %s" % command)

0 commit comments

Comments
 (0)