Skip to content

Commit

Permalink
feat: ruby
Browse files Browse the repository at this point in the history
Signed-off-by: yihong0618 <zouzou0208@gmail.com>
  • Loading branch information
yihong0618 committed Dec 31, 2023
1 parent dce68bc commit 7b87026
Show file tree
Hide file tree
Showing 7 changed files with 552 additions and 268 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ epubhv tests/test_epub # will generate all epub files to epub-v

# you can specify the punctuation style
epubhv e.epub --convert s2t --punctuation auto
# you can add `--ruby` to annotate Japanese (furigana) and Chinese (pinyin) text
epubhv e.epub --h --ruby
```

## Contribution
Expand All @@ -54,6 +56,7 @@ pdm run all
- @[jiak94](https://github.com/jiak94) support OpenCC
- @[OverflowCat](https://github.com/OverflowCat) add punctuation styles.
- @[jt-wang](https://github.com/jt-wang) Type and PDM!
- [furigana4epub](https://github.com/Mumumu4/furigana4epub)

## Similar projects

Expand Down
17 changes: 14 additions & 3 deletions epubhv/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class Options:
h: bool
convert: str
punctuation: str
ruby: bool


def main() -> None:
Expand All @@ -21,13 +22,19 @@ def main() -> None:
"--v",
dest="v",
action="store_true",
help="change all the epub files to vertical",
help="change all the epub files to vertical.",
)
parser.add_argument(
"--h",
dest="h",
action="store_true",
help="change all the epub files to hortical",
help="change all the epub files to hortical.",
)
parser.add_argument(
"--ruby",
dest="ruby",
action="store_true",
help="Ruby it for Chinese and Japanese.",
)

parser.add_argument(
Expand Down Expand Up @@ -89,6 +96,7 @@ def main() -> None:
h=raw_args.h,
convert=raw_args.convert,
punctuation=raw_args.punctuation,
ruby=raw_args.ruby,
)

epub_files = Path(options.epub)
Expand All @@ -109,13 +117,16 @@ def main() -> None:
file_path=f,
convert_to=options.convert,
convert_punctuation=options.punctuation,
need_ruby=options.ruby,
)
epubhv.run(method=method)
except Exception as e:
print(f"{str(f)} {method} is failed by {str(e)}")
else:
print(f"{str(epub_files)} is {method}")
epubhv: EPUBHV = EPUBHV(file_path=epub_files, convert_to=options.convert)
epubhv: EPUBHV = EPUBHV(
file_path=epub_files, convert_to=options.convert, need_ruby=options.ruby
)
epubhv.run(method=method)
else:
raise Exception("Please make sure it is a dir contains epub or is a epub file.")
Expand Down
124 changes: 76 additions & 48 deletions epubhv/epubhv.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
"""
Follow these steps to change epub books to vertical or horizontal.
"""
import logging
import os
import shutil
import zipfile
from collections import defaultdict
from pathlib import Path
from typing import List, Dict, Optional
from typing import Dict, List, Optional

import cssutils
import opencc
from bs4 import BeautifulSoup as bs
from bs4 import NavigableString, PageElement, ResultSet, Tag
from cssutils import CSSParser
from cssutils.css import CSSStyleSheet
import opencc
from bs4 import BeautifulSoup as bs, NavigableString, PageElement, ResultSet, Tag

from epubhv.punctuation import Punctuation
from epubhv.yomituki import RubySoup, string_containers

cssutils.log.setLevel(logging.CRITICAL) # type: ignore

Expand Down Expand Up @@ -68,10 +69,14 @@ def __init__(
file_path: Path,
convert_to: Optional[str] = None,
convert_punctuation: Optional[str] = "auto",
need_ruby: bool = False,
) -> None:
# declare instance fields
self.epub_file: Path
self.has_css_file: bool = False
# for language ruby
self.need_ruby: bool = need_ruby
self.ruby_language = None
self.files_dict: Dict[str, List[Path]] = {}
self.book_path: Path
self.book_name: str
Expand Down Expand Up @@ -135,18 +140,31 @@ def make_epub_values(self) -> None:
self.opf_file = opf_files[0]
self.opf_dir = self.opf_file.parent.absolute()

def __make_ruby_language(self, soup: "bs") -> None:
    """Record the book language that ruby annotation should use.

    Does nothing unless ``self.need_ruby`` is set. Otherwise the text of
    the ``dc:language`` element found in ``soup`` is stored in
    ``self.ruby_language``. When the element is missing or has no text,
    ruby annotation is switched off by clearing ``self.need_ruby``.

    Args:
        soup: Parsed OPF metadata; only ``soup.find("dc:language")`` is used.
    """
    if not self.need_ruby:
        return

    language_tag = soup.find("dc:language")
    # A bs4 Tag is truthy even when it has no children, so test the text
    # itself: indexing ``contents[0]`` on an empty <dc:language/> would
    # raise IndexError. ``get_text(strip=True)`` also drops surrounding
    # whitespace and yields a plain ``str`` detached from the parse tree.
    language = language_tag.get_text(strip=True) if language_tag is not None else ""
    if language:
        self.ruby_language = language
        return

    # Without a declared language there is nothing to pick a ruby style
    # from, so the feature is disabled for this book.
    print("There's no language meta data in meta file, we can not ruby it")
    self.need_ruby = False

def change_epub_to_vertical(self) -> None:
"""
steps:
1. check if have CSS files
2. check the epub spine `page-progression-direction` add to it
3. check `primary-writing-mode` in opf file's meta, if have change it to vertical-rl, if not add it.
4. if we have add CSS files we need to check if have `html` attribute
3. check `primary-writing-mode` in opf file's meta, if have changed it to vertical-rl, if not add it.
4. if we have added CSS files we need to check if have `html` attribute
5. if have `html` attribute add vertical-rl to it
6. if have not `html` we add it
7. if we do not have css file, we add one with html `vertical-rl` and change all the html to add the css files
"""
soup: bs = load_opf_meta_data(self.opf_file)
self.__make_ruby_language(soup)
# change it to rtl -> right to left
spine: Optional[Tag | NavigableString] = soup.find("spine")
assert spine is not None
Expand Down Expand Up @@ -236,6 +254,7 @@ def change_epub_to_horizontal(self) -> None:
4. check all css files and remove all "writing-mode", "-webkit-writing-mode", "-epub-writing-mode" to make it default that is horizontal
"""
soup: bs = load_opf_meta_data(self.opf_file)
self.__make_ruby_language(soup)
# change it to ltr -> left to right
spine: Optional[Tag | NavigableString] = soup.find("spine")
assert spine is not None
Expand Down Expand Up @@ -271,7 +290,7 @@ def change_epub_to_horizontal(self) -> None:
file.write(css_style) # type: ignore

def convert(self, method: str = "to_vertical") -> None:
if self.converter is None:
if self.converter is None and not self.need_ruby:
return

html_file: Path
Expand All @@ -283,55 +302,64 @@ def convert(self, method: str = "to_vertical") -> None:
with open(html_file, "r", encoding="utf-8", errors="ignore") as f:
content: str = f.read()
soup: bs = bs(content, "html.parser")

html_element: Optional[Tag | NavigableString] = soup.find("html")
assert html_element is not None
text_elements: ResultSet[PageElement] = html_element.find_all(string=True) # type: ignore

element: Tag
for element in text_elements: # type: ignore
old_text = element.string
if old_text is not None:
new_text = self.converter.convert(old_text) # type: ignore
punc = self.convert_punctuation
if punc != "none":
if punc == "auto":
if self.convert_to is None:
punc = "s2t" if method == "to_vertical" else "t2t"
# default: convert “‘’” to 「『』」 in vertical mode,
# but not to “‘’” in horizontal mode
else:
punc = self.convert_to
source, target = punc.split("2")
punc_converter = Punctuation()
new_text = punc_converter.convert( # type: ignore
new_text,
horizontal=method == "to_horizontal",
source_locale=punc_converter.map_locale(source), # type: ignore
target_locale=punc_converter.map_locale(target), # type: ignore
)
element.string.replace_with(new_text) # type: ignore
html_element.replace_with(html_element)

with open(html_file, "w", encoding="utf-8") as file:
html_element.replace_with(html_element)

with open(html_file, "w", encoding="utf-8", errors="ignore") as file:
file.write(soup.prettify())
if self.converter:
html_element: Optional[Tag | NavigableString] = soup.find("html")
assert html_element is not None
text_elements: ResultSet[PageElement] = html_element.find_all(string=True) # type: ignore

element: Tag
for element in text_elements: # type: ignore
old_text = element.string
if old_text is not None:
new_text = self.converter.convert(old_text) # type: ignore
punc = self.convert_punctuation
if punc != "none":
if punc == "auto":
if self.convert_to is None:
punc = "s2t" if method == "to_vertical" else "t2t"
# default: convert “‘’” to 「『』」 in vertical mode,
# but not to “‘’” in horizontal mode
else:
punc = self.convert_to
source, target = punc.split("2")
punc_converter = Punctuation()
new_text = punc_converter.convert( # type: ignore
new_text,
horizontal=method == "to_horizontal",
source_locale=punc_converter.map_locale(source), # type: ignore
target_locale=punc_converter.map_locale(target), # type: ignore
)
element.string.replace_with(new_text) # type: ignore
html_element.replace_with(html_element)

with open(html_file, "w", encoding="utf-8") as file:
html_element.replace_with(html_element)

with open(html_file, "w", encoding="utf-8", errors="ignore") as file:
file.write(soup.prettify())
if self.need_ruby:
ruby_soup = bs(
content, "html.parser", string_containers=string_containers
)
# TODO fix this maybe support unruby
r = RubySoup(self.ruby_language, True)
r.ruby_soup(ruby_soup.body)
with open(html_file, "w", encoding="utf-8", errors="ignore") as file:
file.write(ruby_soup.prettify())

def pack(self, method: str = "to_vertical") -> None:
    """Zip the unpacked book directory into the output ``.epub`` file.

    The output name is ``<book_name>-<v|h>-<lang>.epub`` where ``lang`` is
    ``self.convert_to`` (or ``"original"`` when no conversion was requested),
    with ``-ruby`` appended when ``self.need_ruby`` is set. After the
    archive is written the unpacked directory ``self.book_path`` is removed.

    Args:
        method: ``"to_vertical"`` produces the ``-v-`` name; any other
            value produces the ``-h-`` name.
    """
    lang: str = "original" if self.convert_to is None else self.convert_to
    if self.need_ruby:
        lang = f"{lang}-ruby"
    direction: str = "v" if method == "to_vertical" else "h"
    book_name: str = f"{self.book_name}-{direction}-{lang}.epub"

    # make_archive always appends ".zip" to base_name, so the archive is
    # first written as "<name>.epub.zip" and then moved to "<name>.epub".
    shutil.make_archive(base_name=book_name, format="zip", root_dir=self.book_path)
    # os.replace overwrites an existing output on every platform, whereas
    # os.rename raises on Windows when the destination already exists and
    # would leave the ".zip" and the unpacked directory behind.
    os.replace(book_name + ".zip", book_name)
    shutil.rmtree(self.book_path)

def run(self, method: str = "to_vertical") -> None:
Expand Down
Loading

0 comments on commit 7b87026

Please sign in to comment.