Adds more tests and refactors.

menganha · Nov 20, 2022 · 4fab9dd · 4fab9dd
1 parent 8031093
commit 4fab9dd
Show file tree

Hide file tree

Showing 6 changed files with 201 additions and 115 deletions.
diff --git a/pyblog/blog.py b/pyblog/blog.py
@@ -2,6 +2,7 @@
 import json
 import shutil
 import sys
+from collections.abc import Iterator
 from importlib import resources
 from pathlib import Path
 
@@ -20,7 +21,7 @@ class Blog:
     TAG_TEMPLATE = 'tag.html'
     ALL_TAGS_TEMPLATE = 'all_tags.html'
     INDEX_TEMPLATE = 'index.html'
-    CSS_FILE_NAME = 'style.css'
+    CSS_FILE_NAME = 'data/style.css'
     CONFIG_FILE_NAME = 'config.json'
     HOME_MAX_POSTS = 10
 
@@ -40,7 +41,7 @@ def __init__(self, main_path: Path):
         self.template_environment.globals.update({'current_year': f'{dt.date.today().year}'})
         self.config_path = main_path / self.CONFIG_FILE_NAME
 
-    def create(self, author: str = None, website_name: str = None):
+    def create(self):
         if self.is_pyblog():
             print(f'Error! Input path {self.main_path.resolve()} seems to contain another pyblog')
             sys.exit(1)
@@ -57,8 +58,7 @@ def create(self, author: str = None, website_name: str = None):
             shutil.copytree(data_directory, self.data_path)
 
         # Create config file. TODO: think of adding, e.g., a enum for better control
-        config = {'website_name': website_name if website_name else self.main_path.resolve().name,
-                  'author': author if author else ''}
+        config = {'website_name': self.main_path.resolve().name, 'author': ''}
         json_encoded = json.dumps(config)
         self.config_path.write_text(json_encoded)
 
@@ -77,6 +77,7 @@ def is_pyblog(self) -> bool:
             return False
 
     def build_home_page(self, posts: list[Post]):
+        # TODO: Pass a container containing the website path so that the post links resolve properly?
         index_template = self.template_environment.get_template(self.INDEX_TEMPLATE)
         index_html = index_template.render(latest_posts=posts)
         target_path = self.website_path / 'index.html'
@@ -97,22 +98,21 @@ def build_tag_pages(self, all_posts: list[Post]):
         target_path = self.website_path / f'tags.html'
         target_path.write_text(all_tags_html)
 
-    def _get_post_target_html_path(self, post_path: Path) -> Path:
-        return self.website_posts_path / post_path.parent.relative_to(self.posts_path) / f'{post_path.stem}.html'
+    def markdown_post_paths(self) -> Iterator[Path]:
+        """
+        Lazily yields every markdown source file found recursively under ``self.posts_path``.
+
+        Only names with the ``.md`` extension are matched. The bare ``'*md'`` pattern would also pick up
+        unrelated names that merely end in "md" (e.g. ``script.cmd``).
+        """
+        return self.posts_path.rglob('*.md')
 
-    def get_all_public_posts(self) -> list[Post]:
-        """ Retrieves and enriches all posts and sorts them by date """
-        all_public_posts = []
-        for post_path in self.posts_path.rglob('*md'):
-            target_path = self._get_post_target_html_path(post_path)
-            post = Post(post_path, target_path, self.website_path)
-            if post.is_public():
-                all_public_posts.append(post)
-        all_public_posts.sort(key=lambda x: x.date, reverse=True)
-        return all_public_posts
+    def orphan_target_paths(self) -> Iterator[Path]:
+        """
+        Yields the html paths of the current build that do not have a corresponding markdown source.
+
+        The source of ``<website_posts_path>/<subdirs>/<name>.html`` is looked up at
+        ``<posts_path>/<subdirs>/<name>.md``, i.e. the inverse of ``get_post_target_html_path``. Checking the
+        full relative location, instead of searching the whole posts tree for the stem, keeps a stale page from
+        surviving just because a post in another directory shares its file name.
+        """
+        for target_path in self.website_posts_path.rglob('*.html'):
+            relative_target = target_path.relative_to(self.website_posts_path)
+            if not (self.posts_path / relative_target.with_suffix('.md')).exists():
+                yield target_path
 
     def build_post(self, post: Post):
+        """ Renders ``post`` with the post template and writes the resulting page to ``post.target_path`` """
         post_template = self.template_environment.get_template(self.POST_TEMPLATE)
-        html_content = post.get_markdown_html()
-        post_html = post_template.render(post=post, content=html_content)
-        post._html_target_path.write_text(post_html)  # TODO: Change it to something more beautiful!
+        html_content = post.get_html()
+        html_page = post_template.render(post=post, content=html_content)
+        # Target paths keep the sub-directory layout of the posts folder (see get_post_target_html_path), and
+        # nothing in this method guarantees that such a sub-directory already exists in the website folder.
+        post.target_path.parent.mkdir(parents=True, exist_ok=True)
+        post.target_path.write_text(html_page)
+
+    def get_post_target_html_path(self, post_path: Path) -> Path:
+        """ Maps a markdown source path to its html target, keeping the sub-directory layout and the file stem """
+        relative_directory = post_path.parent.relative_to(self.posts_path)
+        html_file_name = f'{post_path.stem}.html'
+        return self.website_posts_path / relative_directory / html_file_name
diff --git a/pyblog/command_line.py b/pyblog/command_line.py
@@ -2,9 +2,14 @@
 import http.server
 import shutil
 import socketserver
+import sys
 from pathlib import Path
 
 from pyblog.blog import Blog
+from pyblog.post import Post
+
+DEFAULT_TEST_PORT = 9090
+DEFAULT_TEST_HOST = 'localhost'
 
 
 def parse_cli_arguments():
@@ -14,70 +19,84 @@ def parse_cli_arguments():
 
     parser_init = subparsers.add_parser('init', help='Creates a new pyblog website')
     parser_init.add_argument('path', help='Initializes all the relevant files for the website on the input path')
-    parser_init.add_argument('--name', help='Name of the website')  # ! remove them
-    parser_init.add_argument('--author', help='Author of the website')
 
     parser_build = subparsers.add_parser('build', help='Builds the website')
     parser_build.add_argument('--force', help='Force a clean rebuild of the entire website', action='store_true')
+
     subparsers.add_parser('test', help='Creates a local server to check the blog locally')
+
     return parser.parse_args()
 
 
+def init(path: Path):
+    """
+    Creates a new pyblog on ``path`` and reports the success on stdout.
+
+    ``path`` is coerced to ``Path`` because ``execute`` forwards the raw string produced by argparse, and a
+    plain ``str`` has no ``expanduser`` method.
+    """
+    pyblog = Blog(Path(path).expanduser())
+    pyblog.create()
+    print(f' New Pyblog successfully created on {path}!')
+
+
+def build(force: bool):
+    """
+    Builds the pyblog located in the current working directory.
+
+    Public posts whose markdown source is newer than their html target (or all of them when ``force`` is set)
+    are rendered again, html files without a markdown source are deleted and, if anything changed, the index
+    and the tag pages are regenerated.
+
+    Returns 1 if the current directory does not contain a pyblog, None otherwise.
+    """
+    pyblog = Blog(Path('.'))
+    if not pyblog.is_pyblog():
+        print('Error: The current path does not contain a pyblog')
+        return 1
+
+    pyblog.load_config()
+    shutil.copy(pyblog.css_file_path, pyblog.website_path)
+    all_public_posts = []
+    needs_rebuild = False
+    for path in pyblog.markdown_post_paths():
+        target_path = pyblog.get_post_target_html_path(path)
+        post = Post(path, target_path)
+        # Drafts must stay out of this list: it feeds the index and the tag pages.
+        if not post.is_public():
+            continue
+        all_public_posts.append(post)
+        if force or post.is_dirty(target_path):
+            # Post has no ``path`` attribute; the markdown file is stored as ``source_path``.
+            print(f'Processing post: {post.source_path}')
+            pyblog.build_post(post)
+            needs_rebuild = True
+
+    all_public_posts.sort(key=lambda x: x.date, reverse=True)
+
+    # Cleanup: If a post was deleted after it had been published, then we need to delete the corresponding html file.
+    # The aggregate pages are rebuilt too, so that they stop listing the deleted post.
+    for target_path in pyblog.orphan_target_paths():
+        target_path.unlink()
+        needs_rebuild = True
+
+    if needs_rebuild:
+        latest_posts = all_public_posts[:pyblog.HOME_MAX_POSTS]  # Maybe handle this within the pyblog instance
+        print('Building index...')
+        pyblog.build_home_page(latest_posts)
+        print('Building tag pages...')
+        pyblog.build_tag_pages(all_public_posts)
+    print('Done!')
+
+
+def serve():
+    """
+    Serves the built website of the pyblog in the current directory on DEFAULT_TEST_HOST:DEFAULT_TEST_PORT
+    until interrupted from the keyboard. Returns 1 if the current directory does not contain a pyblog.
+    """
+    pyblog = Blog(Path('.'))
+    if not pyblog.is_pyblog():
+        print('Error: The current path does not contain a pyblog')
+        return 1
+
+    import functools
+
+    address = (DEFAULT_TEST_HOST, DEFAULT_TEST_PORT)
+    request_handler = functools.partial(http.server.SimpleHTTPRequestHandler, directory=pyblog.website_path)
+    with socketserver.TCPServer(address, request_handler) as httpd:
+        print(f'Test server running on: http://{DEFAULT_TEST_HOST}:{DEFAULT_TEST_PORT}')
+        try:
+            httpd.serve_forever()
+        except KeyboardInterrupt:
+            httpd.server_close()
+
 def execute():
+    """
+    Command line entry point: parses the arguments and dispatches to the selected sub-command.
+
+    The status returned by ``build`` and ``serve`` (1 on error) is passed on to the caller, so that
+    ``sys.exit(execute())`` reports the failure to the shell.
+    """
     args = parse_cli_arguments()
 
     if args.command == 'init':
-
-        pyblog = Blog(Path(args.path).expanduser())
-        pyblog.create(args.name, args.author)
-        print(f' New Pyblog successfully created on {args.path}!')
+        init(args.path)
 
     elif args.command == 'build':
-
-        pyblog = Blog(Path('.'))
-        if not pyblog.is_pyblog():
-            print('Error: The current path does not contain a pyblog')
-            return 1
-        else:
-            pyblog.load_config()
-            shutil.copy(pyblog.css_file_path, pyblog.website_path)
-            all_public_posts = pyblog.get_all_public_posts()
-            # TODO:! Implemente smarter rebuild. if source is dirty or has been deleted. If no post is rebuild then do not rebuild the aggregate pages.
-            #   Also rebuild if the templates have changed
-            needs_rebuild = False
-            for post in all_public_posts:
-                if post.is_dirty() or args.force:
-                    print(f'Processing post: {post.path}')
-                    pyblog.build_post(post)
-                    if not needs_rebuild:
-                        needs_rebuild = True
-            if needs_rebuild:
-                latest_posts = all_public_posts[:pyblog.HOME_MAX_POSTS]  # Maybe handle this within the pyblog instance
-                print(f'Building index...')
-                pyblog.build_home_page(latest_posts)
-                print(f'Building tag pages...')
-                pyblog.build_tag_pages(all_public_posts)
-            print(f'Done!')
+        return build(args.force)
 
     elif args.command == 'test':
-
-        pyblog = Blog(Path('.'))
-        if not pyblog.is_pyblog():
-            print('Error: The current path does not contain a pyblog')
-            return 1
-        else:
-            import functools
-
-            PORT = 9090
-            ADDRESS = 'localhost'
-            Handler = functools.partial(http.server.SimpleHTTPRequestHandler, directory=pyblog.website_path)
-            with socketserver.TCPServer((ADDRESS, PORT), Handler) as httpd:
-                print(f'Test server running on: http://{ADDRESS}:{PORT}')
-                try:
-                    httpd.serve_forever()
-                except KeyboardInterrupt:
-                    httpd.server_close()
+        return serve()
 
 
 if __name__ == '__main__':
-    execute()
+    sys.exit(execute())
diff --git a/pyblog/data/templates/index.html b/pyblog/data/templates/index.html
@@ -7,7 +7,7 @@ <h2>{% block section_title %}Latest Posts{% endblock %}</h2>
 <section class="post">
     {% for post in latest_posts %}
     <div class="post_container">
-        <h2><a href="/{{ post.path }}">{{ post.title }}</a>. {{ post.date }}</h2>
+        <h2><a href="/{{ post.target_path }}">{{ post.title }}</a>. {{ post.date }}</h2>
     </div>
     {% endfor %}
 </section>

diff --git a/pyblog/post.py b/pyblog/post.py
@@ -15,21 +15,21 @@
 class Post:
     MANDATORY_LABELS = ['draft', 'date']
     DEFAULT_TAG = 'blog'
-    INVALID_LABELS = ['_metadata', 'markdown_file_path', '_html_target_path', 'title', 'path']
+    INVALID_LABELS = ['_metadata', 'target_path', 'source_path', 'title']
+
     TITLE_REGEXP = re.compile(r'^\s?#\s(.*)', flags=re.MULTILINE)
     METADATA_REGEXP = re.compile(r'^\s?(\w+):\s(.+)', flags=re.MULTILINE)
 
-    def __init__(self, markdown_file_path: Path, html_target_path: Path, website_absolute_path: Path):
+    def __init__(self, source_path: Path, target_path: Path):
+        """
+        Stores the markdown source path and the html target path of the post and parses the metadata
+        right away (``parse_metadata`` opens and reads ``source_path``).
+        """
         # self.raw_text = raw_text.strip()
-        self.markdown_file_path = markdown_file_path
-        self._html_target_path = html_target_path
-        self._metadata = self.parse_metadata(markdown_file_path)
-        self.path = html_target_path.relative_to(website_absolute_path)
+        self.source_path = source_path
+        self.target_path = target_path
+        self._metadata = self.parse_metadata()
 
-    def is_dirty(self) -> bool:
-        """ Checks whether the post needs to be rebuilt """
-        file_mtime = self.markdown_file_path.stat().st_mtime
-        target_mtime = self._html_target_path.stat().st_mtime if self._html_target_path.exists() else 0
-        return file_mtime > target_mtime
+    def is_dirty(self, target_path: Path = None) -> bool:
+        """
+        Checks whether the post needs to be rebuilt, i.e. whether the markdown source was modified after the
+        html target was written.
+
+        ``target_path`` is the html file to compare against and defaults to ``self.target_path``. A target
+        that does not exist is given a modification time of 0.
+        """
+        if target_path is None:
+            target_path = self.target_path
+        try:
+            # Ask for the stat directly instead of checking exists() first: one system call, no race in between
+            target_mtime = target_path.stat().st_mtime
+        except FileNotFoundError:
+            target_mtime = 0
+        return self.source_path.stat().st_mtime > target_mtime
 
     def is_public(self) -> bool:
@@ -38,8 +38,9 @@ def is_public(self) -> bool:
     def __getattr__(self, item):
         return self._metadata[item]
 
-    def get_markdown_html(self) -> str:
-        with self.markdown_file_path.open() as file:
+    def get_html(self) -> str:
+        """ transforms markdown to html """
+        with self.source_path.open() as file:
             raw_text = file.read()
         title = self._metadata['title']
         index = raw_text.find(title)
@@ -48,14 +49,13 @@ def get_markdown_html(self) -> str:
         markdown_text = raw_text[index + len(title):].strip()
         return markdown.markdown(markdown_text)
 
-    @staticmethod
-    def parse_metadata(path: Path) -> dict[str, str]:
+    def parse_metadata(self) -> dict[str, str]:
         """
         Gets all the labels like "label: value" at the beginning of the post and also retrieve the title following
         this labels
         TODO: Make it so that it doesn't read the whole file, iterating over each line individually
         """
-        with path.open() as file:
+        with self.source_path.open() as file:
             raw_text = file.read().strip()
 
         metadata_matches = []