Skip to content

Commit

Permalink
feat: Add support for saving poems to MySQL
Browse files Browse the repository at this point in the history
  • Loading branch information
palp1tate committed May 24, 2024
1 parent 05bd111 commit 8fd5aa2
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 0 deletions.
6 changes: 6 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# MySQL connection settings. utils.py reads these as conf["mysql"][...], so
# every key below must be nested under the "mysql" mapping.
mysql:
  host: 127.0.0.1
  port: 3306
  database: gushiwen
  user: root
  # NOTE(review): looks like a local development credential - confirm it is
  # never used for a shared or production database.
  password: 123456
24 changes: 24 additions & 0 deletions model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from sqlalchemy import (
Column,
String,
BigInteger,
DateTime,
)
from sqlalchemy.dialects.mysql import LONGTEXT
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()


class Poem(Base):
    """ORM mapping for one row of the ``poem`` table.

    Column lengths and nullability mirror the table definition shipped in
    ``poem.sql`` so that tables created from this model match the dumped
    schema: ``name``, ``content`` and ``created_at`` are required, every
    other text column may be NULL.
    """

    __tablename__ = "poem"

    # Surrogate primary key, generated by the database on insert.
    id = Column(BigInteger, primary_key=True, autoincrement=True)
    # Required; varchar(255) in poem.sql.
    name = Column(String(255), nullable=False)
    author = Column(String(256))
    dynasty = Column(String(256))
    # Required body text of the poem.
    content = Column(LONGTEXT, nullable=False)
    # Optional companion texts. Presumably translation, notes, appreciation
    # and historical background scraped with the poem - verify against the
    # scraper that fills them.
    trans = Column(LONGTEXT)
    annotation = Column(LONGTEXT)
    appreciation = Column(LONGTEXT)
    background = Column(LONGTEXT)
    # Required; the scripts in this project set it to the insertion time.
    created_at = Column(DateTime, nullable=False)

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(id={self.id!r}, name={self.name!r}, "
            f"author={self.author!r}, dynasty={self.dynasty!r})"
        )
38 changes: 38 additions & 0 deletions poem.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
Navicat Premium Data Transfer
Source Server : Docker
Source Server Type : MySQL
Source Server Version : 80300 (8.3.0)
Source Host : 127.0.0.1:3306
Source Schema : gushiwen
Target Server Type : MySQL
Target Server Version : 80300 (8.3.0)
File Encoding : 65001
Date: 24/05/2024 09:34:09
*/

-- Use utf8mb4 as the character set for the statements sent on this connection.
SET NAMES utf8mb4;
-- Foreign-key checks are switched off for the duration of the script and
-- switched back on by the last statement.
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for poem
-- ----------------------------
-- WARNING: this drops any existing `poem` table, so all rows stored in it are
-- lost before the table is recreated.
DROP TABLE IF EXISTS `poem`;
-- One row per poem. `name`, `content` and `created_at` are mandatory; the
-- remaining text columns accept NULL. `id` is an unsigned auto-increment key.
CREATE TABLE `poem` (
`id` bigint UNSIGNED NOT NULL AUTO_INCREMENT,
`name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL,
`author` varchar(256) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL,
`dynasty` varchar(256) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL DEFAULT NULL,
`content` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL,
`trans` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
`annotation` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
`appreciation` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
`background` longtext CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NULL,
`created_at` datetime NOT NULL,
PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb4 COLLATE = utf8mb4_unicode_ci ROW_FORMAT = Dynamic;

-- Restore foreign-key enforcement now that the table has been rebuilt.
SET FOREIGN_KEY_CHECKS = 1;
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
beautifulsoup4==4.11.1
requests==2.32.2
PyYAML==6.0.1
SQLAlchemy==2.0.30
mysql-connector-python==8.4.0
51 changes: 51 additions & 0 deletions shige_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from datetime import datetime

from sqlalchemy.orm import sessionmaker

from model import Poem
from shige import fetch_html, extract_poem_urls, fetch_poem_details
from utils import init_engine

if __name__ == "__main__":
    # Script entry point: ask for an index page URL, collect the poem links
    # found on it, fetch each poem and insert it into the ``poem`` table.
    # Every poem is committed in its own session so one bad record does not
    # roll back the others; the final message reports how many were stored.
    try:
        engine = init_engine()
        if engine is None:
            print("Failed to initialize the engine.")
            # ``exit`` is a helper injected by the ``site`` module for
            # interactive use; SystemExit is always available and is not
            # swallowed by the ``except Exception`` below.
            raise SystemExit(1)

        index_url = input(
            "Please enter the URL(example:https://so.gushiwen.cn/gushi/tangshi.aspx): "
        )
        html_content = fetch_html(index_url)
        if not html_content:
            print("Failed to fetch or parse HTML content.")
            raise SystemExit(1)
        poem_urls = list(extract_poem_urls(html_content))

        # The factory is loop-invariant: build it once, open a fresh session
        # per poem.
        new_session = sessionmaker(engine)
        saved = 0
        failed = 0
        for poem_url in poem_urls:
            details = fetch_poem_details(poem_url)
            if not details:
                # Nothing usable came back for this URL; skip it rather than
                # failing later on a subscript of an empty result.
                failed += 1
                print(f"Failed to fetch details for poem: {poem_url}")
                continue

            with new_session() as session:
                try:
                    poem = Poem(
                        name=details["name"],
                        author=details["author"],
                        dynasty=details["dynasty"],
                        content=details["content"],
                        trans=details["trans"],
                        annotation=details["annotation"],
                        appreciation=details["appreciation"],
                        background=details["background"],
                        created_at=datetime.now(),
                    )
                    session.add(poem)
                    session.commit()
                except Exception as e:
                    session.rollback()
                    failed += 1
                    print(f"An error occurred while saving the poem: {e}")
                else:
                    saved += 1
                    print(f"Saved details for poem: {details['name']}")

        # Only claim full success when nothing was skipped or rolled back.
        if failed:
            print(f"Saved {saved} of {len(poem_urls)} poems; {failed} failed.")
        else:
            print("All poems saved successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")
        # Report the failure through the exit status as well as the message.
        raise SystemExit(1)
42 changes: 42 additions & 0 deletions single_shige_db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from datetime import datetime

from sqlalchemy.orm import sessionmaker

from model import Poem
from shige import fetch_poem_details
from utils import init_engine

if __name__ == "__main__":
    # Script entry point: ask for the URL of a single poem page, fetch its
    # details and insert them into the ``poem`` table as one row.
    try:
        engine = init_engine()
        if engine is None:
            print("Failed to initialize the engine.")
            # ``exit`` is a helper injected by the ``site`` module for
            # interactive use; SystemExit is always available and is not
            # swallowed by the ``except Exception`` below.
            raise SystemExit(1)

        url = input(
            "Please enter the single poem URL(example:https://so.gushiwen.cn/shiwenv_45c396367f59.aspx): "
        )
        details = fetch_poem_details(url)
        if not details:
            # Nothing usable came back; stop before subscripting the result.
            print(f"Failed to fetch details for poem: {url}")
            raise SystemExit(1)

        new_session = sessionmaker(engine)
        with new_session() as session:
            try:
                poem = Poem(
                    name=details["name"],
                    author=details["author"],
                    dynasty=details["dynasty"],
                    content=details["content"],
                    trans=details["trans"],
                    annotation=details["annotation"],
                    appreciation=details["appreciation"],
                    background=details["background"],
                    created_at=datetime.now(),
                )
                session.add(poem)
                session.commit()
            except Exception as e:
                session.rollback()
                print(f"An error occurred while saving the poem: {e}")
                # The row was not stored: do not fall through to the success
                # message, and signal the failure through the exit status.
                raise SystemExit(1)
            else:
                print(f"Saved details for poem: {details['name']}")
                print("Poem saved successfully.")
    except Exception as e:
        print(f"An error occurred: {e}")
        raise SystemExit(1)
27 changes: 27 additions & 0 deletions utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import yaml
from sqlalchemy import create_engine


def load_configuration(file_path: str) -> dict:
    """Load a YAML configuration file and return its top-level mapping.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The parsed document as a dictionary.

    Raises:
        Exception: If the file cannot be opened or parsed, or if its top
            level is not a mapping (for example an empty file). The original
            error, when there is one, is attached as ``__cause__``.
    """
    try:
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            conf = yaml.safe_load(f)
    except Exception as exc:
        raise Exception(
            f"Failed to load configuration from {file_path}: {exc}"
        ) from exc

    # safe_load returns None for an empty document and may return a scalar or
    # list; callers index the result by key, so reject those up front.
    if not isinstance(conf, dict):
        raise Exception(
            f"Failed to load configuration from {file_path}: "
            f"expected a mapping at the top level, got {type(conf).__name__}"
        )
    return conf


def init_engine(config_path: str = "./config.yaml"):
    """Create a SQLAlchemy engine from the ``mysql`` section of the config.

    Args:
        config_path: Path of the YAML configuration file. Defaults to
            ``./config.yaml``, resolved against the current working
            directory.

    Returns:
        The engine, or ``None`` if the configuration could not be read or
        the engine could not be created (the error is printed).
    """
    try:
        # Local import keeps this function self-contained; sqlalchemy is
        # already a dependency of this module.
        from sqlalchemy.engine import URL

        mysql_conf = load_configuration(config_path)["mysql"]

        # Build the URL from components instead of string formatting so that
        # characters such as "@", ":" or "/" in the password are escaped
        # rather than corrupting the DSN. YAML may parse values like
        # ``123456`` as integers, hence the explicit str()/int() conversions.
        dsn = URL.create(
            drivername="mysql+mysqlconnector",
            username=str(mysql_conf["user"]),
            password=str(mysql_conf["password"]),
            host=str(mysql_conf["host"]),
            port=int(mysql_conf["port"]),
            database=str(mysql_conf["database"]),
        )

        return create_engine(dsn, pool_recycle=3600, future=True)
    except Exception as e:
        print(f"Error occurred while initializing the engine: {e}")
        return None

0 comments on commit 8fd5aa2

Please sign in to comment.