-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_tables.py
107 lines (83 loc) · 3.55 KB
/
create_tables.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# coding: utf-8
# create_tables.py
import os
import mysql.connector
from sqlalchemy import create_engine
from sqlalchemy.orm import relationship, sessionmaker
from sqlalchemy import Column, ForeignKey, Integer, String, Text, BigInteger
from sqlalchemy.ext.declarative import declarative_base
import db_info
class TableCreator:
def __init__(self, dbname):
user = os.environ["MYSQL_USERNAME"]
password = os.environ["MYSQL_PASSWORD"]
host = 'reading.cjnyfwqsuidc.us-west-1.rds.amazonaws.com'
self.table = dict()
try:
self.engine = create_engine('mysql+mysqlconnector://%s:%s@%s:3306/%s' % (user, password, host, dbname),
echo=False)
DBSession = sessionmaker(bind=self.engine)
self.session = DBSession()
except:
raise Exception
def __del__(self):
# print self
try:
self.session.close()
except AttributeError:
print "Wrong in __del__: the instance has no attribute 'session'"
def dbcommit(self):
self.session.commit()
def createindextables(self):
# establish the database schema
# self means it is instance method, not class method or static method
Base = declarative_base()
class Urllist(Base):
__tablename__ = 'urllist'
id = Column(BigInteger, primary_key=True, autoincrement=False)
# for primary_key, unique=True and nullable=False are implicated
# for primary_key, autoincrement=True by default
# If no other index, clustered index is created automatically for primary key
# using hash(title) as id can avoid repetitive articles
url = Column(String(256), nullable=False)
# maximum of 256 characters, varchar(256)
title = Column(String(256))
# String(80)
content = Column(Text)
# varchar(10240), varchar(20000)
self.table['urllist'] = Urllist
# print 2
class Wordlist(Base):
__tablename__ = 'wordlist'
word = Column(String(20), nullable=False)
id = Column(Integer, primary_key=True)
self.table['wordlist'] = Wordlist
class Wordlocation(Base):
__tablename__ = 'wordlocation'
wordid = Column(Integer, ForeignKey('wordlist.id'))
word = relationship(Wordlist)
urlid = Column(BigInteger, ForeignKey('urllist.id'), nullable=False)
url = relationship(Urllist)
location = Column(Integer)
id = Column(Integer, primary_key=True)
self.table['wordlocation'] = Wordlocation
class Link(Base):
__tablename__ = 'link'
fromid = Column(BigInteger, ForeignKey('urllist.id'), nullable=False)
urlfrom = relationship(Urllist)
toid = Column(BigInteger, ForeignKey('urllist.id'), nullable=False)
urlto = relationship(Urllist)
id = Column(Integer, primary_key=True)
self.table['link'] = Link
class Linkword(Base):
__tablename__ = 'linkword'
linkid = Column(Integer, ForeignKey('link.id'), nullable=False)
link = relationship(Link)
wordid = Column(Integer, ForeignKey('wordlist.id'), nullable=False)
word = relationship(Wordlist)
id = Column(Integer, primary_key=True)
self.table['linkword'] = Linkword
# print 3
# print self.engine
Base.metadata.create_all(self.engine)
# print 4