Skip to content

Commit

Permalink
Merge pull request #14 from pablominue/pablo-develop
Browse files Browse the repository at this point in the history
Version 0.2.5. Updated Query Module to use properties
  • Loading branch information
pablominue authored Jul 7, 2024
2 parents 10764ae + 4201024 commit 61de1c4
Show file tree
Hide file tree
Showing 8 changed files with 157 additions and 39 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ Allows to instantiate a SQL Connection to execute and fetch results (i.e., use t
- mysql
- pymssql
- pymysql
- pyodbc
- sqlalchemy
- trino
- trino

### Table Module

Allows creating tables in a SQL database from a pandas DataFrame. It can also insert the DataFrame's data into the
new table by calling the insert module.
Binary file added dist/pysqltools-0.2.5-py3-none-any.whl
Binary file not shown.
Binary file added dist/pysqltools-0.2.5.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "pysqltools"
version = "0.2.4"
version = "0.2.5"
description = "PySQLTools"
authors = ["Pablo Minué"]
license = "None"
Expand Down
137 changes: 110 additions & 27 deletions pysqltools/src/SQL/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,19 @@

import datetime
import re
from typing import Generator, Union
from typing import Any, Generator, Union

import sqlparse
from multimethod import multimethod

from pysqltools.src.SQL.exceptions import QueryFormattingError


class QueryException(Exception):
def __init__(self, *args: object) -> None:
super().__init__(*args)


class SQLString(str):
"""
String Class used to format queries without adding single quotes.
Expand Down Expand Up @@ -79,6 +84,12 @@ class Query:
The query module provides a Query class for working with SQL queries. Its methods make it easy to modify
the query, and the resulting SQL string is available through the sql
attribute of the object.
--------------------------
### Parameters
- sql: property. The string containing the SQL Query
- parsed: sqlparse object from the SQL Query
- options: via kwargs. Current options:
- indent_query (bool) default True: Re-indent the query for output
To add parameters to the query, use {{parameter}} on the SQL String.
Expand All @@ -98,56 +109,130 @@ class Query:
`query = Query(sql = sql).format(table_param = "MyTable")`
"""

def __init__(self, sql: str) -> None:
self.sql = sql.lower()
def __init__(self, sql: str, *args, **kwargs) -> None:
self._sql = sql.lower()
self.parsed = sqlparse.parse(sql)[0]
self.options = kwargs

@property
def sql(self):
"""
Contains the string with the SQL statement. Unless the `indent_query` option was set to False on the
constructor (it defaults to True), the sql is returned with automatic indentation.
"""
if self.options.get("indent_query", True):
self._sql = str(
sqlparse.format(
self._sql,
keyword_case="lower",
id_case="lower",
indent_columns=True,
reindent=True,
)
)
return self._sql
else:
return self._sql

@sql.setter
def sql(self, sql: str):
self._sql = sql.lower()

@property
def ctes(self) -> Generator:
"""
returns a generator containing all the CTEs on the query
returns a generator containing all the CTEs on the query. The generator returns
the CTE identifier as first argument and the CTE Content as second argument.
"""
cte_regex = re.compile(
r"""(?i)\b(\w+)\s+as\s+\((.*?)\)(?=\s*,|\s*select|\s*insert|\s*update|\s*delete|\s*with|\Z)""",
re.DOTALL | re.IGNORECASE | re.MULTILINE,
)

matches = cte_regex.findall(self.sql)

self._ctes = []
for _, match in enumerate(matches, 1):
cte_name, cte_content = match
yield (cte_name, cte_content)
self._ctes.append((cte_name, cte_content))

yield from self._ctes

@ctes.setter
def ctes(self) -> None:
raise QueryException("ctes is read-only")

@property
def parameters(self) -> Generator:
"""returns a generator containing all the Parameters on the query.
Parameters must be between {{ }}"""
regex = re.compile(r"(?<={{)\S*(?=}})")
yield from regex.findall(self.sql)
self._parameters = regex.findall(self.sql)
yield from self._parameters

def __non_greedy_regex(self, keyword_start: str, keyword_end: str) -> Generator:
""""""
@parameters.setter
def parameters(self) -> None:
raise QueryException("parameters is read-only")

@property
def tables(self) -> Generator:
"""Returns a generator containing all the detected tables. CTEs identifiers excluded"""
regex = re.compile(
rf"(?<={keyword_start}).*?(?={keyword_end})",
r"(?<=from|join).*?\s*\S*",
re.DOTALL | re.IGNORECASE | re.MULTILINE,
)
yield from [i.strip() for i in regex.findall(self.sql)]
self._tables = [r.strip() for r in regex.findall(self.sql)]
self._tables = [
t for t in self._tables if t not in [c[0] for c in list(self.ctes)]
]
yield from self._tables

@tables.setter
def tables(self, *args: Any) -> None:
raise QueryException("tables is read-only")

@property
def selects(self) -> Generator:
"""returns a generator containing all the Select contents on the query"""
yield from [i.strip() for i in self.__non_greedy_regex("select", "from")]
self._selects = [i.strip() for i in self.__non_greedy_regex("select", "from")]
yield from self._selects

@selects.setter
def selects(self) -> None:
raise QueryException("selects is read-only")

@property
def windows(self) -> Generator:
"""returns a generator containing all the Window Functions on the query"""
yield from [i.strip() for i in self.__non_greedy_regex("over", r"\)")]
self._windows = [i.strip() for i in self.__non_greedy_regex("over", r"\)")]
yield from self._windows

def tables(self) -> Generator:
"""Returns a generator containing all the detected tables"""
@windows.setter
def windows(self) -> None:
raise QueryException("windows is read-only")

def __non_greedy_regex(self, keyword_start: str, keyword_end: str) -> Generator:
""""""
regex = re.compile(
r"(?<=from|join).*?\s*\S*",
rf"(?<={keyword_start}).*?(?={keyword_end})",
re.DOTALL | re.IGNORECASE | re.MULTILINE,
)
results = regex.findall(self.sql)
results = [r.strip() for r in results]
yield from results
yield from [i.strip() for i in regex.findall(self.sql)]

def iter_query_lines(self) -> Generator:
"""
Iterate the SQL Query line by line.
"""
yield from self.sql.split("\n")

def get_ctes_dict(self) -> Generator:
"""
Get the CTEs on a dictionary
"""
cte_dict = {}
for k, v in self.ctes:
cte_dict.update({k: v})
return cte_dict

def format(self, **kwargs) -> "Query":
"""
Expand All @@ -173,18 +258,16 @@ def get_cte_by_identifier(self, identifier: str) -> Union[None, str]:
"""
Pass the identifier of one of the query CTEs and get the string containing the content of the CTE.
"""
ctes = {i: c for i, c in self.ctes()}
if identifier in ctes:
return ctes.get(identifier)
if identifier in self.get_ctes_dict():
return self.get_ctes_dict().get(identifier)
else:
return None

def replace_cte(self, identifier: str, new_cte_content: str) -> "Query":
"""
Given a CTE identifier, change its content with a new string
"""
ctes = {i: c for i, c in self.ctes()}
if identifier in ctes:
if identifier in self.get_ctes_dict():
self.sql = self.sql.replace(
self.get_cte_by_identifier(identifier), new_cte_content
)
Expand All @@ -197,7 +280,7 @@ def __str__(self):

def __dict__(self):
return {
"tables": list(self.tables()),
"ctes": list(self.ctes),
"parameters": list(self.parameters()),
"tables": list(self.tables),
"ctes": self.get_ctes_dict(),
"parameters": list(self.parameters),
}
35 changes: 33 additions & 2 deletions pysqltools/src/SQL/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,30 @@
import sqlparse

from .constants import TYPE_MAPPING
from .insert import insert_pandas


class Table:
"""
Class to manipulate SQL Tables
"""

def __init__(self, table: str, schema: Union[str, None] = None) -> None:
self.table = table
if schema:
self.table = f"{schema}.{table}"

def create_from_df(self, df: pd.DataFrame) -> str:
def create_from_df(
self, df: pd.DataFrame, insert_data: bool = False, **insert_kwargs: Any
) -> str:
"""
Get the SQL statement to create a SQL table based on a pandas DataFrame. If the insert_data argument is set to True,
you must also pass the arguments expected by `pysqltools.SQL.insert_pandas` as **kwargs. Example:
```python
table = Table(table = "myTable", schema = "dbo")
table.create_from_df(df, insert_data = True, connection = myConnection, batch_size = 10000)
```
"""
columns = dict(
zip(
df.dtypes.index.to_list(),
Expand All @@ -30,4 +45,20 @@ def create_from_df(self, df: pd.DataFrame) -> str:
for k, v in columns.items():
sql += f"{k} {v}, "
sql = sql[:-2] + " )"
return sqlparse.format(sql, encoding="utf-8")
if not insert_data:
return sqlparse.format(sql, encoding="utf-8")
if "batch_size" in insert_kwargs:
batch_size = insert_kwargs["batch_size"]
else:
batch_size = 1000
try:
insert_pandas(
df,
connection=insert_kwargs["connection"],
table=self.table,
batch_size=batch_size,
)
except TypeError:
raise TypeError(
"Please include the insert arguments into the create_table_from_df method"
)
2 changes: 1 addition & 1 deletion pysqltools/src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
Source code for the pysqltools package
"""

from pysqltools.src.SQL.insert import generate_insert_query
from pysqltools.src.SQL.insert import generate_insert_query, insert_pandas
from pysqltools.src.SQL.query import Query, SQLString
from pysqltools.src.SQL.table import Table
12 changes: 6 additions & 6 deletions tests/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def test_ctes():
with open("tests/queries/test_cte.sql", "r", encoding="utf-8") as f:
sql = f.read()
q = Query(sql=sql)
ctes = {cte[0]: cte[1] for cte in q.ctes()}
ctes = {cte[0]: cte[1] for cte in q.ctes}

assert len(ctes) == 2

Expand All @@ -21,7 +21,7 @@ def test_selects():
sql = f.read()
q = Query(sql=sql)

selects = {s for s in q.selects()}
selects = {s for s in q.selects}

assert len(selects) == 3

Expand All @@ -31,7 +31,7 @@ def test_windows():
sql = f.read()
q = Query(sql=sql)

windows = {w for w in q.windows()}
windows = {w for w in q.windows}
assert len(windows) == 1


Expand All @@ -40,8 +40,8 @@ def test_tables():
sql = f.read()
q = Query(sql=sql)

tables = [t for t in q.tables()]
assert len(tables) == 5
tables = [t for t in q.tables]
assert len(tables) == 3


def test_parameter():
Expand Down Expand Up @@ -79,7 +79,7 @@ def test_cte_replacement():
)
"""
q.replace_cte("test_2_cte", new_cte_content=new_cte)
assert new_cte in q.sql
assert "new_cte_value" in q.sql


def test_create_table_string():
Expand Down

0 comments on commit 61de1c4

Please sign in to comment.