Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 29 additions & 7 deletions augur/application/db/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import logging
import sqlalchemy as s
from sqlalchemy import func
from sqlalchemy.exc import DataError

Check warning on line 7 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0611: Unused DataError imported from sqlalchemy.exc (unused-import) Raw Output: augur/application/db/lib.py:7:0: W0611: Unused DataError imported from sqlalchemy.exc (unused-import)
from sqlalchemy.dialects import postgresql
from sqlalchemy.exc import OperationalError
from psycopg2.errors import DeadlockDetected
Expand Down Expand Up @@ -167,7 +167,7 @@

try:
working_commits = fetchall_data_from_sql_text(query)
except:

Check warning on line 170 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0702: No exception type(s) specified (bare-except) Raw Output: augur/application/db/lib.py:170:4: W0702: No exception type(s) specified (bare-except)
working_commits = []

return working_commits
Expand All @@ -183,7 +183,7 @@

try:
missing_commit_hashes = fetchall_data_from_sql_text(fetch_missing_hashes_sql)
except:

Check warning on line 186 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0702: No exception type(s) specified (bare-except) Raw Output: augur/application/db/lib.py:186:4: W0702: No exception type(s) specified (bare-except)
missing_commit_hashes = []

return missing_commit_hashes
Expand All @@ -203,7 +203,7 @@
return session.query(CollectionStatus).filter(getattr(CollectionStatus,f"{collection_type}_status" ) == CollectionState.COLLECTING.value).count()


def facade_bulk_insert_commits(logger, records):

Check warning on line 206 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'logger' from outer scope (line 19) (redefined-outer-name) Raw Output: augur/application/db/lib.py:206:31: W0621: Redefining name 'logger' from outer scope (line 19) (redefined-outer-name)

with get_session() as session:

Expand All @@ -225,24 +225,46 @@

facade_bulk_insert_commits(logger, firsthalfRecords)
facade_bulk_insert_commits(logger, secondhalfRecords)
elif len(records) == 1 and isinstance(e,DataError) and "time zone displacement" in f"{e}":
elif len(records) == 1:
commit_record = records[0]
#replace unparseable timezone offsets with UTC (+0000), keeping the original date and time.
#2021-10-11 11:57:46 -0500

# placeholder_date = "1970-01-01 00:00:15 -0500"
placeholder_date = commit_record['author_timestamp']
placeholder_date = commit_record['cmt_author_timestamp']

postgres_valid_timezones = {
-1200, -1100, -1000, -930, -900, -800, -700,
-600, -500, -400, -300, -230, -200, -100, 000,
100, 200, 300, 330, 400, 430, 500, 530, 545, 600,
630, 700, 800, 845, 900, 930, 1000, 1030, 1100, 1200,
1245, 1300, 1400
}

# Reconstruct timezone portion of the date string to UTC
placeholder_date = re.split("[-+]", placeholder_date)
placeholder_date.pop()
placeholder_date = "-".join(placeholder_date) + "+0000"
placeholder_date_segments = re.split(" ", placeholder_date)
tzdata = placeholder_date_segments.pop()

if ":" in tzdata:
tzdata = tzdata.replace(":", "")

if int(tzdata) not in postgres_valid_timezones:
tzdata = "+0000"
else:
raise e

placeholder_date_segments.append(tzdata)

placeholder_date = " ".join(placeholder_date_segments)

#Check for improper UTC timezone offset
#UTC timezone offset should be between -12:00 and +14:00 (the range covered by postgres_valid_timezones)

commit_record['author_timestamp'] = placeholder_date
commit_record['committer_timestamp'] = placeholder_date
# analyzecommit.generate_commit_record() defines the keys on the commit_record dictionary
commit_record['cmt_author_timestamp'] = placeholder_date
commit_record['cmt_committer_timestamp'] = placeholder_date

logger.warning(f"commit with invalid timezone set to UTC: {commit_record['cmt_commit_hash']}")

session.execute(
s.insert(Commit),
Expand All @@ -253,7 +275,7 @@
raise e


def batch_insert_contributors(logger, data: Union[List[dict], dict]) -> Optional[List[dict]]:

Check warning on line 278 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'logger' from outer scope (line 19) (redefined-outer-name) Raw Output: augur/application/db/lib.py:278:30: W0621: Redefining name 'logger' from outer scope (line 19) (redefined-outer-name)

batch_size = 1000

Expand All @@ -264,7 +286,7 @@



def bulk_insert_dicts(logger, data: Union[List[dict], dict], table, natural_keys: List[str], return_columns: Optional[List[str]] = None, string_fields: Optional[List[str]] = None, on_conflict_update:bool = True) -> Optional[List[dict]]:

Check warning on line 289 in augur/application/db/lib.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'logger' from outer scope (line 19) (redefined-outer-name) Raw Output: augur/application/db/lib.py:289:22: W0621: Redefining name 'logger' from outer scope (line 19) (redefined-outer-name)

if isinstance(data, list) is False:

Expand Down
Loading