Skip to content

Scrubbing and Fix Atomacos #211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jun 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@ cache

# db
*.db
*.db-journal

# VSCode
.VSCode
.vsCode

# Generated performance charts
performance

# Generated when adding editable dependencies in requirements.txt (-e)
src
src
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ source .venv/bin/activate
pip install wheel
pip install -r requirements.txt
pip install -e .
python -m spacy download en_core_web_trf
alembic upgrade head
pytest
```
Expand Down
Binary file added assets/test_scrub_image.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
70 changes: 69 additions & 1 deletion openadapt/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
"""Script containing configurations for the openadapt application.

Usage:

from openadapt import config
...
config.<setting>
...

"""

import multiprocessing
import os
import pathlib
Expand All @@ -13,12 +24,16 @@
"DB_ECHO": False,
"DB_FNAME": "openadapt.db",
"OPENAI_API_KEY": "<set your api key in .env>",
#"OPENAI_MODEL_NAME": "gpt-4",
# "OPENAI_MODEL_NAME": "gpt-4",
"OPENAI_MODEL_NAME": "gpt-3.5-turbo",
# may incur significant performance penalty
"RECORD_READ_ACTIVE_ELEMENT_STATE": False,
# TODO: remove?
"REPLAY_STRIP_ELEMENT_STATE": True,
# ACTION EVENT CONFIGURATIONS
"ACTION_TEXT_SEP": "-",
"ACTION_TEXT_NAME_PREFIX": "<",
"ACTION_TEXT_NAME_SUFFIX": ">"
}


Expand All @@ -38,8 +53,61 @@ def getenv_fallback(var_name):
ROOT_DIRPATH = pathlib.Path(__file__).parent.parent.resolve()
DB_FPATH = ROOT_DIRPATH / DB_FNAME
DB_URL = f"sqlite:///{DB_FPATH}"
DIRNAME_PERFORMANCE_PLOTS = "performance"

if multiprocessing.current_process().name == "MainProcess":
for key, val in locals().items():
if not key.startswith("_") and key.isupper():
logger.info(f"{key}={val}")


# SCRUBBING CONFIGURATIONS
#
# Module-level constants for scrubbing. Their consumers are not visible in
# this file, so the notes below describe only the values themselves.

# Toggle, masking character, and language code.
SCRUB_ENABLED = True
SCRUB_CHAR = "*"
SCRUB_LANGUAGE = "en"

# NLP engine settings: spaCy with the "en_core_web_trf" model for "en".
# NOTE(review): presumably passed to the analyzer's NLP engine setup -- confirm.
SCRUB_CONFIG_TRF = {
    "nlp_engine_name": "spacy",
    "models": [{"lang_code": "en", "model_name": "en_core_web_trf"}],
}

# One-element tuple, i.e. a single-channel value of 255.
# NOTE(review): presumably the fill used for scrubbed image regions -- confirm.
DEFAULT_SCRUB_FILL_COLOR = (255,)

# Entity type names listed here are the ones to ignore; only the uncommented
# entries are active. The commented-out names are kept as a reference of the
# other entity types that could be added to the list.
SCRUB_IGNORE_ENTITIES = [
    # 'US_PASSPORT',
    # 'US_DRIVER_LICENSE',
    # 'CRYPTO',
    # 'UK_NHS',
    # 'PERSON',
    # 'CREDIT_CARD',
    # 'US_BANK_NUMBER',
    # 'PHONE_NUMBER',
    # 'US_ITIN',
    # 'AU_ABN',
    "DATE_TIME",
    # 'NRP',
    # 'SG_NRIC_FIN',
    # 'AU_ACN',
    # 'IP_ADDRESS',
    # 'EMAIL_ADDRESS',
    "URL",
    # 'IBAN_CODE',
    # 'AU_TFN',
    # 'LOCATION',
    # 'AU_MEDICARE',
    # 'US_SSN',
    # 'MEDICAL_LICENSE'
]

# Key names whose values are subject to scrubbing.
# NOTE(review): the "_HTML" suffix suggests HTML/dict output -- verify against caller.
SCRUB_KEYS_HTML = [
    "text", "canonical_text",
    "title",
    "state",
    "task_description",
    "key_char", "canonical_key_char",
    "key_vk",
    "children",
]
12 changes: 4 additions & 8 deletions openadapt/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
import sqlalchemy as sa

from openadapt import db, utils, window
from openadapt import config, db, utils, window


# https://groups.google.com/g/sqlalchemy/c/wlr7sShU6-k
Expand Down Expand Up @@ -133,9 +133,9 @@ def canonical_key(self):
)

def _text(self, canonical=False):
sep = self._text_sep
name_prefix = self._text_name_prefix
name_suffix = self._text_name_suffix
sep = config.ACTION_TEXT_SEP
name_prefix = config.ACTION_TEXT_NAME_PREFIX
name_suffix = config.ACTION_TEXT_NAME_SUFFIX
if canonical:
key_attr = self.canonical_key
key_name_attr = self.canonical_key_name
Expand Down Expand Up @@ -201,10 +201,6 @@ def __str__(self):
rval = " ".join(attrs)
return rval

_text_sep = "-"
_text_name_prefix = "<"
_text_name_suffix = ">"

@classmethod
def from_children(cls, children_dicts):
children = [
Expand Down
7 changes: 3 additions & 4 deletions openadapt/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import fire
import mss.tools

from openadapt import config, crud, utils, window
from openadapt import config, crud, scrub, utils, window


EVENT_TYPES = ("screen", "action", "window")
Expand Down Expand Up @@ -389,7 +389,7 @@ def read_window_events(
# File "...\env\lib\site-packages\loguru\_logger.py", line 1964, in _log
# for handler in core.handlers.values):
# RuntimeError: dictionary changed size during iteration
_window_data = dict(window_data)
_window_data = window_data
_window_data.pop("state")
logger.info(f"{_window_data=}")
if window_data != prev_window_data:
Expand Down Expand Up @@ -524,8 +524,7 @@ def record(
"""

utils.configure_logging(logger, LOG_LEVEL)

logger.info(f"{task_description=}")
logger.info(f"{scrub.scrub_text(task_description)=}")

recording = create_recording(task_description)
recording_timestamp = recording.timestamp
Expand Down
Loading