-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
98 additions
and
30 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
class Extractor(object):
    """
    Base class for email extractors.

    Subclass this class to make new extractors that extract new types of
    information. Each extractor should be responsible for extracting
    different pieces of information (there is no stable conflict resolution
    mechanism as of yet).
    """

    def extract(self, raw_email):
        """
        Examine email content and metadata to extract information.

        Argument: a dict with email metadata and raw email content.
        Returns: a new dict with only the extracted data.
        Raises: NotImplementedError unless a subclass overrides this method.
        """
        # The signature takes ``self`` and the email dict so that subclasses
        # can be invoked as ``extractor.extract(raw_email)``.
        raise NotImplementedError
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from . import * | ||
|
||
# Registry list of extractors; empty as written here.
# NOTE(review): presumably this is the list the pipeline module imports and
# iterates, calling .extract(raw_email) on each entry - confirm.
extractors = [] |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import time | ||
|
||
from sqs_helpers import get_notifications, process_notifications | ||
from pipeline import process_notification | ||
|
||
# Maximum number of notifications requested from the queue per poll.
MAX_NOTIFICATION_BATCH_SIZE = 1

if __name__ == '__main__':
    # Worker loop: fetch a batch, hand every notification in it to the
    # pipeline, then pause before polling again. Runs until the process dies.
    while True:
        batch = get_notifications(MAX_NOTIFICATION_BATCH_SIZE)
        if batch:
            process_notifications(batch, process_notification)
        # NOTE(review): indentation was lost in the reviewed copy; the sleep
        # is placed at loop level (runs every iteration) - confirm.
        time.sleep(20)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
from contextlib import contextmanager | ||
|
||
from pymongo import MongoClient | ||
from bson.objectid import ObjectId | ||
|
||
import os  # MONGO_URI is read below; this module did not import os before

# Connection string for the MongoDB deployment, taken from the environment.
_mongo_uri = os.environ.get('MONGO_URI')
client = MongoClient(_mongo_uri)
# The database name is the last '/'-separated segment of the URI.
db = client[_mongo_uri.split('/')[-1]]
# Collections used by this module.
users = db.users
raw_data = db.raw_data
processed_data = db.processed_data
|
||
@contextmanager
def get_raw_email(object_id):
    """
    Context manager that hands out one raw email for processing.

    Looks up the raw_data document whose ``_id`` is ``object_id``. Yields
    ``None`` when there is nothing to do: either no such document exists, or
    processed_data already holds an entry whose ``email_id`` equals the raw
    email's ``id``. Otherwise yields the document with its ``_id`` key
    removed.

    When the ``with`` body finishes without raising, the raw document is
    removed from raw_data. If the body raises, the exception propagates and
    the raw document is left in place.
    """
    query = {'_id': ObjectId(object_id)}
    raw_email = raw_data.find_one(query)
    # find_one returns None when nothing matches; treat that like an already
    # processed email instead of failing on raw_email['id'].
    if raw_email is None or processed_data.find_one({'email_id': raw_email['id']}):
        yield None
        return
    del raw_email['_id']
    yield raw_email
    # Only reached when the with-body completed normally.
    raw_data.remove(query)
|
||
def store_processed_data(processed_data_dict):
    """Insert one processed-email dict into the processed_data collection."""
    processed_data.insert(processed_data_dict)


# The pipeline code imports this function under the name
# ``store_processed_email``; expose that name too so the import resolves
# while existing callers of ``store_processed_data`` keep working.
store_processed_email = store_processed_data
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from mongo import get_raw_email, store_processed_email | ||
from extractors import extractors | ||
|
||
def process_email(raw_email):
    """
    Run every registered extractor over one raw email.

    Argument: the raw email dict; its ``id`` value is copied to ``email_id``.
    Returns: a dict holding ``email_id`` plus everything the extractors
    returned. Later extractors overwrite keys set by earlier ones.
    """
    results = {'email_id': raw_email['id']}
    for extractor in extractors:
        results.update(extractor.extract(raw_email))
    # Callers store and return this value, so it must be handed back.
    return results
|
||
def process_notification(notification):
    """
    Process the raw email that a queue notification points at.

    Reads ``notification['object_id']``, obtains the raw email through
    get_raw_email, runs process_email on it and stores the outcome.
    Returns the processed result, or None when get_raw_email yields None.
    """
    with get_raw_email(notification['object_id']) as raw_email:
        if raw_email is not None:
            # Store while still inside the context manager.
            processed_email = process_email(raw_email)
            store_processed_email(processed_email)
            return processed_email
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
argparse==1.2.1 | ||
boto==2.32.1 | ||
pymongo==2.7.2 | ||
wsgiref==0.1.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,6 @@ | ||
from pymongo import MongoClient | ||
import boto, os | ||
from boto.sqs.jsonmessage import JSONMessage | ||
|
||
# Mongo client and collection handles.
# NOTE(review): the hunk header (-1,13 +1,6) suggests these five Mongo lines
# are the ones this commit removes from the module - confirm against the diff.
client = MongoClient(os.environ.get('MONGO_URI')) | ||
db = client[os.environ.get('MONGO_URI').split('/')[-1]] | ||
users = db.users | ||
raw_data = db.raw_data | ||
processed_data = db.processed_data | ||
|
||
# SQS setup: connect to the us-east-1 region, look up the queue named by the
# SQS_QUEUE environment variable, and set JSONMessage as its message class.
sqs_conn = boto.sqs.connect_to_region("us-east-1") | ||
importer_queue = sqs_conn.get_queue(os.environ.get('SQS_QUEUE')) | ||
importer_queue.set_message_class(JSONMessage) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
from sqs_conn import * | ||
|
||
def send_to_queue(dict):
    """Copy the given mapping into a new JSONMessage and write it to the importer queue."""
    message = JSONMessage()
    message.update(dict)
    importer_queue.write(message)
|
||
def get_notifications(num):
    """
    Fetch up to ``num`` notifications from the importer queue.

    Waits up to 20 seconds for messages to arrive.
    Returns whatever the queue's get_messages call returns.
    """
    # The boto Queue method is get_messages, and the requested count comes
    # from this function's own ``num`` parameter.
    return importer_queue.get_messages(num_messages=num, wait_time_seconds=20)
|
||
def delete_notification(notification):
    """Delete a single received notification from the importer queue."""
    # boto's Queue exposes delete_message; it has no delete_notification.
    importer_queue.delete_message(notification)
|
||
def delete_notifications(list_of_notifications):
    """
    Delete several received notifications from the importer queue at once.

    Does nothing when the list is empty.
    NOTE(review): SQS limits the number of entries per batch request;
    confirm callers never pass more than the service allows.
    """
    if not list_of_notifications:
        # Avoid sending an empty batch request.
        return
    # boto's Queue exposes delete_message_batch; it has no
    # delete_notification_batch.
    importer_queue.delete_message_batch(list_of_notifications)
|
||
def process_notifications(list_of_notifications, func):
    """
    Apply ``func`` to each notification in order, deleting each one afterwards.

    A notification is deleted only after ``func`` returns for it; if ``func``
    raises, the exception propagates and that notification is not deleted here.
    """
    for item in list_of_notifications:
        func(item)
        delete_notification(item)