Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Week 4 Submission #65

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/corise-dagster.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions week_1/project/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import csv

import requests

from dagster import asset


@asset
def cereals():
    """Download the cereal dataset and parse it into a list of row dicts."""
    response = requests.get("https://docs.dagster.io/assets/cereal.csv")
    # DictReader yields one dict per CSV row, keyed by the header line.
    return list(csv.DictReader(response.text.split("\n")))


@asset
def nabisco_cereals(cereals):
    """Cereals manufactured by Nabisco"""
    # Nabisco rows carry the manufacturer code "N".
    return list(filter(lambda row: row["mfr"] == "N", cereals))


@asset
def cereal_protein_fractions(cereals):
    """
    For each cereal, records its protein content as a fraction of its total mass.
    """
    # weight is in ounces; 28.35 converts ounces to grams so the units match
    # the grams-of-protein column.
    return {
        cereal["name"]: float(cereal["protein"]) / (float(cereal["weight"]) * 28.35)
        for cereal in cereals
    }


@asset
def highest_protein_nabisco_cereal(nabisco_cereals, cereal_protein_fractions):
    """
    The name of the nabisco cereal that has the highest protein content.
    """
    # max() finds the top item in one O(n) pass instead of sorting the whole
    # list. NOTE(review): on ties max() keeps the first occurrence, where the
    # old sorted(...)[-1] kept the last — confirm ties don't matter here.
    best = max(
        nabisco_cereals, key=lambda cereal: cereal_protein_fractions[cereal["name"]]
    )
    return best["name"]
29 changes: 22 additions & 7 deletions week_1/project/week_1.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
# CoRise Dagster Course assignment #1

import csv
from datetime import datetime
from typing import List

from dagster import In, Nothing, Out, job, op, usable_as_dagster_type
from pydantic import BaseModel

from operator import attrgetter

# Create new Type 'Stock'
@usable_as_dagster_type(description="Stock data")
class Stock(BaseModel):
date: datetime
Expand All @@ -27,7 +31,7 @@ def from_list(cls, input_list: list):
low=float(input_list[5]),
)


# Create new Type 'Aggregation'
@usable_as_dagster_type(description="Aggregation of stock data")
class Aggregation(BaseModel):
date: datetime
Expand All @@ -50,16 +54,27 @@ def get_s3_data(context):
return output


@op(
    ins={"StockList": In(dagster_type=List[Stock])},
    out={"Aggregation": Out(dagster_type=Aggregation)},
    description="get highest stock",
)
def process_data(StockList):
    """Return an Aggregation built from the stock with the greatest `high`.

    The old empty stub left over from the starter code is removed; only the
    implemented op remains.
    """
    # attrgetter avoids a lambda for the comparison key.
    hi_stock: Stock = max(StockList, key=attrgetter("high"))
    return Aggregation(date=hi_stock.date, high=hi_stock.high)


@op
def put_redis_data():
@op(
ins={"agg": In(dagster_type=Aggregation)},
tags={"kind": "redis"},
description="Save to Redis - pass for now",
)
def put_redis_data(agg: Aggregation):
pass


@job
def week_1_pipeline():
    """Wire the ops: get_s3_data -> process_data -> put_redis_data."""
    # The leftover `pass` from the starter stub is removed; it was dead code
    # preceding the real graph wiring.
    s3_fetch = process_data(get_s3_data())
    put_redis_data(s3_fetch)
65 changes: 65 additions & 0 deletions week_1/project/week_1_review.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import csv
from datetime import datetime
from typing import List

from dagster import In, Nothing, Out, job, op, usable_as_dagster_type
from pydantic import BaseModel


@usable_as_dagster_type(description="Stock data")
class Stock(BaseModel):
    """One daily stock observation parsed from a CSV row."""

    date: datetime
    close: float
    volume: int
    open: float
    high: float
    low: float

    @classmethod
    def from_list(cls, input_list: list):
        """Do not worry about this class method for now"""
        # CSV row layout: date, close, volume, open, high, low.
        fields = {
            "date": datetime.strptime(input_list[0], "%Y/%m/%d"),
            "close": float(input_list[1]),
            "volume": int(float(input_list[2])),
            "open": float(input_list[3]),
            "high": float(input_list[4]),
            "low": float(input_list[5]),
        }
        return cls(**fields)


@usable_as_dagster_type(description="Aggregation of stock data")
class Aggregation(BaseModel):
    # NOTE(review): populated from a single Stock's date/high by process_data
    # elsewhere in the project — confirm against callers.
    date: datetime
    high: float


@op(
    config_schema={"s3_key": str},
    out={"stocks": Out(dagster_type=List[Stock])},
    tags={"kind": "s3"},
    description="Get a list of stocks from an S3 file",
)
def get_s3_data(context):
    """Open the CSV file named by op config `s3_key` and parse every row
    into a Stock via Stock.from_list."""
    with open(context.op_config["s3_key"]) as csvfile:
        return [Stock.from_list(row) for row in csv.reader(csvfile)]


@op
def process_data():
    # TODO: not yet implemented in this review copy.
    pass


@op
def put_redis_data():
    # TODO: not yet implemented in this review copy.
    pass


@job
def week_1_pipeline():
    # TODO: wire the ops together once they are implemented.
    pass
46 changes: 34 additions & 12 deletions week_2/dagster_ucr/project/week_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,48 @@
from dagster_ucr.resources import mock_s3_resource, redis_resource, s3_resource


@op(
    config_schema={"s3_key": str},
    out={"the_stocks": Out(dagster_type=List[Stock])},
    required_resource_keys={"s3"},
    tags={"kind": "s3"},
    description="List of Stocks",
)
def get_s3_data(context):
    """Fetch raw rows from the S3 resource at op-config `s3_key` and parse
    each into a Stock.

    The dead starter stub that preceded this op is removed.
    """
    s3_key = context.op_config["s3_key"]
    # Stock.from_list handles the per-field type conversions.
    return [Stock.from_list(row) for row in context.resources.s3.get_data(s3_key)]


@op(
    description="Return Aggregation from stock list with the greatest `high` value",
    ins={"the_stocks": In(dagster_type=List[Stock])},
    out={"Aggregation": Out(Aggregation)},
)
def process_data(the_stocks: List[Stock]):
    """Pick the stock with the highest `high` and wrap its date/high in an
    Aggregation.

    The dead starter stub that preceded this op is removed.
    """
    top = max(the_stocks, key=lambda stock: stock.high)
    return Aggregation(date=top.date, high=top.high)


@op(
    description="Upload to Redis",
    ins={"aggregation": In(dagster_type=Aggregation)},
    out=Out(Nothing),
    required_resource_keys={"redis"},
    tags={"kind": "redis"},
)
def put_redis_data(context, aggregation) -> Nothing:
    """Persist the aggregation (stringified) under the fixed key "agg_data".

    The dead starter stub that preceded this op is removed.
    """
    context.resources.redis.put_data("agg_data", str(aggregation))


@graph
def week_2_pipeline():
    """Wire the ops: get_s3_data -> process_data -> put_redis_data."""
    # The leftover `pass` from the starter stub is removed.
    stocks = get_s3_data()
    stock_agg = process_data(stocks)
    put_redis_data(stock_agg)


local = {
Expand Down
39 changes: 30 additions & 9 deletions week_2/dagster_ucr/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def put_data(self, name: str, value: str):


# Resources

@resource(
config_schema={
"host": Field(String),
Expand All @@ -77,7 +78,7 @@ def postgres_resource(context) -> Postgres:
)


@resource
@resource()
def mock_s3_resource():
stocks = [
["2020/09/01", "10.0", "10", "10.0", "10.0", "10.0"],
Expand All @@ -91,13 +92,33 @@ def mock_s3_resource():
return s3_mock


@resource(
    config_schema={
        "bucket": Field(String),
        "access_key": Field(String),
        "secret_key": Field(String),
        "endpoint_url": Field(String),
    },
    description="S3 resource",
)
def s3_resource(context):
    """This resource defines a S3 client built from resource config.

    The dead starter stub that preceded this resource is removed.
    """
    return S3(
        bucket=context.resource_config["bucket"],
        access_key=context.resource_config["access_key"],
        secret_key=context.resource_config["secret_key"],
        endpoint_url=context.resource_config["endpoint_url"],
    )


@resource(
    config_schema={
        "host": Field(String),
        "port": Field(Int),
    },
    description="Redis resource",
)
def redis_resource(context):
    """This resource defines a Redis client built from resource config.

    The dead starter stub that followed this resource (inverted diff residue)
    is removed.
    """
    return Redis(
        host=context.resource_config["host"],
        port=context.resource_config["port"],
    )
5 changes: 2 additions & 3 deletions week_3/project/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,17 @@
from project.week_3 import (
docker_week_3_pipeline,
docker_week_3_schedule,
docker_week_3_sensor,
local_week_3_pipeline,
local_week_3_schedule,
docker_week_3_sensor
)


@repository
def repo():
    """Register all week-3 jobs, schedules, and sensors with Dagster.

    Diff residue listed `docker_week_3_sensor` twice; the duplicate is removed.
    """
    return [
        docker_week_3_pipeline,
        local_week_3_pipeline,
        local_week_3_schedule,
        docker_week_3_schedule,
        docker_week_3_sensor,
    ]
5 changes: 5 additions & 0 deletions week_3/project/sandbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
def return_list(key):
    """Build a Dagster run-config dict for the local docker setup.

    Args:
        key: value used as the ``s3_key`` op config for ``get_s3_data``.

    Returns:
        dict: ``resources`` (s3 + redis) and ``ops`` config sections.
    """
    # The original bound the dict to a local named `list`, shadowing the
    # builtin, and packed everything on one line; lay it out readably instead.
    return {
        "resources": {
            "s3": {
                "config": {
                    "bucket": "dagster",
                    "access_key": "test",
                    "secret_key": "test",
                    "endpoint_url": "http://host.docker.internal:4566",
                }
            },
            "redis": {"config": {"host": "redis", "port": 6379}},
        },
        "ops": {"get_s3_data": {"config": {"s3_key": key}}},
    }

if __name__ == "__main__":
    # Only print the sample config when run as a script, not on import.
    # NOTE(review): this changes import-time behavior (no print on import) —
    # confirm nothing relies on the module printing when imported.
    print(return_list('fraser'))
Loading