Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enable weather data #96

Merged
merged 27 commits into from
Sep 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
897e2b6
fix deployment issue in multi envs
zhawan Sep 22, 2020
79a9954
fix typo
zhawan Sep 22, 2020
09cd441
Merge remote-tracking branch 'upstream/v0.1' into v0.1
zhawan Sep 22, 2020
d9e117a
fix ~/.maro not exist issue in build
zhawan Sep 22, 2020
b94ff95
skip deploy when build
zhawan Sep 22, 2020
d0c94f4
update for comments
zhawan Sep 22, 2020
f371cf2
Merge remote-tracking branch 'upstream/v0.1' into v0.1
zhawan Sep 23, 2020
30c0b9c
Merge remote-tracking branch 'upstream/v0.1' into v0.1
zhawan Sep 23, 2020
1366871
temporarily disable weather info
zhawan Sep 23, 2020
afdb600
replace ecr with cim in setup.py
zhawan Sep 23, 2020
5f6dd92
replace ecr in manifest
zhawan Sep 23, 2020
92b31c0
remove weather check when read data
zhawan Sep 23, 2020
aa8567c
fix station id issue
zhawan Sep 23, 2020
829429c
Merge remote-tracking branch 'upstream/v0.1' into v0.1
zhawan Sep 23, 2020
12d59ac
fix format
zhawan Sep 23, 2020
b65ddad
Merge remote-tracking branch 'upstream/v0.1' into v0.1
zhawan Sep 23, 2020
e2d4d43
add TODO in comments
zhawan Sep 23, 2020
e728f13
Merge remote-tracking branch 'upstream/v0.1' into v0.1
zhawan Sep 23, 2020
d8f14b2
Merge remote-tracking branch 'upstream/v0.1' into v0.1
zhawan Sep 24, 2020
ed8b68b
Merge remote-tracking branch 'upstream/v0.1' into v0.1
zhawan Sep 24, 2020
8f53aee
Merge remote-tracking branch 'upstream/v0.1' into v0.1
zhawan Sep 24, 2020
5bee6ae
add noaa weather source
zhawan Sep 24, 2020
ee5badd
Merge remote-tracking branch 'upstream/v0.1' into v0.1
zhawan Sep 24, 2020
8b2d918
fix weather reset and weather comment
zhawan Sep 24, 2020
b33d755
add comment for weather data url
zhawan Sep 25, 2020
c60112a
Merge remote-tracking branch 'upstream/v0.1' into v0.1
zhawan Sep 25, 2020
6bf4243
some format update
zhawan Sep 25, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 71 additions & 6 deletions maro/cli/data_pipeline/citi_bike.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,7 @@ class CitiBikeTopology(DataTopology):
def __init__(self, topology: str, trip_source: str, station_info: str, weather_source: str, is_temp: bool = False):
# Register the data pipelines of one topology in self._data_pipeline:
#   "trip"    -> CitiBikePipeline(topology, trip_source, station_info, is_temp)
#   "weather" -> NOAAWeatherPipeline(topology, weather_source, is_temp)
super().__init__()
self._data_pipeline["trip"] = CitiBikePipeline(topology, trip_source, station_info, is_temp)
# NOTE(review): the next two comment lines appear to be the pre-change side of this diff hunk
# (weather pipeline disabled); the assignment below them is the post-change side — confirm.
# TODO: Weather data source changed, temporarily disable, will enable it later when new data source is available.
# self._data_pipeline["weather"] = WeatherPipeline(topology, weather_source, is_temp)
self._data_pipeline["weather"] = NOAAWeatherPipeline(topology, weather_source, is_temp)
# Keep the is_temp flag on the instance.
self._is_temp = is_temp

def __del__(self):
Expand Down Expand Up @@ -586,8 +585,7 @@ def __init__(self, topology: str, config_path: str, is_temp: bool = False):
with open(config_path) as fp:
cfg = safe_load(fp)
self._data_pipeline["trip"] = CitiBikeToyPipeline(start_time=cfg["start_time"], end_time=cfg["end_time"], stations=cfg["stations"], trips=cfg["trips"], topology=topology, is_temp=is_temp)
# TODO: Weather data source changed, temporarily disable, will enable it later when new data source is available.
# self._data_pipeline["weather"] = WeatherToyPipeline(topology=topology, start_time=cfg["start_time"], end_time=cfg["end_time"], is_temp=is_temp)
self._data_pipeline["weather"] = WeatherToyPipeline(topology=topology, start_time=cfg["start_time"], end_time=cfg["end_time"], is_temp=is_temp)
else:
logger.warning(f"Config file {config_path} for toy topology {topology} not found.")

Expand Down Expand Up @@ -617,7 +615,74 @@ def __init__(self, is_temp: bool = False):
self._conf = safe_load(fp)
for topology in self._conf["trips"].keys():
if topology.startswith("toy"):
self.topologies[topology] = CitiBikeToyTopology(topology=topology, config_path=self._conf["trips"][topology]["toy_meta_path"], is_temp=is_temp)
self.topologies[topology] = CitiBikeToyTopology(topology=topology,
config_path=self._conf["trips"][topology]["toy_meta_path"],
is_temp=is_temp)
else:
self.topologies[topology] = CitiBikeTopology(topology=topology, trip_source=self._conf["trips"][topology]["trip_remote_url"],
station_info=self._conf["station_info"]["ny_station_info_url"], weather_source=self._conf["weather"]["ny_weather_url"], is_temp=is_temp)
station_info=self._conf["station_info"]["ny_station_info_url"],
weather_source=self._conf["weather"][topology]["noaa_weather_url"],
is_temp=is_temp)


class NOAAWeatherPipeline(WeatherPipeline):
    """Weather pipeline that cleans daily-summaries CSV data downloaded from ncei.noaa.gov."""

    def __init__(self, topology: str, source: str, is_temp: bool = False):
        """
        Generate weather data bin for the specified topology from ncei.noaa.gov.
        Generated files are written to ~/.maro/data/citi_bike/[topology]/_build.
        Folder structure:
        ~/.maro
            /data/citi_bike/[topology]
                /_build bin data file
                /source
                    /_download original data file
                    /_clean cleaned data file
            /temp download temp file

        Args:
            topology(str): topology name of the data file
            source(str): source url of original data file
            is_temp(bool): (optional) if the data file is temporary
        """
        super().__init__(topology, source, is_temp)

    def clean(self):
        """
        Run the parent clean step, then convert the downloaded file into the clean file.

        If the downloaded file does not exist, nothing is converted and a warning is logged.
        """
        super().clean()
        if os.path.exists(self._download_file):
            # The clean file is recorded before it is written, same order as before.
            self._new_file_list.append(self._clean_file)
            logger.info_green("Cleaning weather data")
            self._preprocess(input_file=self._download_file, output_file=self._clean_file)
        else:
            logger.warning(f"Not found downloaded weather data: {self._download_file}")

    def _weather(self, row):
        """
        Map one row of the source data to a WeatherPipeline.WeatherEnum value.

        Args:
            row: a row exposing "PRCP" and "SNOW" by key.

        Returns:
            SLEET when both PRCP and SNOW are positive, RAINY when only PRCP is,
            SNOWY when only SNOW is, SUNNY otherwise.
        """
        # _preprocess coerces unparsable/missing cells to NaN (not None), so test with
        # pd.isna, which is true for both NaN and None, and treat missing as 0.0.
        water = 0.0 if pd.isna(row["PRCP"]) else row["PRCP"]
        snow = 0.0 if pd.isna(row["SNOW"]) else row["SNOW"]

        if snow > 0.0 and water > 0.0:
            return WeatherPipeline.WeatherEnum.SLEET.value
        elif water > 0.0:
            return WeatherPipeline.WeatherEnum.RAINY.value
        elif snow > 0.0:
            return WeatherPipeline.WeatherEnum.SNOWY.value
        else:
            return WeatherPipeline.WeatherEnum.SUNNY.value

    def _preprocess(self, input_file: str, output_file: str):
        """
        Read the downloaded CSV and write a CSV with "date", "weather" and "temp" columns.

        Args:
            input_file(str): path of the downloaded CSV; must contain the columns
                DATE, PRCP, SNOW, TMAX and TMIN.
            output_file(str): path of the cleaned CSV to write.
        """
        data: pd.DataFrame = pd.DataFrame()

        with open(input_file, "rt") as fp:
            org_data = pd.read_csv(fp)

        # Cells that cannot be parsed as numbers become NaN.
        for column in ("PRCP", "SNOW", "TMAX", "TMIN"):
            org_data[column] = pd.to_numeric(org_data[column], errors="coerce", downcast="integer")

        data["date"] = org_data["DATE"]
        data["weather"] = org_data.apply(self._weather, axis=1)
        # Temperature is the mean of the max and min columns; NaN if either is missing.
        data["temp"] = (org_data["TMAX"] + org_data["TMIN"]) / 2

        with open(output_file, mode="w", encoding="utf-8", newline="") as f:
            data.to_csv(f, index=False, header=True)
24 changes: 10 additions & 14 deletions maro/simulator/scenarios/citi_bike/business_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ def reset(self):

self._decision_strategy.reset()

self._last_date = None

def get_agent_idx_list(self) -> List[int]:
    """Return the ``index`` attribute of every entry in ``self._stations``, in order."""
    agent_indices: List[int] = []
    for current in self._stations:
        agent_indices.append(current.index)
    return agent_indices

Expand Down Expand Up @@ -202,13 +204,10 @@ def _init(self):
if trip_data_path.startswith("~"):
trip_data_path = os.path.expanduser(trip_data_path)

# TODO: Weather data source changed, temporarily disable, will enable it later when new data source is available.
# if (not os.path.exists(weather_data_path)) or (not os.path.exists(trip_data_path)):
if not os.path.exists(trip_data_path):
if (not os.path.exists(weather_data_path)) or (not os.path.exists(trip_data_path)):
self._build_temp_data()

# TODO: Weather data source changed, temporarily disable, will enable it later when new data source is available.
# self._weather_lut = WeatherTable(self._conf["weather_data"], self._time_zone)
self._weather_lut = WeatherTable(self._conf["weather_data"], self._time_zone)

self._trip_reader = BinaryReader(self._conf["trip_data"])

Expand Down Expand Up @@ -305,8 +304,7 @@ def _update_station_extra_features(self, tick: int):

self._last_date = cur_datetime

# TODO: Weather data source changed, temporarily disable, will enable it later when new data source is available.
# weather_info = self._weather_lut[cur_datetime]
weather_info = self._weather_lut[cur_datetime]

weekday = cur_datetime.weekday()
holiday = cur_datetime in self._us_holidays
Expand All @@ -315,10 +313,9 @@ def _update_station_extra_features(self, tick: int):
weather = 0
temperature = 0

# TODO: Weather data source changed, temporarily disable, will enable it later when new data source is available.
# if weather_info is not None:
# weather = weather_info.weather
# temperature = weather_info.temp
if weather_info is not None:
weather = weather_info.weather
temperature = weather_info.temp

for station in self._stations:
station.weekday = weekday
Expand Down Expand Up @@ -471,9 +468,8 @@ def _build_temp_data(self):
self._citi_bike_data_pipeline.build()
build_folders = self._citi_bike_data_pipeline.get_build_folders()
trip_folder = build_folders["trip"]
# TODO: Weather data source changed, temporarily disable, will enable it later when new data source is available.
# weather_folder = build_folders["weather"]
# self._conf["weather_data"] = chagne_file_path(self._conf["weather_data"], weather_folder)
weather_folder = build_folders["weather"]
self._conf["weather_data"] = chagne_file_path(self._conf["weather_data"], weather_folder)
self._conf["trip_data"] = chagne_file_path(self._conf["trip_data"], trip_folder)
self._conf["stations_init_data"] = chagne_file_path(self._conf["stations_init_data"], trip_folder)
self._conf["distance_adj_data"] = chagne_file_path(self._conf["distance_adj_data"], trip_folder)
Expand Down
64 changes: 63 additions & 1 deletion maro/simulator/scenarios/citi_bike/meta/source_urls.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,68 @@ trips:

weather:
ny_weather_url: "http://www.frontierweather.com/historicaldataonly/KNYC_daily.txt"
# Use www.ncei.noaa.gov's daily-summaries data of "US NY NY CENTRAL PARK, NY US" station (USW00094728) to represent the weather conditions in New York City.
ny.201801:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2018-01-01&endDate=2018-02-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201802:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2018-02-01&endDate=2018-03-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201803:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2018-03-01&endDate=2018-04-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201804:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2018-04-01&endDate=2018-05-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201805:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2018-05-01&endDate=2018-06-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201806:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2018-06-01&endDate=2018-07-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201807:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2018-07-01&endDate=2018-08-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201808:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2018-08-01&endDate=2018-09-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201809:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2018-09-01&endDate=2018-10-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201810:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2018-10-01&endDate=2018-11-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201811:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2018-11-01&endDate=2018-12-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201812:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2018-12-01&endDate=2019-01-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201901:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2019-01-01&endDate=2019-02-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201902:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2019-02-01&endDate=2019-03-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201903:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2019-03-01&endDate=2019-04-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201904:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2019-04-01&endDate=2019-05-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201905:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2019-05-01&endDate=2019-06-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201906:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2019-06-01&endDate=2019-07-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201907:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2019-07-01&endDate=2019-08-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201908:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2019-08-01&endDate=2019-09-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201909:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2019-09-01&endDate=2019-10-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201910:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2019-10-01&endDate=2019-11-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201911:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2019-11-01&endDate=2019-12-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.201912:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2019-12-01&endDate=2020-01-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.202001:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2020-01-01&endDate=2020-02-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.202002:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2020-02-01&endDate=2020-03-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.202003:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2020-03-01&endDate=2020-04-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.202004:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2020-04-01&endDate=2020-05-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.202005:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2020-05-01&endDate=2020-06-01&boundingBox=40.78,-74.0,40.76,-73.7"
ny.202006:
noaa_weather_url: "https://www.ncei.noaa.gov/access/services/data/v1?dataset=daily-summaries&dataTypes=PRCP,SNOW,TMAX,TMIN,AWND&stations=USW00094728&startDate=2020-06-01&endDate=2020-07-01&boundingBox=40.78,-74.0,40.76,-73.7"


station_info:
ny_station_info_url: "https://gbfs.citibikenyc.com/gbfs/en/station_information.json"
ny_station_info_url: "https://gbfs.citibikenyc.com/gbfs/en/station_information.json"