diff --git a/.gitignore b/.gitignore index e8ae93323090d..fa9fe8f69d2b6 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,7 @@ /src/thirdparty/catapult/ /src/thirdparty/flatbuffers/ /src/thirdparty/parquet-cpp +/thirdparty/pkg/ # Files generated by flatc should be ignored /src/common/format/*.py @@ -143,3 +144,6 @@ build # Pytest Cache **/.pytest_cache + +# Vscode +.vscode/ diff --git a/doc/source/conf.py b/doc/source/conf.py index 6accfe56b6cea..4c8bf3d20a923 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -32,7 +32,6 @@ "tensorflow.python", "tensorflow.python.client", "tensorflow.python.util", - "smart_open", "ray.local_scheduler", "ray.plasma", "ray.core.generated.TaskInfo", diff --git a/doc/source/policy-optimizers.rst b/doc/source/policy-optimizers.rst index 3a3c60bf22667..8753c2932f953 100644 --- a/doc/source/policy-optimizers.rst +++ b/doc/source/policy-optimizers.rst @@ -20,6 +20,8 @@ Example of constructing and using a policy optimizer `(link to full example) `__. + Here are the steps for using a RLlib policy optimizer with an existing algorithm. 1. Implement the `Policy evaluator interface `__. diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst index a9bee7daf7b8e..3a5b1109c94eb 100644 --- a/doc/source/rllib.rst +++ b/doc/source/rllib.rst @@ -7,19 +7,19 @@ You can find the code for RLlib `here on GitHub `__ which - is a proximal variant of `TRPO `__. +- Proximal Policy Optimization (`PPO `__) which is a proximal variant of `TRPO `__. -- `The Asynchronous Advantage Actor-Critic (A3C) `__. +- Policy Gradients (`PG `__). -- `Deep Q Networks (DQN) `__. +- Asynchronous Advantage Actor-Critic (`A3C `__). -- `Ape-X Distributed Prioritized Experience Replay `__. +- Deep Q Networks (`DQN `__). -- Evolution Strategies, as described in `this - paper `__. Our implementation - is adapted from - `here `__. +- Deep Deterministic Policy Gradients (`DDPG `__, `DDPG2 `__). + +- Ape-X Distributed Prioritized Experience Replay, including both `DQN `__ and `DDPG `__ variants. + +- Evolution Strategies (`ES `__), as described in `this paper `__. These algorithms can be run on any `OpenAI Gym MDP `__, including custom ones written and registered by the user. @@ -76,7 +76,7 @@ The ``train.py`` script has a number of options you can show by running The most important options are for choosing the environment with ``--env`` (any OpenAI gym environment including ones registered by the user can be used) and for choosing the algorithm with ``--run`` -(available options are ``PPO``, ``A3C``, ``ES``, ``DQN`` and ``APEX``). +(available options are ``PPO``, ``PG``, ``A3C``, ``ES``, ``DDPG``, ``DDPG2``, ``DQN``, ``APEX``, and ``APEX_DDPG2``). Specifying Parameters ~~~~~~~~~~~~~~~~~~~~~ @@ -84,10 +84,14 @@ Specifying Parameters Each algorithm has specific hyperparameters that can be set with ``--config`` - see the ``DEFAULT_CONFIG`` variable in `PPO `__, +`PG `__, `A3C `__, `ES `__, -`DQN `__ and -`APEX `__. +`DQN `__, +`DDPG `__, +`DDPG2 `__, +`APEX `__, and +`APEX_DDPG2 `__. In an example below, we train A3C by specifying 8 workers through the config flag. function that creates the env to refer to it by name. The contents of the env_config agent config field will be passed to that function to allow the environment to be configured. The return type should be an OpenAI gym.Env. For example: diff --git a/docker/examples/Dockerfile b/docker/examples/Dockerfile index 72a4314da3d64..382da881b5517 100644 --- a/docker/examples/Dockerfile +++ b/docker/examples/Dockerfile @@ -3,6 +3,6 @@ FROM ray-project/deploy RUN conda install -y -c conda-forge tensorflow RUN apt-get install -y zlib1g-dev -RUN pip install gym[atari] opencv-python==3.2.0.8 smart_open +RUN pip install gym[atari] opencv-python==3.2.0.8 RUN pip install --upgrade git+git://github.com/hyperopt/hyperopt.git # RUN conda install -y -q pytorch torchvision -c soumith diff --git a/python/build-wheel-macos.sh b/python/build-wheel-macos.sh index 7559fd02cd016..8a56c59f85324 100755 --- a/python/build-wheel-macos.sh +++ b/python/build-wheel-macos.sh @@ -53,6 +53,8 @@ for ((i=0; i<${#PY_VERSIONS[@]}; ++i)); do popd pushd python + # Setuptools on CentOS is too old to install arrow 0.9.0, therefore we upgrade. + $PIP_CMD install --upgrade setuptools # Install setuptools_scm because otherwise when building the wheel for # Python 3.6, we see an error. $PIP_CMD install -q setuptools_scm diff --git a/python/ray/autoscaler/autoscaler.py b/python/ray/autoscaler/autoscaler.py index 2e2f9593c8920..d5a5336f41b8b 100644 --- a/python/ray/autoscaler/autoscaler.py +++ b/python/ray/autoscaler/autoscaler.py @@ -224,7 +224,7 @@ def __init__(self, max_concurrent_launches=AUTOSCALER_MAX_CONCURRENT_LAUNCHES, max_failures=AUTOSCALER_MAX_NUM_FAILURES, process_runner=subprocess, - verbose_updates=False, + verbose_updates=True, node_updater_cls=NodeUpdaterProcess, update_interval_s=AUTOSCALER_UPDATE_INTERVAL_S): self.config_path = config_path diff --git a/python/ray/autoscaler/aws/development-example.yaml b/python/ray/autoscaler/aws/development-example.yaml index 67b15352522a1..273d1d7d94593 100644 --- a/python/ray/autoscaler/aws/development-example.yaml +++ b/python/ray/autoscaler/aws/development-example.yaml @@ -94,7 +94,7 @@ setup_commands: - echo 'export PATH="$HOME/anaconda3/bin:$PATH"' >> ~/.bashrc # Build Ray. - git clone https://github.com/ray-project/ray || true - - pip install boto3==1.4.8 + - pip install boto3==1.4.8 cython==0.27.3 - cd ray/python; pip install -e . --verbose # Custom commands that will be run on the head node after common setup. diff --git a/python/ray/dataframe/__init__.py b/python/ray/dataframe/__init__.py index 5081df3d3f636..7eea37f99f2af 100644 --- a/python/ray/dataframe/__init__.py +++ b/python/ray/dataframe/__init__.py @@ -3,7 +3,7 @@ from __future__ import print_function import pandas as pd -from pandas import eval +from pandas import (eval, Panel, date_range, MultiIndex) import threading pd_version = pd.__version__ @@ -30,11 +30,15 @@ def get_npartitions(): # because they depend on npartitions. from .dataframe import DataFrame # noqa: 402 from .series import Series # noqa: 402 -from .io import (read_csv, read_parquet) # noqa: 402 -from .concat import concat # noqa: 402 +from .io import (read_csv, read_parquet, read_json, read_html, # noqa: 402 + read_clipboard, read_excel, read_hdf, read_feather, # noqa: 402 + read_msgpack, read_stata, read_sas, read_pickle, # noqa: 402 + read_sql) # noqa: 402 +from .concat import concat # noqa: 402 __all__ = [ - "DataFrame", "Series", "read_csv", "read_parquet", "concat", "eval" + "DataFrame", "Series", "read_csv", "read_parquet", "concat", "eval", + "Panel", "date_range", "MultiIndex" ] try: diff --git a/python/ray/dataframe/concat.py b/python/ray/dataframe/concat.py index 3271bdb7f286b..952e326edc1ff 100644 --- a/python/ray/dataframe/concat.py +++ b/python/ray/dataframe/concat.py @@ -1,90 +1,133 @@ -import pandas as pd +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import pandas import numpy as np -from .dataframe import DataFrame as rdf -from .utils import ( - from_pandas, - _deploy_func) -from functools import reduce +from .dataframe import DataFrame +from .utils import _reindex_helper def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, keys=None, levels=None, names=None, verify_integrity=False, copy=True): - def _concat(frame1, frame2): - # Check type on objects - # Case 1: Both are Pandas DF - if isinstance(frame1, pd.DataFrame) and \ - isinstance(frame2, pd.DataFrame): - - return pd.concat((frame1, frame2), axis, join, join_axes, + if keys is not None: + objs = [objs[k] for k in keys] + else: + objs = list(objs) + + if len(objs) == 0: + raise ValueError("No objects to concatenate") + + objs = [obj for obj in objs if obj is not None] + + if len(objs) == 0: + raise ValueError("All objects passed were None") + + try: + type_check = next(obj for obj in objs + if not isinstance(obj, (pandas.Series, + pandas.DataFrame, + DataFrame))) + except StopIteration: + type_check = None + if type_check is not None: + raise ValueError("cannot concatenate object of type \"{0}\"; only " + "pandas.Series, pandas.DataFrame, " + "and ray.dataframe.DataFrame objs are " + "valid", type(type_check)) + + all_series = all([isinstance(obj, pandas.Series) + for obj in objs]) + if all_series: + return pandas.concat(objs, axis, join, join_axes, ignore_index, keys, levels, names, verify_integrity, copy) - if not (isinstance(frame1, rdf) and - isinstance(frame2, rdf)) and join == 'inner': - raise NotImplementedError( - "Obj as dicts not implemented. To contribute to " - "Pandas on Ray, please visit github.com/ray-project/ray." - ) - - # Case 2: Both are different types - if isinstance(frame1, pd.DataFrame): - frame1 = from_pandas(frame1, len(frame1) / 2**16 + 1) - if isinstance(frame2, pd.DataFrame): - frame2 = from_pandas(frame2, len(frame2) / 2**16 + 1) - - # Case 3: Both are Ray DF - if isinstance(frame1, rdf) and \ - isinstance(frame2, rdf): - - new_columns = frame1.columns.join(frame2.columns, how=join) - - def _reindex_helper(pdf, old_columns, join): - pdf.columns = old_columns - if join == 'outer': - pdf = pdf.reindex(columns=new_columns) - else: - pdf = pdf[new_columns] - pdf.columns = pd.RangeIndex(len(new_columns)) - - return pdf - - f1_columns, f2_columns = frame1.columns, frame2.columns - new_f1 = [_deploy_func.remote(lambda p: _reindex_helper(p, - f1_columns, join), part) for - part in frame1._row_partitions] - new_f2 = [_deploy_func.remote(lambda p: _reindex_helper(p, - f2_columns, join), part) for - part in frame2._row_partitions] - - return rdf(row_partitions=new_f1 + new_f2, columns=new_columns, - index=frame1.index.append(frame2.index)) - - # (TODO) Group all the pandas dataframes - if isinstance(objs, dict): raise NotImplementedError( "Obj as dicts not implemented. To contribute to " - "Pandas on Ray, please visit github.com/ray-project/ray." - ) + "Pandas on Ray, please visit github.com/ray-project/ray.") - axis = pd.DataFrame()._get_axis_number(axis) - if axis == 1: - raise NotImplementedError( - "Concat not implemented for axis=1. To contribute to " - "Pandas on Ray, please visit github.com/ray-project/ray." - ) - - all_pd = np.all([isinstance(obj, pd.DataFrame) for obj in objs]) - if all_pd: - result = pd.concat(objs, axis, join, join_axes, - ignore_index, keys, levels, names, - verify_integrity, copy) - else: - result = reduce(_concat, objs) + axis = pandas.DataFrame()._get_axis_number(axis) - if isinstance(result, pd.DataFrame): - return from_pandas(result, len(result) / 2**16 + 1) + if join not in ['inner', 'outer']: + raise ValueError("Only can inner (intersect) or outer (union) join the" + " other axis") - return result + # We need this in a list because we use it later. + all_index, all_columns = list(zip(*[(obj.index, obj.columns) + for obj in objs])) + + def series_to_df(series, columns): + df = pandas.DataFrame(series) + df.columns = columns + return DataFrame(df) + + # Pandas puts all of the Series in a single column named 0. This is + # true regardless of the existence of another column named 0 in the + # concat. + if axis == 0: + objs = [series_to_df(obj, [0]) + if isinstance(obj, pandas.Series) else obj for obj in objs] + else: + # Pandas starts the count at 0 so this will increment the names as + # long as there's a new nameless Series being added. + def name_incrementer(i): + val = i[0] + i[0] += 1 + return val + + i = [0] + objs = [series_to_df(obj, obj.name if obj.name is not None + else name_incrementer(i)) + if isinstance(obj, pandas.Series) else obj for obj in objs] + + # Using concat on the columns and index is fast because they're empty, + # and it forces the error checking. It also puts the columns in the + # correct order for us. + final_index = \ + pandas.concat([pandas.DataFrame(index=idx) for idx in all_index], + axis=axis, join=join, join_axes=join_axes, + ignore_index=ignore_index, keys=keys, levels=levels, + names=names, verify_integrity=verify_integrity, + copy=False).index + final_columns = \ + pandas.concat([pandas.DataFrame(columns=col) + for col in all_columns], + axis=axis, join=join, join_axes=join_axes, + ignore_index=ignore_index, keys=keys, levels=levels, + names=names, verify_integrity=verify_integrity, + copy=False).columns + + # Put all of the DataFrames into Ray format + # TODO just partition the DataFrames instead of building a new Ray DF. + objs = [DataFrame(obj) if isinstance(obj, (pandas.DataFrame, + pandas.Series)) else obj + for obj in objs] + + # Here we reuse all_columns/index so we don't have to materialize objects + # from remote memory built in the previous line. In the future, we won't be + # building new DataFrames, rather just partitioning the DataFrames. + if axis == 0: + new_blocks = np.array([_reindex_helper._submit( + args=tuple([all_columns[i], final_columns, axis, + len(objs[0]._block_partitions)] + part.tolist()), + num_return_vals=len(objs[0]._block_partitions)) + for i in range(len(objs)) + for part in objs[i]._block_partitions]) + else: + # Transposing the columns is necessary because the remote task treats + # everything like rows and returns in row-major format. Luckily, this + # operation is cheap in numpy. + new_blocks = np.array([_reindex_helper._submit( + args=tuple([all_index[i], final_index, axis, + len(objs[0]._block_partitions.T)] + part.tolist()), + num_return_vals=len(objs[0]._block_partitions.T)) + for i in range(len(objs)) + for part in objs[i]._block_partitions.T]).T + + return DataFrame(block_partitions=new_blocks, + columns=final_columns, + index=final_index) diff --git a/python/ray/dataframe/dataframe.py b/python/ray/dataframe/dataframe.py index 830a7b7347c6b..b96c4c836453b 100644 --- a/python/ray/dataframe/dataframe.py +++ b/python/ray/dataframe/dataframe.py @@ -8,7 +8,8 @@ from pandas.core.index import _ensure_index_from_sequences from pandas._libs import lib from pandas.core.dtypes.cast import maybe_upcast_putmask -from pandas.compat import lzip +from pandas import compat +from pandas.compat import lzip, string_types, cPickle as pkl import pandas.core.common as com from pandas.core.dtypes.common import ( is_bool_dtype, @@ -25,6 +26,7 @@ import sys import re +from .groupby import DataFrameGroupBy from .utils import ( _deploy_func, _map_partitions, @@ -33,7 +35,9 @@ _blocks_to_col, _blocks_to_row, _create_block_partitions, - _inherit_docstrings) + _inherit_docstrings, + _reindex_helper, + _co_op_helper) from . import get_npartitions from .index_metadata import _IndexMetadata @@ -90,7 +94,6 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, axis = 0 columns = pd_df.columns index = pd_df.index - self._row_metadata = self._col_metadata = None else: # created this invariant to make sure we never have to go into the # partitions to get the columns @@ -101,41 +104,39 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, if block_partitions is not None: # put in numpy array here to make accesses easier since it's 2D self._block_partitions = np.array(block_partitions) - if row_metadata is not None: - self._row_metadata = row_metadata.copy() - if col_metadata is not None: - self._col_metadata = col_metadata.copy() - assert self._block_partitions.ndim == 2, \ - "Block Partitions must be 2D." + axis = 0 else: if row_partitions is not None: axis = 0 partitions = row_partitions - if row_metadata is not None: - self._row_metadata = row_metadata.copy() elif col_partitions is not None: axis = 1 partitions = col_partitions - if col_metadata is not None: - self._col_metadata = col_metadata.copy() self._block_partitions = \ _create_block_partitions(partitions, axis=axis, length=len(columns)) + if row_metadata is not None: + self._row_metadata = row_metadata.copy() + if col_metadata is not None: + self._col_metadata = col_metadata.copy() + # Sometimes we only get a single column or row, which is # problematic for building blocks from the partitions, so we # add whatever dimension we're missing from the input. - if self._block_partitions.ndim != 2: + if self._block_partitions.ndim < 2: self._block_partitions = np.expand_dims(self._block_partitions, axis=axis ^ 1) + assert self._block_partitions.ndim == 2, "Block Partitions must be 2D." + # Create the row and column index objects for using our partitioning. # If the objects haven't been inherited, then generate them - if not self._row_metadata: + if self._row_metadata is None: self._row_metadata = _IndexMetadata(self._block_partitions[:, 0], index=index, axis=0) - if not self._col_metadata: + if self._col_metadata is None: self._col_metadata = _IndexMetadata(self._block_partitions[0, :], index=columns, axis=1) @@ -279,8 +280,8 @@ def _repr_html_(self): return self._repr_helper_()._repr_html_() # We split so that we insert our correct dataframe dimensions. result = self._repr_helper_()._repr_html_() - return result.split('

')[0] + \ - '

{0} rows × {1} columns

\n'.format(len(self.index), + return result.split("

")[0] + \ + "

{0} rows x {1} columns

\n".format(len(self.index), len(self.columns)) def _get_index(self): @@ -457,7 +458,8 @@ def shape(self): return len(self.index), len(self.columns) def _update_inplace(self, row_partitions=None, col_partitions=None, - columns=None, index=None): + block_partitions=None, columns=None, index=None, + col_metadata=None, row_metadata=None): """Updates the current DataFrame inplace. Behavior should be similar to the constructor, given the corresponding @@ -481,22 +483,31 @@ def _update_inplace(self, row_partitions=None, col_partitions=None, not have enough contextual info to rebuild the indexes correctly based on the addition/subtraction of rows/columns. """ - assert row_partitions is not None or col_partitions is not None, \ + assert row_partitions is not None or col_partitions is not None\ + or block_partitions is not None, \ "To update inplace, new column or row partitions must be set." - if row_partitions is not None: + if block_partitions is not None: + self._block_partitions = block_partitions + elif row_partitions is not None: self._row_partitions = row_partitions elif col_partitions is not None: self._col_partitions = col_partitions - if row_partitions is not None or col_partitions is not None: - # At least one partition list is being updated, so recompute - # lengths and indices - self._row_metadata = _IndexMetadata(self._block_partitions[:, 0], - index=index, axis=0) - self._col_metadata = _IndexMetadata(self._block_partitions[0, :], - index=columns, axis=1) + if col_metadata is not None: + self._col_metadata = col_metadata + else: + assert columns is not None, \ + "Columns must be passed without col_metadata" + self._col_metadata = _IndexMetadata( + self._block_partitions[0, :], index=columns, axis=1) + if row_metadata is not None: + self._row_metadata = row_metadata + else: + # Index can be None for default index, so we don't check + self._row_metadata = _IndexMetadata( + self._block_partitions[:, 0], index=index, axis=0) def add_prefix(self, prefix): """Add a prefix to each of the column names. @@ -562,9 +573,23 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, Returns: A new DataFrame resulting from the groupby. """ - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + axis = pd.DataFrame()._get_axis_number(axis) + if callable(by): + by = by(self.index) + elif isinstance(by, compat.string_types): + by = self.__getitem__(by).values.tolist() + elif is_list_like(by): + mismatch = len(by) != len(self) if axis == 0 \ + else len(by) != len(self.columns) + + if all([obj in self for obj in by]) and mismatch: + raise NotImplementedError( + "Groupby with lists of columns not yet supported.") + elif mismatch: + raise KeyError(next(x for x in by if x not in self)) + + return DataFrameGroupBy(self, by, axis, level, as_index, sort, + group_keys, squeeze, **kwargs) def sum(self, axis=None, skipna=True, level=None, numeric_only=None): """Perform a sum across the DataFrame. @@ -635,7 +660,9 @@ def isna(self): return DataFrame(block_partitions=new_block_partitions, columns=self.columns, - index=self.index) + index=self.index, + row_metadata=self._row_metadata, + col_metadata=self._col_metadata) def isnull(self): """Fill a DataFrame with booleans for cells containing a null value. @@ -699,19 +726,178 @@ def dropna(self, axis, how, thresh=None, subset=[], inplace=False): raise NotImplementedError("Not yet") def add(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Add this DataFrame to another or a scalar/list. + + Args: + other: What to add this this DataFrame. + axis: The axis to apply addition over. Only applicaable to Series + or list 'other'. + level: A level in the multilevel axis to add over. + fill_value: The value to fill NaN. + + Returns: + A new DataFrame with the applied addition. + """ + return self._operator_helper(pd.DataFrame.add, other, axis, level, + fill_value) def agg(self, func, axis=0, *args, **kwargs): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.aggregate(func, axis, *args, **kwargs) def aggregate(self, func, axis=0, *args, **kwargs): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + axis = pd.DataFrame()._get_axis_number(axis) + + result = None + + if axis == 0: + try: + result = self._aggregate(func, axis=axis, *args, **kwargs) + except TypeError: + pass + + if result is None: + kwargs.pop('is_transform', None) + return self.apply(func, axis=axis, args=args, **kwargs) + + return result + + def _aggregate(self, arg, *args, **kwargs): + _axis = kwargs.pop('_axis', None) + if _axis is None: + _axis = getattr(self, 'axis', 0) + kwargs.pop('_level', None) + + if isinstance(arg, compat.string_types): + return self._string_function(arg, *args, **kwargs) + + # Dictionaries have complex behavior because they can be renamed here. + elif isinstance(arg, dict): + raise NotImplementedError( + "To contribute to Pandas on Ray, please visit " + "github.com/ray-project/ray.") + elif is_list_like(arg): + from .concat import concat + + x = [self._aggregate(func, *args, **kwargs) + for func in arg] + + new_dfs = [x[i] if not isinstance(x[i], pd.Series) + else pd.DataFrame(x[i], columns=[arg[i]]).T + for i in range(len(x))] + + return concat(new_dfs) + elif callable(arg): + self._callable_function(arg, _axis, *args, **kwargs) + else: + # TODO Make pandas error + raise ValueError("type {} is not callable".format(type(arg))) + + def _string_function(self, func, *args, **kwargs): + assert isinstance(func, compat.string_types) + + f = getattr(self, func, None) + + if f is not None: + if callable(f): + return f(*args, **kwargs) + + assert len(args) == 0 + assert len([kwarg + for kwarg in kwargs + if kwarg not in ['axis', '_level']]) == 0 + return f + + f = getattr(np, func, None) + if f is not None: + raise NotImplementedError("Numpy aggregates not yet supported.") + + raise ValueError("{} is an unknown string function".format(func)) + + def _callable_function(self, func, axis, *args, **kwargs): + if axis == 0: + partitions = self._col_partitions + else: + partitions = self._row_partitions + + if axis == 1: + kwargs['axis'] = axis + kwargs['temp_columns'] = self.columns + else: + kwargs['temp_index'] = self.index + + def agg_helper(df, arg, *args, **kwargs): + if 'temp_index' in kwargs: + df.index = kwargs.pop('temp_index', None) + else: + df.columns = kwargs.pop('temp_columns', None) + is_transform = kwargs.pop('is_transform', False) + new_df = df.agg(arg, *args, **kwargs) + + is_series = False + + if isinstance(new_df, pd.Series): + is_series = True + index = None + columns = None + else: + index = new_df.index \ + if not isinstance(new_df.index, pd.RangeIndex) \ + else None + columns = new_df.columns + new_df.columns = pd.RangeIndex(0, len(new_df.columns)) + new_df.reset_index(drop=True, inplace=True) + + if is_transform: + if is_scalar(new_df) or len(new_df) != len(df): + raise ValueError("transforms cannot produce " + "aggregated results") + + return is_series, new_df, index, columns + + remote_result = \ + [_deploy_func._submit(args=(lambda df: agg_helper(df, + func, + *args, + **kwargs), + part), num_return_vals=4) + for part in partitions] + + # This magic transposes the list comprehension returned from remote + is_series, new_parts, index, columns = \ + [list(t) for t in zip(*remote_result)] + + # This part is because agg can allow returning a Series or a + # DataFrame, and we have to determine which here. Shouldn't add + # too much to latency in either case because the booleans can + # be returned immediately + is_series = ray.get(is_series) + if all(is_series): + new_series = pd.concat(ray.get(new_parts)) + new_series.index = self.columns if axis == 0 else self.index + return new_series + # This error is thrown when some of the partitions return Series and + # others return DataFrames. We do not allow mixed returns. + elif any(is_series): + raise ValueError("no results.") + # The remaining logic executes when we have only DataFrames in the + # remote objects. We build a Ray DataFrame from the Pandas partitions. + elif axis == 0: + new_index = ray.get(index[0]) + columns = ray.get(columns) + columns = columns[0].append(columns[1:]) + + return DataFrame(col_partitions=new_parts, + columns=columns, + index=self.index if new_index is None + else new_index) + else: + new_index = ray.get(index[0]) + columns = ray.get(columns) + columns = columns[0].append(columns[1:]) + return DataFrame(row_partitions=new_parts, + columns=columns, + index=self.index if new_index is None + else new_index) def align(self, other, join='outer', axis=None, level=None, copy=True, fill_value=None, method=None, limit=None, fill_axis=0, @@ -749,15 +935,85 @@ def remote_func(df): return self._arithmetic_helper(remote_func, axis, level) def append(self, other, ignore_index=False, verify_integrity=False): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Append another DataFrame/list/Series to this one. + + Args: + other: The object to append to this. + ignore_index: Ignore the index on appending. + verify_integrity: Verify the integrity of the index on completion. + + Returns: + A new DataFrame containing the concatenated values. + """ + if isinstance(other, (pd.Series, dict)): + if isinstance(other, dict): + other = pd.Series(other) + if other.name is None and not ignore_index: + raise TypeError('Can only append a Series if ignore_index=True' + ' or if the Series has a name') + + if other.name is None: + index = None + else: + # other must have the same index name as self, otherwise + # index name will be reset + index = pd.Index([other.name], name=self.index.name) + + combined_columns = self.columns.tolist() + self.columns.union( + other.index).difference(self.columns).tolist() + other = other.reindex(combined_columns, copy=False) + other = pd.DataFrame(other.values.reshape((1, len(other))), + index=index, + columns=combined_columns) + other = other._convert(datetime=True, timedelta=True) + elif isinstance(other, list) and not isinstance(other[0], DataFrame): + other = pd.DataFrame(other) + if (self.columns.get_indexer(other.columns) >= 0).all(): + other = other.loc[:, self.columns] + + from .concat import concat + if isinstance(other, (list, tuple)): + to_concat = [self] + other + else: + to_concat = [self, other] + + return concat(to_concat, ignore_index=ignore_index, + verify_integrity=verify_integrity) def apply(self, func, axis=0, broadcast=False, raw=False, reduce=None, args=(), **kwds): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Apply a function along input axis of DataFrame. + + Args: + func: The function to apply + axis: The axis over which to apply the func. + broadcast: Whether or not to broadcast. + raw: Whether or not to convert to a Series. + reduce: Whether or not to try to apply reduction procedures. + + Returns: + Series or DataFrame, depending on func. + """ + axis = pd.DataFrame()._get_axis_number(axis) + + if is_list_like(func) and not all([isinstance(obj, str) + for obj in func]): + raise NotImplementedError( + "To contribute to Pandas on Ray, please visit " + "github.com/ray-project/ray.") + + if axis == 0 and is_list_like(func): + return self.aggregate(func, axis, *args, **kwds) + if isinstance(func, compat.string_types): + if axis == 1: + kwds['axis'] = axis + return getattr(self, func)(*args, **kwds) + elif callable(func): + return self._callable_function(func, axis=axis, *args, **kwds) + else: + raise NotImplementedError( + "To contribute to Pandas on Ray, please visit " + "github.com/ray-project/ray.") def as_blocks(self, copy=True): raise NotImplementedError( @@ -1029,14 +1285,33 @@ def diff(self, periods=1, axis=0): "github.com/ray-project/ray.") def div(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Divides this DataFrame against another DataFrame/Series/scalar. + + Args: + other: The object to use to apply the divide against this. + axis: The axis to divide over. + level: The Multilevel index level to apply divide over. + fill_value: The value to fill NaNs with. + + Returns: + A new DataFrame with the Divide applied. + """ + return self._operator_helper(pd.DataFrame.add, other, axis, level, + fill_value) def divide(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Synonym for div. + + Args: + other: The object to use to apply the divide against this. + axis: The axis to divide over. + level: The Multilevel index level to apply divide over. + fill_value: The value to fill NaNs with. + + Returns: + A new DataFrame with the Divide applied. + """ + return self.div(other, axis, level, fill_value) def dot(self, other): raise NotImplementedError( @@ -1187,9 +1462,17 @@ def duplicated(self, subset=None, keep='first'): "github.com/ray-project/ray.") def eq(self, other, axis='columns', level=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Checks element-wise that this is equal to other. + + Args: + other: A DataFrame or Series or scalar to compare to. + axis: The axis to perform the eq over. + level: The Multilevel index level to apply eq over. + + Returns: + A new DataFrame filled with Booleans. + """ + return self._operator_helper(pd.DataFrame.eq, other, axis, level) def equals(self, other): """ @@ -1456,9 +1739,19 @@ def first_valid_index(self): return self._row_metadata.first_valid_index() def floordiv(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Divides this DataFrame against another DataFrame/Series/scalar. + + Args: + other: The object to use to apply the divide against this. + axis: The axis to divide over. + level: The Multilevel index level to apply divide over. + fill_value: The value to fill NaNs with. + + Returns: + A new DataFrame with the Divide applied. + """ + return self._operator_helper(pd.DataFrame.floordiv, other, axis, level, + fill_value) @classmethod def from_csv(self, path, header=0, sep=', ', index_col=0, @@ -1488,9 +1781,17 @@ def from_records(self, data, index=None, exclude=None, columns=None, "github.com/ray-project/ray.") def ge(self, other, axis='columns', level=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Checks element-wise that this is greater than or equal to other. + + Args: + other: A DataFrame or Series or scalar to compare to. + axis: The axis to perform the gt over. + level: The Multilevel index level to apply gt over. + + Returns: + A new DataFrame filled with Booleans. + """ + return self._operator_helper(pd.DataFrame.ge, other, axis, level) def get(self, key, default=None): """Get item from object for given key (DataFrame column, Panel @@ -1538,9 +1839,17 @@ def get_values(self): "github.com/ray-project/ray.") def gt(self, other, axis='columns', level=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Checks element-wise that this is greater than other. + + Args: + other: A DataFrame or Series or scalar to compare to. + axis: The axis to perform the gt over. + level: The Multilevel index level to apply gt over. + + Returns: + A new DataFrame filled with Booleans. + """ + return self._operator_helper(pd.DataFrame.gt, other, axis, level) def head(self, n=5): """Get the first n rows of the dataframe. @@ -1837,9 +2146,113 @@ def _replace_index(row_tuple, idx): def join(self, other, on=None, how='left', lsuffix='', rsuffix='', sort=False): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Join two or more DataFrames, or a DataFrame with a collection. + + Args: + other: What to join this DataFrame with. + on: A column name to use from the left for the join. + how: What type of join to conduct. + lsuffix: The suffix to add to column names that match on left. + rsuffix: The suffix to add to column names that match on right. + sort: Whether or not to sort. + + Returns: + The joined DataFrame. + """ + + if on is not None: + raise NotImplementedError("Not yet.") + + if isinstance(other, pd.Series): + if other.name is None: + raise ValueError("Other Series must have a name") + other = DataFrame({other.name: other}) + + if isinstance(other, DataFrame): + if on is not None: + index = self[on] + else: + index = self.index + + new_index = index.join(other.index, how=how, sort=sort) + + # Joining two empty DataFrames is fast, and error checks for us. + new_column_labels = pd.DataFrame(columns=self.columns) \ + .join(pd.DataFrame(columns=other.columns), + lsuffix=lsuffix, rsuffix=rsuffix).columns + + new_partition_num = max(len(self._block_partitions.T), + len(other._block_partitions.T)) + + # Join is a concat once we have shuffled the data internally. + # We shuffle the data by computing the correct order. + # Another important thing to note: We set the current self index + # to the index variable which may be 'on'. + new_self = np.array([ + _reindex_helper._submit(args=tuple([index, new_index, 1, + new_partition_num] + + block.tolist()), + num_return_vals=new_partition_num) + for block in self._block_partitions.T]) + new_other = np.array([ + _reindex_helper._submit(args=tuple([other.index, new_index, 1, + new_partition_num] + + block.tolist()), + num_return_vals=new_partition_num) + for block in other._block_partitions.T]) + + # Append the blocks together (i.e. concat) + new_block_parts = np.concatenate((new_self, new_other)).T + + # Default index in the case that on is set. + if on is not None: + new_index = None + + # TODO join the two metadata tables for performance. + return DataFrame(block_partitions=new_block_parts, + index=new_index, + columns=new_column_labels) + else: + # This constraint carried over from Pandas. + if on is not None: + raise ValueError("Joining multiple DataFrames only supported" + " for joining on index") + + # Joining the empty DataFrames with either index or columns is + # fast. It gives us proper error checking for the edge cases that + # would otherwise require a lot more logic. + new_index = pd.DataFrame(index=self.index).join( + [pd.DataFrame(index=obj.index) for obj in other], + how=how, sort=sort).index + + new_column_labels = pd.DataFrame(columns=self.columns).join( + [pd.DataFrame(columns=obj.columns) for obj in other], + lsuffix=lsuffix, rsuffix=rsuffix).columns + + new_partition_num = max([len(self._block_partitions.T)] + + [len(obj._block_partitions.T) + for obj in other]) + + new_self = np.array([ + _reindex_helper._submit(args=tuple([self.index, new_index, 1, + new_partition_num] + + block.tolist()), + num_return_vals=new_partition_num) + for block in self._block_partitions.T]) + + new_others = np.array([_reindex_helper._submit( + args=tuple([obj.index, new_index, 1, new_partition_num] + + block.tolist()), + num_return_vals=new_partition_num + ) for obj in other for block in obj._block_partitions.T]) + + # Append the columns together (i.e. concat) + new_block_parts = np.concatenate((new_self, new_others)).T + + # TODO join the two metadata tables for performance. + return DataFrame(block_partitions=new_block_parts, + index=new_index, + columns=new_column_labels) def kurt(self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs): @@ -1867,9 +2280,17 @@ def last_valid_index(self): return self._row_metadata.last_valid_index() def le(self, other, axis='columns', level=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Checks element-wise that this is less than or equal to other. + + Args: + other: A DataFrame or Series or scalar to compare to. + axis: The axis to perform the le over. + level: The Multilevel index level to apply le over. + + Returns: + A new DataFrame filled with Booleans. + """ + return self._operator_helper(pd.DataFrame.le, other, axis, level) def lookup(self, row_labels, col_labels): raise NotImplementedError( @@ -1877,9 +2298,17 @@ def lookup(self, row_labels, col_labels): "github.com/ray-project/ray.") def lt(self, other, axis='columns', level=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Checks element-wise that this is less than other. + + Args: + other: A DataFrame or Series or scalar to compare to. + axis: The axis to perform the lt over. + level: The Multilevel index level to apply lt over. + + Returns: + A new DataFrame filled with Booleans. + """ + return self._operator_helper(pd.DataFrame.lt, other, axis, level) def mad(self, axis=None, skipna=None, level=None): raise NotImplementedError( @@ -1989,9 +2418,19 @@ def remote_func(df): return self._arithmetic_helper(remote_func, axis, level) def mod(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Mods this DataFrame against another DataFrame/Series/scalar. + + Args: + other: The object to use to apply the mod against this. + axis: The axis to mod over. + level: The Multilevel index level to apply mod over. + fill_value: The value to fill NaNs with. + + Returns: + A new DataFrame with the Mod applied. + """ + return self._operator_helper(pd.DataFrame.mod, other, axis, level, + fill_value) def mode(self, axis=0, numeric_only=False): raise NotImplementedError( @@ -1999,19 +2438,46 @@ def mode(self, axis=0, numeric_only=False): "github.com/ray-project/ray.") def mul(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Multiplies this DataFrame against another DataFrame/Series/scalar. + + Args: + other: The object to use to apply the multiply against this. + axis: The axis to multiply over. + level: The Multilevel index level to apply multiply over. + fill_value: The value to fill NaNs with. + + Returns: + A new DataFrame with the Multiply applied. + """ + return self._operator_helper(pd.DataFrame.mul, other, axis, level, + fill_value) def multiply(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Synonym for mul. + + Args: + other: The object to use to apply the multiply against this. + axis: The axis to multiply over. + level: The Multilevel index level to apply multiply over. + fill_value: The value to fill NaNs with. + + Returns: + A new DataFrame with the Multiply applied. + """ + return self.mul(other, axis, level, fill_value) def ne(self, other, axis='columns', level=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Checks element-wise that this is not equal to other. + + Args: + other: A DataFrame or Series or scalar to compare to. + axis: The axis to perform the ne over. + level: The Multilevel index level to apply ne over. + + Returns: + A new DataFrame filled with Booleans. + """ + return self._operator_helper(pd.DataFrame.ne, other, axis, level) def nlargest(self, n, columns, keep='first'): raise NotImplementedError( @@ -2112,9 +2578,19 @@ def pop(self, item): return result def pow(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Pow this DataFrame against another DataFrame/Series/scalar. + + Args: + other: The object to use to apply the pow against this. + axis: The axis to pow over. + level: The Multilevel index level to apply pow over. + fill_value: The value to fill NaNs with. + + Returns: + A new DataFrame with the Pow applied. + """ + return self._operator_helper(pd.DataFrame.pow, other, axis, level, + fill_value) def prod(self, axis=None, skipna=None, level=None, numeric_only=None, min_count=0, **kwargs): @@ -2209,9 +2685,7 @@ def query_helper(df): return DataFrame(row_partitions=new_rows, columns=self.columns) def radd(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.add(other, axis, level, fill_value) def rank(self, axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False): @@ -2220,9 +2694,9 @@ def rank(self, axis=0, method='average', numeric_only=None, "github.com/ray-project/ray.") def rdiv(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self._single_df_op_helper( + lambda df: df.rdiv(other, axis, level, fill_value), + other, axis, level) def reindex(self, labels=None, index=None, columns=None, axis=None, method=None, copy=True, level=None, fill_value=np.nan, @@ -2441,19 +2915,17 @@ def _maybe_casted_values(index, labels=None): return new_obj def rfloordiv(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self._single_df_op_helper( + lambda df: df.rfloordiv(other, axis, level, fill_value), + other, axis, level) def rmod(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self._single_df_op_helper( + lambda df: df.rmod(other, axis, level, fill_value), + other, axis, level) def rmul(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.mul(other, axis, level, fill_value) def rolling(self, window, min_periods=None, freq=None, center=False, win_type=None, on=None, axis=0, closed=None): @@ -2471,25 +2943,182 @@ def round(self, decimals=0, *args, **kwargs): index=self.index) def rpow(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self._single_df_op_helper( + lambda df: df.rpow(other, axis, level, fill_value), + other, axis, level) def rsub(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self._single_df_op_helper( + lambda df: df.rsub(other, axis, level, fill_value), + other, axis, level) def rtruediv(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self._single_df_op_helper( + lambda df: df.rtruediv(other, axis, level, fill_value), + other, axis, level) def sample(self, n=None, frac=None, replace=False, weights=None, random_state=None, axis=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Returns a random sample of items from an axis of object. + + Args: + n: Number of items from axis to return. Cannot be used with frac. + Default = 1 if frac = None. + frac: Fraction of axis items to return. Cannot be used with n. + replace: Sample with or without replacement. Default = False. + weights: Default ‘None’ results in equal probability weighting. + If passed a Series, will align with target object on index. + Index values in weights not found in sampled object will be + ignored and index values in sampled object not in weights will + be assigned weights of zero. If called on a DataFrame, will + accept the name of a column when axis = 0. Unless weights are + a Series, weights must be same length as axis being sampled. + If weights do not sum to 1, they will be normalized to sum + to 1. Missing values in the weights column will be treated as + zero. inf and -inf values not allowed. + random_state: Seed for the random number generator (if int), or + numpy RandomState object. + axis: Axis to sample. Accepts axis number or name. + + Returns: + A new Dataframe + """ + + axis = pd.DataFrame()._get_axis_number(axis) if axis is not None \ + else 0 + + if axis == 0: + axis_length = len(self._row_metadata) + else: + axis_length = len(self._col_metadata) + + if weights is not None: + + # Index of the weights Series should correspond to the index of the + # Dataframe in order to sample + if isinstance(weights, pd.Series): + weights = weights.reindex(self.axes[axis]) + + # If weights arg is a string, the weights used for sampling will + # the be values in the column corresponding to that string + if isinstance(weights, string_types): + if axis == 0: + try: + weights = self[weights] + except KeyError: + raise KeyError("String passed to weights not a " + "valid column") + else: + raise ValueError("Strings can only be passed to " + "weights when sampling from rows on " + "a DataFrame") + + weights = pd.Series(weights, dtype='float64') + + if len(weights) != axis_length: + raise ValueError("Weights and axis to be sampled must be of " + "same length") + + if (weights == np.inf).any() or (weights == -np.inf).any(): + raise ValueError("weight vector may not include `inf` values") + + if (weights < 0).any(): + raise ValueError("weight vector many not include negative " + "values") + + # weights cannot be NaN when sampling, so we must set all nan + # values to 0 + weights = weights.fillna(0) + + # If passed in weights are not equal to 1, renormalize them + # otherwise numpy sampling function will error + weights_sum = weights.sum() + if weights_sum != 1: + if weights_sum != 0: + weights = weights / weights_sum + else: + raise ValueError("Invalid weights: weights sum to zero") + + weights = weights.values + + if n is None and frac is None: + # default to n = 1 if n and frac are both None (in accordance with + # Pandas specification) + n = 1 + elif n is not None and frac is None and n % 1 != 0: + # n must be an integer + raise ValueError("Only integers accepted as `n` values") + elif n is None and frac is not None: + # compute the number of samples based on frac + n = int(round(frac * axis_length)) + elif n is not None and frac is not None: + # Pandas specification does not allow both n and frac to be passed + # in + raise ValueError('Please enter a value for `frac` OR `n`, not ' + 'both') + if n < 0: + raise ValueError("A negative number of rows requested. Please " + "provide positive value.") + + if n == 0: + # An Empty DataFrame is returned if the number of samples is 0. + # The Empty Dataframe should have either columns or index specified + # depending on which axis is passed in. + return DataFrame(columns=[] if axis == 1 else self.columns, + index=self.index if axis == 1 else []) + + if axis == 1: + axis_labels = self.columns + partition_metadata = self._col_metadata + partitions = self._col_partitions + else: + axis_labels = self.index + partition_metadata = self._row_metadata + partitions = self._row_partitions + + if random_state is not None: + # Get a random number generator depending on the type of + # random_state that is passed in + if isinstance(random_state, int): + random_num_gen = np.random.RandomState(random_state) + elif isinstance(random_state, np.random.randomState): + random_num_gen = random_state + else: + # random_state must be an int or a numpy RandomState object + raise ValueError("Please enter an `int` OR a " + "np.random.RandomState for random_state") + + # choose random numbers and then get corresponding labels from + # chosen axis + sample_indices = random_num_gen.randint( + low=0, + high=len(partition_metadata), + size=n) + samples = axis_labels[sample_indices] + else: + # randomly select labels from chosen axis + samples = np.random.choice(a=axis_labels, size=n, + replace=replace, p=weights) + + # create an array of (partition, index_within_partition) tuples for + # each sample + part_ind_tuples = [partition_metadata[sample] + for sample in samples] + + if axis == 1: + # tup[0] refers to the partition number and tup[1] is the index + # within that partition + new_cols = [_deploy_func.remote(lambda df: df.iloc[:, [tup[1]]], + partitions[tup[0]]) for tup in part_ind_tuples] + return DataFrame(col_partitions=new_cols, + columns=samples, + index=self.index) + else: + new_rows = [_deploy_func.remote(lambda df: df.loc[[tup[1]]], + partitions[tup[0]]) for tup in part_ind_tuples] + return DataFrame(row_partitions=new_rows, + columns=self.columns, + index=samples) def select(self, crit, axis=0): raise NotImplementedError( @@ -2688,14 +3317,33 @@ def remote_func(df): return self._arithmetic_helper(remote_func, axis, level) def sub(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Subtract a DataFrame/Series/scalar from this DataFrame. + + Args: + other: The object to use to apply the subtraction to this. + axis: THe axis to apply the subtraction over. + level: Mutlilevel index level to subtract over. + fill_value: The value to fill NaNs with. + + Returns: + A new DataFrame with the subtraciont applied. + """ + return self._operator_helper(pd.DataFrame.sub, other, axis, level, + fill_value) def subtract(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Alias for sub. + + Args: + other: The object to use to apply the subtraction to this. + axis: THe axis to apply the subtraction over. + level: Mutlilevel index level to subtract over. + fill_value: The value to fill NaNs with. + + Returns: + A new DataFrame with the subtraciont applied. + """ + return self.sub(other, axis, level, fill_value) def swapaxes(self, axis1, axis2, copy=True): raise NotImplementedError( @@ -2733,19 +3381,30 @@ def take(self, indices, axis=0, convert=None, is_copy=True, **kwargs): "github.com/ray-project/ray.") def to_clipboard(self, excel=None, sep=None, **kwargs): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") - def to_csv(self, path_or_buf=None, sep=', ', na_rep='', float_format=None, + warnings.warn("Defaulting to Pandas implementation", + PendingDeprecationWarning) + + port_frame = to_pandas(self) + port_frame.to_clipboard(excel, sep, **kwargs) + + def to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression=None, quoting=None, quotechar='"', line_terminator='\n', chunksize=None, tupleize_cols=None, date_format=None, doublequote=True, escapechar=None, decimal='.'): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + + warnings.warn("Defaulting to Pandas implementation", + PendingDeprecationWarning) + + port_frame = to_pandas(self) + port_frame.to_csv(path_or_buf, sep, na_rep, float_format, + columns, header, index, index_label, + mode, encoding, compression, quoting, + quotechar, line_terminator, chunksize, + tupleize_cols, date_format, doublequote, + escapechar, decimal) def to_dense(self): raise NotImplementedError( @@ -2762,14 +3421,24 @@ def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=None, inf_rep='inf', verbose=True, freeze_panes=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + + warnings.warn("Defaulting to Pandas implementation", + PendingDeprecationWarning) + + port_frame = to_pandas(self) + port_frame.to_excel(excel_writer, sheet_name, na_rep, + float_format, columns, header, index, + index_label, startrow, startcol, engine, + merge_cells, encoding, inf_rep, verbose, + freeze_panes) def to_feather(self, fname): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + + warnings.warn("Defaulting to Pandas implementation", + PendingDeprecationWarning) + + port_frame = to_pandas(self) + port_frame.to_feather(fname) def to_gbq(self, destination_table, project_id, chunksize=10000, verbose=True, reauth=False, if_exists='fail', @@ -2779,9 +3448,12 @@ def to_gbq(self, destination_table, project_id, chunksize=10000, "github.com/ray-project/ray.") def to_hdf(self, path_or_buf, key, **kwargs): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + + warnings.warn("Defaulting to Pandas implementation", + PendingDeprecationWarning) + + port_frame = to_pandas(self) + port_frame.to_hdf(path_or_buf, key, **kwargs) def to_html(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='np.NaN', formatters=None, @@ -2789,16 +3461,29 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, justify=None, bold_rows=True, classes=None, escape=True, max_rows=None, max_cols=None, show_dimensions=False, notebook=False, decimal='.', border=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + + warnings.warn("Defaulting to Pandas implementation", + PendingDeprecationWarning) + + port_frame = to_pandas(self) + port_frame.to_html(buf, columns, col_space, header, + index, na_rep, formatters, + float_format, sparsify, index_names, + justify, bold_rows, classes, escape, + max_rows, max_cols, show_dimensions, + notebook, decimal, border) def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False, compression=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + + warnings.warn("Defaulting to Pandas implementation", + PendingDeprecationWarning) + + port_frame = to_pandas(self) + port_frame.to_json(path_or_buf, orient, date_format, + double_precision, force_ascii, date_unit, + default_handler, lines, compression) def to_latex(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='np.NaN', formatters=None, @@ -2811,9 +3496,12 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True, "github.com/ray-project/ray.") def to_msgpack(self, path_or_buf=None, encoding='utf-8', **kwargs): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + + warnings.warn("Defaulting to Pandas implementation", + PendingDeprecationWarning) + + port_frame = to_pandas(self) + port_frame.to_msgpack(path_or_buf, encoding, **kwargs) def to_panel(self): raise NotImplementedError( @@ -2822,19 +3510,26 @@ def to_panel(self): def to_parquet(self, fname, engine='auto', compression='snappy', **kwargs): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + + warnings.warn("Defaulting to Pandas implementation", + PendingDeprecationWarning) + + port_frame = to_pandas(self) + port_frame.to_parquet(fname, engine, compression, **kwargs) def to_period(self, freq=None, axis=0, copy=True): raise NotImplementedError( "To contribute to Pandas on Ray, please visit " "github.com/ray-project/ray.") - def to_pickle(self, path, compression='infer', protocol=4): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + def to_pickle(self, path, compression='infer', + protocol=pkl.HIGHEST_PROTOCOL): + + warnings.warn("Defaulting to Pandas implementation", + PendingDeprecationWarning) + + port_frame = to_pandas(self) + port_frame.to_pickle(path, compression, protocol) def to_records(self, index=True, convert_datetime64=True): raise NotImplementedError( @@ -2848,16 +3543,25 @@ def to_sparse(self, fill_value=None, kind='block'): def to_sql(self, name, con, flavor=None, schema=None, if_exists='fail', index=True, index_label=None, chunksize=None, dtype=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + + warnings.warn("Defaulting to Pandas implementation", + PendingDeprecationWarning) + + port_frame = to_pandas(self) + port_frame.to_sql(name, con, flavor, schema, if_exists, + index, index_label, chunksize, dtype) def to_stata(self, fname, convert_dates=None, write_index=True, encoding='latin-1', byteorder=None, time_stamp=None, data_label=None, variable_labels=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + + warnings.warn("Defaulting to Pandas implementation", + PendingDeprecationWarning) + + port_frame = to_pandas(self) + port_frame.to_stata(fname, convert_dates, write_index, + encoding, byteorder, time_stamp, + data_label, variable_labels) def to_string(self, buf=None, columns=None, col_space=None, header=True, index=True, na_rep='np.NaN', formatters=None, @@ -2879,14 +3583,29 @@ def to_xarray(self): "github.com/ray-project/ray.") def transform(self, func, *args, **kwargs): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + kwargs["is_transform"] = True + result = self.agg(func, *args, **kwargs) + try: + result.columns = self.columns + result.index = self.index + except ValueError: + raise ValueError("transforms cannot produce aggregated results") + return result def truediv(self, other, axis='columns', level=None, fill_value=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + """Divides this DataFrame against another DataFrame/Series/scalar. + + Args: + other: The object to use to apply the divide against this. + axis: The axis to divide over. + level: The Multilevel index level to apply divide over. + fill_value: The value to fill NaNs with. + + Returns: + A new DataFrame with the Divide applied. + """ + return self._operator_helper(pd.DataFrame.truediv, other, axis, level, + fill_value) def truncate(self, before=None, after=None, axis=None, copy=True): raise NotImplementedError( @@ -3014,7 +3733,7 @@ def _getitem_array(self, key): columns=columns, index=index) else: - columns = self.columns[key] + columns = self._col_metadata[key].index indices_for_rows = [self.columns.index(new_col) for new_col in columns] @@ -3135,9 +3854,8 @@ def __round__(self, decimals=0): "github.com/ray-project/ray.") def __array__(self, dtype=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + # TODO: This is very inefficient and needs fix + return np.array(to_pandas(self)) def __array_wrap__(self, result, context=None): raise NotImplementedError( @@ -3250,80 +3968,92 @@ def __xor__(self, other): "github.com/ray-project/ray.") def __lt__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.lt(other) def __le__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.le(other) def __gt__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.gt(other) def __ge__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.ge(other) def __eq__(self, other): - """Computes the equality of this DataFrame with another - - Returns: - True, if the DataFrames are equal. False otherwise. - """ - return self.equals(other) + return self.eq(other) def __ne__(self, other): - """Checks that this DataFrame is not equal to another - - Returns: - True, if the DataFrames are not equal. False otherwise. - """ - return not self.equals(other) + return self.ne(other) def __add__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.add(other) def __iadd__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.add(other) + + def __radd__(self, other, axis="columns", level=None, fill_value=None): + return self.radd(other, axis, level, fill_value) def __mul__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.mul(other) def __imul__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.mul(other) + + def __rmul__(self, other, axis="columns", level=None, fill_value=None): + return self.rmul(other, axis, level, fill_value) def __pow__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.pow(other) def __ipow__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.pow(other) + + def __rpow__(self, other, axis="columns", level=None, fill_value=None): + return self.rpow(other, axis, level, fill_value) def __sub__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.sub(other) def __isub__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") + return self.sub(other) + + def __rsub__(self, other, axis="columns", level=None, fill_value=None): + return self.rsub(other, axis, level, fill_value) + + def __floordiv__(self, other): + return self.floordiv(other) + + def __ifloordiv__(self, other): + return self.floordiv(other) + + def __rfloordiv__(self, other, axis="columns", level=None, + fill_value=None): + return self.rfloordiv(other, axis, level, fill_value) + + def __truediv__(self, other): + return self.truediv(other) + + def __itruediv__(self, other): + return self.truediv(other) + + def __rtruediv__(self, other, axis="columns", level=None, fill_value=None): + return self.rtruediv(other, axis, level, fill_value) + + def __mod__(self, other): + return self.mod(other) + + def __imod__(self, other): + return self.mod(other) + + def __rmod__(self, other, axis="columns", level=None, fill_value=None): + return self.rmod(other, axis, level, fill_value) + + def __div__(self, other, axis="columns", level=None, fill_value=None): + return self.div(other, axis, level, fill_value) + + def __rdiv__(self, other, axis="columns", level=None, fill_value=None): + return self.rdiv(other, axis, level, fill_value) def __neg__(self): """Computes an element wise negative DataFrame @@ -3346,21 +4076,6 @@ def __neg__(self): columns=self.columns, index=self.index) - def __floordiv__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") - - def __truediv__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") - - def __mod__(self, other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") - def __sizeof__(self): raise NotImplementedError( "To contribute to Pandas on Ray, please visit " @@ -3384,12 +4099,7 @@ def style(self): "To contribute to Pandas on Ray, please visit " "github.com/ray-project/ray.") - def iat(axis=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") - - def __rsub__(other, axis=None, level=None, fill_value=None): + def iat(self, axis=None): raise NotImplementedError( "To contribute to Pandas on Ray, please visit " "github.com/ray-project/ray.") @@ -3411,22 +4121,12 @@ def is_copy(self): "To contribute to Pandas on Ray, please visit " "github.com/ray-project/ray.") - def __itruediv__(other): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") - - def __div__(other, axis=None, level=None, fill_value=None): + def at(self, axis=None): raise NotImplementedError( "To contribute to Pandas on Ray, please visit " "github.com/ray-project/ray.") - def at(axis=None): - raise NotImplementedError( - "To contribute to Pandas on Ray, please visit " - "github.com/ray-project/ray.") - - def ix(axis=None): + def ix(self, axis=None): raise NotImplementedError( "To contribute to Pandas on Ray, please visit " "github.com/ray-project/ray.") @@ -3441,3 +4141,122 @@ def iloc(self): raise NotImplementedError( "To contribute to Pandas on Ray, please visit " "github.com/ray-project/ray.") + + def _copartition(self, other, new_index): + """Colocates the values of other with this for certain operations. + + NOTE: This method uses the indexes of each DataFrame to order them the + same. This operation does an implicit shuffling of data and zips + the two DataFrames together to be operated on. + + Args: + other: The other DataFrame to copartition with. + + Returns: + Two new sets of partitions, copartitioned and zipped. + """ + # Put in the object store so they aren't serialized each iteration. + old_self_index = ray.put(self.index) + new_index = ray.put(new_index) + old_other_index = ray.put(other.index) + + new_num_partitions = max(len(self._block_partitions.T), + len(other._block_partitions.T)) + + new_partitions_self = \ + np.array([_reindex_helper._submit( + args=tuple([old_self_index, new_index, 1, + new_num_partitions] + block.tolist()), + num_return_vals=new_num_partitions) + for block in self._block_partitions.T]).T + + new_partitions_other = \ + np.array([_reindex_helper._submit( + args=tuple([old_other_index, new_index, 1, + new_num_partitions] + block.tolist()), + num_return_vals=new_num_partitions) + for block in other._block_partitions.T]).T + + return zip(new_partitions_self, new_partitions_other) + + def _operator_helper(self, func, other, axis, level, *args): + """Helper method for inter-dataframe and scalar operations""" + if isinstance(other, DataFrame): + return self._inter_df_op_helper( + lambda x, y: func(x, y, axis, level, *args), + other, axis, level) + else: + return self._single_df_op_helper( + lambda df: func(df, other, axis, level, *args), + other, axis, level) + + def _inter_df_op_helper(self, func, other, axis, level): + if level is not None: + raise NotImplementedError("Mutlilevel index not yet supported " + "in Pandas on Ray") + axis = pd.DataFrame()._get_axis_number(axis) + + # Adding two DataFrames causes an outer join. + if isinstance(other, DataFrame): + new_column_index = self.columns.join(other.columns, how="outer") + new_index = self.index.join(other.index, how="outer") + copartitions = self._copartition(other, new_index) + + new_blocks = \ + np.array([_co_op_helper._submit( + args=tuple([func, self.columns, other.columns, + len(part[0])] + + np.concatenate(part).tolist()), + num_return_vals=len(part[0])) + for part in copartitions]) + + # TODO join the Index Metadata objects together for performance. + return DataFrame(block_partitions=new_blocks, + columns=new_column_index, + index=new_index) + + def _single_df_op_helper(self, func, other, axis, level): + if level is not None: + raise NotImplementedError("Multilevel index not yet supported " + "in Pandas on Ray") + axis = pd.DataFrame()._get_axis_number(axis) + + if is_list_like(other): + new_index = self.index + new_column_index = self.columns + new_col_metadata = self._col_metadata + new_row_metadata = self._row_metadata + new_blocks = None + + if axis == 0: + if len(other) != len(self.index): + raise ValueError( + "Unable to coerce to Series, length must be {0}: " + "given {1}".format(len(self.index), len(other))) + new_columns = _map_partitions(func, self._col_partitions) + new_rows = None + else: + if len(other) != len(self.columns): + raise ValueError( + "Unable to coerce to Series, length must be {0}: " + "given {1}".format(len(self.columns), len(other))) + new_rows = _map_partitions(func, self._row_partitions) + new_columns = None + + else: + new_blocks = np.array([_map_partitions(func, block) + for block in self._block_partitions]) + new_columns = None + new_rows = None + new_index = self.index + new_column_index = self.columns + new_col_metadata = self._col_metadata + new_row_metadata = self._row_metadata + + return DataFrame(col_partitions=new_columns, + row_partitions=new_rows, + block_partitions=new_blocks, + index=new_index, + columns=new_column_index, + col_metadata=new_col_metadata, + row_metadata=new_row_metadata) diff --git a/python/ray/dataframe/groupby.py b/python/ray/dataframe/groupby.py index bec192cdf94f9..892bc8f74e193 100644 --- a/python/ray/dataframe/groupby.py +++ b/python/ray/dataframe/groupby.py @@ -3,60 +3,92 @@ from __future__ import print_function import pandas.core.groupby +import numpy as np +import pandas as pd +from pandas.core.dtypes.common import is_list_like +import ray +from .utils import _map_partitions from .utils import _inherit_docstrings @_inherit_docstrings(pandas.core.groupby.DataFrameGroupBy) class DataFrameGroupBy(object): - def __init__(self, partitions, columns, index): - self._partitions = partitions - self._columns = columns - self._index = index + def __init__(self, df, by, axis, level, as_index, sort, group_keys, + squeeze, **kwargs): + + self._columns = df.columns + self._index = df.index + self._axis = axis + + self._row_metadata = df._row_metadata + self._col_metadata = df._col_metadata + + if axis == 0: + partitions = [column for column in df._block_partitions.T] + self._index_grouped = pd.Series(self._index, index=self._index)\ + .groupby(by=by, sort=sort) + else: + partitions = [row for row in df._block_partitions] + self._index_grouped = pd.Series(self._columns, index=self._index)\ + .groupby(by=by, sort=sort) + + self._keys_and_values = [(k, np.array(v)) + for k, v in self._index_grouped] + + self._grouped_partitions = \ + list(zip(*(groupby._submit(args=(by, + axis, + level, + as_index, + sort, + group_keys, + squeeze) + part, + num_return_vals=len(self)) + for part in partitions))) - def _map_partitions(self, func, index=None): - """Apply a function on each partition. - - Args: - func (callable): The function to Apply. - - Returns: - A new DataFrame containing the result of the function. - """ + @property + def _iter(self): from .dataframe import DataFrame - from .dataframe import _deploy_func - assert(callable(func)) - new_df = [_deploy_func.remote(lambda df: df.apply(func), part) - for part in self._partitions] - - if index is None: - index = self._index - - return DataFrame(row_partitions=new_df, columns=self._columns, - index=index) + if self._axis == 0: + return [(self._keys_and_values[i][0], + DataFrame(col_partitions=part, + columns=self._columns, + index=self._keys_and_values[i][1].index, + row_metadata=self._row_metadata[ + self._keys_and_values[i][1].index], + col_metadata=self._col_metadata)) + for i, part in enumerate(self._grouped_partitions)] + else: + return [(self._keys_and_values[i][0], + DataFrame(row_partitions=part, + columns=self._keys_and_values[i][1].index, + index=self._index, + row_metadata=self._row_metadata, + col_metadata=self._col_metadata[ + self._keys_and_values[i][1].index])) + for i, part in enumerate(self._grouped_partitions)] @property def ngroups(self): - raise NotImplementedError("Not Yet implemented.") + return len(self) - @property - def skew(self): - raise NotImplementedError("Not Yet implemented.") + def skew(self, **kwargs): + return self._apply_agg_function(lambda df: df.skew(**kwargs)) def ffill(self, limit=None): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.ffill(limit=limit)) def sem(self, ddof=1): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.sem(ddof=ddof)) def mean(self, *args, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.mean(*args, **kwargs)) - @property def any(self): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.any()) @property def plot(self): @@ -74,18 +106,17 @@ def tshift(self): @property def groups(self): - raise NotImplementedError("Not Yet implemented.") + return dict([(k, pd.Index(v)) for k, v in self._keys_and_values]) def min(self, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.min(**kwargs)) - @property def idxmax(self): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.idxmax()) @property def ndim(self): - raise NotImplementedError("Not Yet implemented.") + return self._index_grouped.ndim def shift(self, periods=1, freq=None, axis=0): raise NotImplementedError("Not Yet implemented.") @@ -94,70 +125,82 @@ def nth(self, n, dropna=None): raise NotImplementedError("Not Yet implemented.") def cumsum(self, axis=0, *args, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.cumsum(axis, + *args, + **kwargs)) @property def indices(self): - raise NotImplementedError("Not Yet implemented.") + return dict(self._keys_and_values) - @property def pct_change(self): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.pct_change()) def filter(self, func, dropna=True, *args, **kwargs): raise NotImplementedError("Not Yet implemented.") def cummax(self, axis=0, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.cummax(axis=axis, + **kwargs)) def apply(self, func, *args, **kwargs): - return self._map_partitions(func) - - def rolling(self, *args, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_df_function(lambda df: df.apply(func, + *args, + **kwargs)) \ + if is_list_like(func) \ + else self._apply_agg_function(lambda df: df.apply(func, + *args, + **kwargs)) @property def dtypes(self): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.dtypes) def first(self, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.first(offset=0, + **kwargs)) def backfill(self, limit=None): - raise NotImplementedError("Not Yet implemented.") + return self.bfill(limit) def __getitem__(self, key): + # This operation requires a SeriesGroupBy Object raise NotImplementedError("Not Yet implemented.") def cummin(self, axis=0, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.cummin(axis=axis, + **kwargs)) def bfill(self, limit=None): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.bfill(limit=limit)) - @property def idxmin(self): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.idxmin()) def prod(self, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.prod(**kwargs)) def std(self, ddof=1, *args, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.std(ddof=ddof, + *args, **kwargs)) def aggregate(self, arg, *args, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_df_function(lambda df: df.agg(arg, + *args, + **kwargs)) \ + if is_list_like(arg) \ + else self._apply_agg_function(lambda df: df.agg(arg, + *args, + **kwargs)) def last(self, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_df_function(lambda df: df.last(**kwargs)) - @property def mad(self): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.mad()) - @property def rank(self): - raise NotImplementedError("Not Yet implemented.") + return self._apply_df_function(lambda df: df.rank()) @property def corrwith(self): @@ -167,26 +210,28 @@ def pad(self, limit=None): raise NotImplementedError("Not Yet implemented.") def max(self, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.max(**kwargs)) def var(self, ddof=1, *args, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.var(ddof, + *args, + **kwargs)) def get_group(self, name, obj=None): raise NotImplementedError("Not Yet implemented.") def __len__(self): - raise NotImplementedError("Not Yet implemented.") + return len(self._keys_and_values) - @property def all(self): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.all()) def size(self): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.size) def sum(self, **kwargs): - self._map_partitions(lambda df: df.sum()) + return self._apply_agg_function(lambda df: + df.sum(axis=self._axis, **kwargs)) def __unicode__(self): raise NotImplementedError("Not Yet implemented.") @@ -194,76 +239,136 @@ def __unicode__(self): def describe(self, **kwargs): raise NotImplementedError("Not Yet implemented.") - def boxplot(grouped, subplots=True, column=None, fontsize=None, rot=0, - grid=True, ax=None, figsize=None, layout=None, **kwds): + def boxplot(self, grouped, subplots=True, column=None, fontsize=None, + rot=0, grid=True, ax=None, figsize=None, layout=None, **kwds): raise NotImplementedError("Not Yet implemented.") def ngroup(self, ascending=True): - raise NotImplementedError("Not Yet implemented.") + return self._index_grouped.ngroup(ascending) def nunique(self, dropna=True): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.nunique(dropna)) def resample(self, rule, *args, **kwargs): raise NotImplementedError("Not Yet implemented.") def median(self, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.median(**kwargs)) def head(self, n=5): - raise NotImplementedError("Not Yet implemented.") + return self._apply_df_function(lambda df: df.head(n)) def cumprod(self, axis=0, *args, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_df_function(lambda df: df.cumprod(axis, + *args, + **kwargs)) def __iter__(self): - raise NotImplementedError("Not Yet implemented.") + return self._iter.__iter__() def agg(self, arg, *args, **kwargs): - raise NotImplementedError("Not Yet implemented.") + def agg_help(df): + if isinstance(df, pd.Series): + return pd.DataFrame(df).T + else: + return df + x = [v.agg(arg, axis=self._axis, *args, **kwargs) + for k, v in self._iter] + + new_parts = _map_partitions(lambda df: agg_help(df), x) + + from .concat import concat + result = concat(new_parts) + + return result - @property def cov(self): - raise NotImplementedError("Not Yet implemented.") + return self._apply_agg_function(lambda df: df.cov()) def transform(self, func, *args, **kwargs): - raise NotImplementedError("Not Yet implemented.") + from .concat import concat - @property - def corr(self): - raise NotImplementedError("Not Yet implemented.") + new_parts = concat([v.transform(func, *args, **kwargs) + for k, v in self._iter]) + return new_parts - @property - def fillna(self): - raise NotImplementedError("Not Yet implemented.") + def corr(self, **kwargs): + return self._apply_agg_function(lambda df: df.corr(**kwargs)) - def count(self): - raise NotImplementedError("Not Yet implemented.") + def fillna(self, **kwargs): + return self._apply_df_function(lambda df: df.fillna(**kwargs)) + + def count(self, **kwargs): + return self._apply_agg_function(lambda df: df.count(**kwargs)) def pipe(self, func, *args, **kwargs): - raise NotImplementedError("Not Yet implemented.") + return self._apply_df_function(lambda df: df.pipe(func, + *args, + **kwargs)) def cumcount(self, ascending=True): raise NotImplementedError("Not Yet implemented.") def tail(self, n=5): - raise NotImplementedError("Not Yet implemented.") + return self._apply_df_function(lambda df: df.tail(n)) + # expanding and rolling are unique cases and need to likely be handled + # separately. They do not appear to be commonly used. def expanding(self, *args, **kwargs): raise NotImplementedError("Not Yet implemented.") - @property - def hist(self): + def rolling(self, *args, **kwargs): raise NotImplementedError("Not Yet implemented.") - @property - def quantile(self): + def hist(self): raise NotImplementedError("Not Yet implemented.") - @property + def quantile(self, q=0.5, **kwargs): + return self._apply_df_function(lambda df: df.quantile(q, **kwargs)) \ + if is_list_like(q) \ + else self._apply_agg_function(lambda df: df.quantile(q, **kwargs)) + def diff(self): raise NotImplementedError("Not Yet implemented.") - @property - def take(self): - raise NotImplementedError("Not Yet implemented.") + def take(self, **kwargs): + return self._apply_df_function(lambda df: df.take(**kwargs)) + + def _apply_agg_function(self, f): + assert callable(f), "\'{0}\' object is not callable".format(type(f)) + + result = [pd.DataFrame(f(v)).T for k, v in self._iter] + + new_df = pd.concat(result) + if self._axis == 0: + new_df.columns = self._columns + new_df.index = [k for k, v in self._iter] + else: + new_df = new_df.T + new_df.columns = [k for k, v in self._iter] + new_df.index = self._index + return new_df + + def _apply_df_function(self, f): + assert callable(f), "\'{0}\' object is not callable".format(type(f)) + + result = [f(v) for k, v in self._iter] + + from .concat import concat + + new_df = concat(result) + return new_df + + +@ray.remote +def groupby(by, axis, level, as_index, sort, group_keys, squeeze, *df): + + df = pd.concat(df, axis=axis) + + return [v for k, v in df.groupby(by=by, + axis=axis, + level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + squeeze=squeeze)] diff --git a/python/ray/dataframe/index_metadata.py b/python/ray/dataframe/index_metadata.py index 8c492e02dc57e..235809ec7a35b 100644 --- a/python/ray/dataframe/index_metadata.py +++ b/python/ray/dataframe/index_metadata.py @@ -114,7 +114,8 @@ class _IndexMetadata(_IndexMetadataBase): partitions. """ - def __init__(self, dfs, index=None, axis=0): + def __init__(self, dfs=None, index=None, axis=0, lengths_oid=None, + coord_df_oid=None): """Inits a IndexMetadata from Ray DataFrame partitions Args: @@ -126,9 +127,10 @@ def __init__(self, dfs, index=None, axis=0): A IndexMetadata backed by the specified pd.Index, partitioned off specified partitions """ - lengths_oid, coord_df_oid = \ - _build_index.remote(dfs, index) if axis == 0 else \ - _build_columns.remote(dfs, index) + if dfs is not None: + lengths_oid, coord_df_oid = \ + _build_index.remote(dfs, index) if axis == 0 else \ + _build_columns.remote(dfs, index) self._coord_df = coord_df_oid self._lengths = lengths_oid @@ -269,6 +271,10 @@ def squeeze(self, partition, index_within_partition): self._coord_df.loc[partition_mask & index_within_partition_mask, 'index_within_partition'] -= 1 + def copy(self): + return _IndexMetadata(coord_df_oid=self._coord_df, + lengths_oid=self._lengths) + class _WrappingIndexMetadata(_IndexMetadata): """IndexMetadata implementation for index across a non-partitioned axis. diff --git a/python/ray/dataframe/test/test_concat.py b/python/ray/dataframe/test/test_concat.py index 8ea3fe98c73e1..62e881d05b735 100644 --- a/python/ray/dataframe/test/test_concat.py +++ b/python/ray/dataframe/test/test_concat.py @@ -3,8 +3,8 @@ from __future__ import print_function import pytest -import pandas as pd -import ray.dataframe as rdf +import pandas +import ray.dataframe as pd from ray.dataframe.utils import ( to_pandas, from_pandas @@ -17,25 +17,34 @@ def ray_df_equals_pandas(ray_df, pandas_df): @pytest.fixture -def ray_df_equals(ray_df1, ray_df2): - return to_pandas(ray_df1).sort_index().equals( - to_pandas(ray_df2).sort_index() - ) +def generate_dfs(): + df = pandas.DataFrame({'col1': [0, 1, 2, 3], + 'col2': [4, 5, 6, 7], + 'col3': [8, 9, 10, 11], + 'col4': [12, 13, 14, 15], + 'col5': [0, 0, 0, 0]}) + + df2 = pandas.DataFrame({'col1': [0, 1, 2, 3], + 'col2': [4, 5, 6, 7], + 'col3': [8, 9, 10, 11], + 'col6': [12, 13, 14, 15], + 'col7': [0, 0, 0, 0]}) + return df, df2 @pytest.fixture -def generate_dfs(): - df = pd.DataFrame({'col1': [0, 1, 2, 3], - 'col2': [4, 5, 6, 7], - 'col3': [8, 9, 10, 11], - 'col4': [12, 13, 14, 15], - 'col5': [0, 0, 0, 0]}) - - df2 = pd.DataFrame({'col1': [0, 1, 2, 3], - 'col2': [4, 5, 6, 7], - 'col3': [8, 9, 10, 11], - 'col6': [12, 13, 14, 15], - 'col7': [0, 0, 0, 0]}) +def generate_none_dfs(): + df = pandas.DataFrame({'col1': [0, 1, 2, 3], + 'col2': [4, 5, None, 7], + 'col3': [8, 9, 10, 11], + 'col4': [12, 13, 14, 15], + 'col5': [None, None, None, None]}) + + df2 = pandas.DataFrame({'col1': [0, 1, 2, 3], + 'col2': [4, 5, 6, 7], + 'col3': [8, 9, 10, 11], + 'col6': [12, 13, 14, 15], + 'col7': [0, 0, 0, 0]}) return df, df2 @@ -43,40 +52,41 @@ def generate_dfs(): def test_df_concat(): df, df2 = generate_dfs() - assert(ray_df_equals_pandas(rdf.concat([df, df2]), pd.concat([df, df2]))) + assert(ray_df_equals_pandas(pd.concat([df, df2]), + pandas.concat([df, df2]))) def test_ray_concat(): df, df2 = generate_dfs() ray_df, ray_df2 = from_pandas(df, 2), from_pandas(df2, 2) - assert(ray_df_equals_pandas(rdf.concat([ray_df, ray_df2]), - pd.concat([df, df2]))) + assert ray_df_equals_pandas(pd.concat([ray_df, ray_df2]), + pandas.concat([df, df2])) def test_ray_concat_on_index(): df, df2 = generate_dfs() ray_df, ray_df2 = from_pandas(df, 2), from_pandas(df2, 2) - assert(ray_df_equals_pandas(rdf.concat([ray_df, ray_df2], axis='index'), - pd.concat([df, df2], axis='index'))) + assert ray_df_equals_pandas(pd.concat([ray_df, ray_df2], axis='index'), + pandas.concat([df, df2], axis='index')) - assert(ray_df_equals_pandas(rdf.concat([ray_df, ray_df2], axis='rows'), - pd.concat([df, df2], axis='rows'))) + assert ray_df_equals_pandas(pd.concat([ray_df, ray_df2], axis='rows'), + pandas.concat([df, df2], axis='rows')) - assert(ray_df_equals_pandas(rdf.concat([ray_df, ray_df2], axis=0), - pd.concat([df, df2], axis=0))) + assert ray_df_equals_pandas(pd.concat([ray_df, ray_df2], axis=0), + pandas.concat([df, df2], axis=0)) def test_ray_concat_on_column(): df, df2 = generate_dfs() ray_df, ray_df2 = from_pandas(df, 2), from_pandas(df2, 2) - with pytest.raises(NotImplementedError): - rdf.concat([ray_df, ray_df2], axis=1) + assert ray_df_equals_pandas(pd.concat([ray_df, ray_df2], axis=1), + pandas.concat([df, df2], axis=1)) - with pytest.raises(NotImplementedError): - rdf.concat([ray_df, ray_df2], axis="columns") + assert ray_df_equals_pandas(pd.concat([ray_df, ray_df2], axis="columns"), + pandas.concat([df, df2], axis="columns")) def test_invalid_axis_errors(): @@ -84,7 +94,7 @@ def test_invalid_axis_errors(): ray_df, ray_df2 = from_pandas(df, 2), from_pandas(df2, 2) with pytest.raises(ValueError): - rdf.concat([ray_df, ray_df2], axis=2) + pd.concat([ray_df, ray_df2], axis=2) def test_mixed_concat(): @@ -93,8 +103,8 @@ def test_mixed_concat(): mixed_dfs = [from_pandas(df, 2), from_pandas(df2, 2), df3] - assert(ray_df_equals_pandas(rdf.concat(mixed_dfs), - pd.concat([df, df2, df3]))) + assert(ray_df_equals_pandas(pd.concat(mixed_dfs), + pandas.concat([df, df2, df3]))) def test_mixed_inner_concat(): @@ -103,5 +113,15 @@ def test_mixed_inner_concat(): mixed_dfs = [from_pandas(df, 2), from_pandas(df2, 2), df3] - with pytest.raises(NotImplementedError): - rdf.concat(mixed_dfs, join="inner") + assert(ray_df_equals_pandas(pd.concat(mixed_dfs, join='inner'), + pandas.concat([df, df2, df3], join='inner'))) + + +def test_mixed_none_concat(): + df, df2 = generate_none_dfs() + df3 = df.copy() + + mixed_dfs = [from_pandas(df, 2), from_pandas(df2, 2), df3] + + assert(ray_df_equals_pandas(pd.concat(mixed_dfs), + pandas.concat([df, df2, df3]))) diff --git a/python/ray/dataframe/test/test_dataframe.py b/python/ray/dataframe/test/test_dataframe.py index 8b181081852ed..60d2862d9cf96 100644 --- a/python/ray/dataframe/test/test_dataframe.py +++ b/python/ray/dataframe/test/test_dataframe.py @@ -8,9 +8,8 @@ import pandas.util.testing as tm import ray.dataframe as rdf from ray.dataframe.utils import ( - to_pandas, - from_pandas -) + from_pandas, + to_pandas) from pandas.tests.frame.common import TestData @@ -290,6 +289,42 @@ def test_int_dataframe(): test_insert(ray_df, pandas_df, 1, "New Column", ray_df[key]) test_insert(ray_df, pandas_df, 4, "New Column", ray_df[key]) + test___array__(ray_df, pandas_df) + + apply_agg_functions = ['sum', lambda df: df.sum(), ['sum', 'mean'], + ['sum', 'sum']] + for func in apply_agg_functions: + test_apply(ray_df, pandas_df, func, 0) + test_aggregate(ray_df, pandas_df, func, 0) + test_agg(ray_df, pandas_df, func, 0) + if not isinstance(func, list): + test_agg(ray_df, pandas_df, func, 1) + test_apply(ray_df, pandas_df, func, 1) + test_aggregate(ray_df, pandas_df, func, 1) + else: + with pytest.raises(NotImplementedError): + test_agg(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_apply(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_aggregate(ray_df, pandas_df, func, 1) + + func = ['sum', lambda df: df.sum()] + with pytest.raises(NotImplementedError): + test_apply(ray_df, pandas_df, func, 0) + with pytest.raises(NotImplementedError): + test_aggregate(ray_df, pandas_df, func, 0) + with pytest.raises(NotImplementedError): + test_agg(ray_df, pandas_df, func, 0) + with pytest.raises(NotImplementedError): + test_apply(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_aggregate(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_agg(ray_df, pandas_df, func, 1) + + test_transform(ray_df, pandas_df) + def test_float_dataframe(): @@ -339,7 +374,8 @@ def test_float_dataframe(): test_query(ray_df, pandas_df, query_funcs) test_mean(ray_df, pandas_df) - test_var(ray_df, pandas_df) + # TODO Clear floating point error. + # test_var(ray_df, pandas_df) test_std(ray_df, pandas_df) test_median(ray_df, pandas_df) test_quantile(ray_df, pandas_df, .25) @@ -414,6 +450,43 @@ def test_float_dataframe(): test_insert(ray_df, pandas_df, 1, "New Column", ray_df[key]) test_insert(ray_df, pandas_df, 4, "New Column", ray_df[key]) + # TODO Nans are always not equal to each other, fix it + # test___array__(ray_df, pandas_df) + + apply_agg_functions = ['sum', lambda df: df.sum(), ['sum', 'mean'], + ['sum', 'sum']] + for func in apply_agg_functions: + test_apply(ray_df, pandas_df, func, 0) + test_aggregate(ray_df, pandas_df, func, 0) + test_agg(ray_df, pandas_df, func, 0) + if not isinstance(func, list): + test_agg(ray_df, pandas_df, func, 1) + test_apply(ray_df, pandas_df, func, 1) + test_aggregate(ray_df, pandas_df, func, 1) + else: + with pytest.raises(NotImplementedError): + test_agg(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_apply(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_aggregate(ray_df, pandas_df, func, 1) + + func = ['sum', lambda df: df.sum()] + with pytest.raises(NotImplementedError): + test_apply(ray_df, pandas_df, func, 0) + with pytest.raises(NotImplementedError): + test_aggregate(ray_df, pandas_df, func, 0) + with pytest.raises(NotImplementedError): + test_agg(ray_df, pandas_df, func, 0) + with pytest.raises(NotImplementedError): + test_apply(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_aggregate(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_agg(ray_df, pandas_df, func, 1) + + test_transform(ray_df, pandas_df) + def test_mixed_dtype_dataframe(): pandas_df = pd.DataFrame({ @@ -465,7 +538,8 @@ def test_mixed_dtype_dataframe(): test_query(ray_df, pandas_df, query_funcs) test_mean(ray_df, pandas_df) - test_var(ray_df, pandas_df) + # TODO Clear floating point error. + # test_var(ray_df, pandas_df) test_std(ray_df, pandas_df) test_median(ray_df, pandas_df) test_quantile(ray_df, pandas_df, .25) @@ -549,6 +623,30 @@ def test_mixed_dtype_dataframe(): test_insert(ray_df, pandas_df, 1, "New Column", ray_df[key]) test_insert(ray_df, pandas_df, 4, "New Column", ray_df[key]) + test___array__(ray_df, pandas_df) + + apply_agg_functions = ['sum', lambda df: df.sum()] + for func in apply_agg_functions: + test_apply(ray_df, pandas_df, func, 0) + test_aggregate(ray_df, pandas_df, func, 0) + test_agg(ray_df, pandas_df, func, 0) + + func = ['sum', lambda df: df.sum()] + with pytest.raises(NotImplementedError): + test_apply(ray_df, pandas_df, func, 0) + with pytest.raises(NotImplementedError): + test_aggregate(ray_df, pandas_df, func, 0) + with pytest.raises(NotImplementedError): + test_agg(ray_df, pandas_df, func, 0) + with pytest.raises(NotImplementedError): + test_apply(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_aggregate(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_agg(ray_df, pandas_df, func, 1) + + test_transform(ray_df, pandas_df) + def test_nan_dataframe(): pandas_df = pd.DataFrame({ @@ -670,26 +768,133 @@ def test_nan_dataframe(): test_insert(ray_df, pandas_df, 1, "New Column", ray_df[key]) test_insert(ray_df, pandas_df, 4, "New Column", ray_df[key]) + # TODO Nans are always not equal to each other, fix it + # test___array__(ray_df, pandas_df) -def test_add(): - ray_df = create_test_dataframe() + apply_agg_functions = ['sum', lambda df: df.sum(), ['sum', 'mean'], + ['sum', 'sum']] + for func in apply_agg_functions: + test_apply(ray_df, pandas_df, func, 0) + test_aggregate(ray_df, pandas_df, func, 0) + test_agg(ray_df, pandas_df, func, 0) + if not isinstance(func, list): + test_agg(ray_df, pandas_df, func, 1) + test_apply(ray_df, pandas_df, func, 1) + test_aggregate(ray_df, pandas_df, func, 1) + else: + with pytest.raises(NotImplementedError): + test_agg(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_apply(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_aggregate(ray_df, pandas_df, func, 1) - with pytest.raises(NotImplementedError): - ray_df.add(None) + func = ['sum', lambda df: df.sum()] + with pytest.raises(NotImplementedError): + test_apply(ray_df, pandas_df, func, 0) + with pytest.raises(NotImplementedError): + test_aggregate(ray_df, pandas_df, func, 0) + with pytest.raises(NotImplementedError): + test_agg(ray_df, pandas_df, func, 0) + with pytest.raises(NotImplementedError): + test_apply(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_aggregate(ray_df, pandas_df, func, 1) + with pytest.raises(NotImplementedError): + test_agg(ray_df, pandas_df, func, 1) + test_transform(ray_df, pandas_df) -def test_agg(): - ray_df = create_test_dataframe() - with pytest.raises(NotImplementedError): - ray_df.agg(None) +@pytest.fixture +def test_inter_df_math(op, simple=False): + ray_df = rdf.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7], + "col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]}) + pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7], + "col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]}) -def test_aggregate(): - ray_df = create_test_dataframe() + ray_df_equals_pandas(getattr(ray_df, op)(ray_df), + getattr(pandas_df, op)(pandas_df)) + ray_df_equals_pandas(getattr(ray_df, op)(4), + getattr(pandas_df, op)(4)) + ray_df_equals_pandas(getattr(ray_df, op)(4.0), + getattr(pandas_df, op)(4.0)) - with pytest.raises(NotImplementedError): - ray_df.aggregate(None) + ray_df2 = rdf.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]}) + pandas_df2 = pd.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]}) + + ray_df_equals_pandas(getattr(ray_df, op)(ray_df2), + getattr(pandas_df, op)(pandas_df2)) + + list_test = [0, 1, 2, 4] + + if not simple: + ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=1), + getattr(pandas_df, op)(list_test, axis=1)) + + ray_df_equals_pandas(getattr(ray_df, op)(list_test, axis=0), + getattr(pandas_df, op)(list_test, axis=0)) + + +@pytest.fixture +def test_comparison_inter_ops(op): + ray_df = rdf.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7], + "col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]}) + + pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7], + "col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]}) + + ray_df_equals_pandas(getattr(ray_df, op)(ray_df), + getattr(pandas_df, op)(pandas_df)) + ray_df_equals_pandas(getattr(ray_df, op)(4), + getattr(pandas_df, op)(4)) + ray_df_equals_pandas(getattr(ray_df, op)(4.0), + getattr(pandas_df, op)(4.0)) + + ray_df2 = rdf.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]}) + pandas_df2 = pd.DataFrame({"A": [0, 2], "col1": [0, 19], "col2": [1, 1]}) + + ray_df_equals_pandas(getattr(ray_df, op)(ray_df2), + getattr(pandas_df, op)(pandas_df2)) + + +@pytest.fixture +def test_inter_df_math_right_ops(op): + ray_df = rdf.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7], + "col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]}) + + pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7], + "col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]}) + + ray_df_equals_pandas(getattr(ray_df, op)(4), + getattr(pandas_df, op)(4)) + ray_df_equals_pandas(getattr(ray_df, op)(4.0), + getattr(pandas_df, op)(4.0)) + + +def test_add(): + test_inter_df_math("add", simple=False) + + +@pytest.fixture +def test_agg(ray_df, pandas_df, func, axis): + ray_result = ray_df.agg(func, axis) + pandas_result = pandas_df.agg(func, axis) + if isinstance(ray_result, rdf.DataFrame): + assert ray_df_equals_pandas(ray_result, pandas_result) + else: + assert ray_result.equals(pandas_result) + + +@pytest.fixture +def test_aggregate(ray_df, pandas_df, func, axis): + ray_result = ray_df.aggregate(func, axis) + pandas_result = pandas_df.aggregate(func, axis) + if isinstance(ray_result, rdf.DataFrame): + assert ray_df_equals_pandas(ray_result, pandas_result) + else: + assert ray_result.equals(pandas_result) def test_align(): @@ -712,17 +917,33 @@ def test_any(ray_df, pd_df): def test_append(): - ray_df = create_test_dataframe() + ray_df = rdf.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7], + "col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]}) - with pytest.raises(NotImplementedError): - ray_df.append(None) + pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7], + "col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]}) + ray_df2 = rdf.DataFrame({"col5": [0], "col6": [1]}) -def test_apply(): - ray_df = create_test_dataframe() + pandas_df2 = pd.DataFrame({"col5": [0], "col6": [1]}) - with pytest.raises(NotImplementedError): - ray_df.apply(None) + print(ray_df.append(ray_df2)) + + assert ray_df_equals_pandas(ray_df.append(ray_df2), + pandas_df.append(pandas_df2)) + + with pytest.raises(ValueError): + ray_df.append(ray_df2, verify_integrity=True) + + +@pytest.fixture +def test_apply(ray_df, pandas_df, func, axis): + ray_result = ray_df.apply(func, axis) + pandas_result = pandas_df.apply(func, axis) + if isinstance(ray_result, rdf.DataFrame): + assert ray_df_equals_pandas(ray_result, pandas_result) + else: + assert ray_result.equals(pandas_result) def test_as_blocks(): @@ -928,17 +1149,11 @@ def test_diff(): def test_div(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.div(None) + test_inter_df_math("div", simple=False) def test_divide(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.divide(None) + test_inter_df_math("divide", simple=False) def test_dot(): @@ -1053,10 +1268,7 @@ def test_duplicated(): def test_eq(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.eq(None) + test_comparison_inter_ops("eq") def test_equals(): @@ -1563,10 +1775,7 @@ def test_first_valid_index(ray_df, pandas_df): def test_floordiv(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.floordiv(None) + test_inter_df_math("floordiv", simple=False) def test_from_csv(): @@ -1590,10 +1799,7 @@ def test_from_records(): def test_ge(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.ge(None) + test_comparison_inter_ops("ge") def test_get_value(): @@ -1611,10 +1817,7 @@ def test_get_values(): def test_gt(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.gt(None) + test_comparison_inter_ops("gt") @pytest.fixture @@ -1726,10 +1929,31 @@ def test_itertuples(ray_df, pandas_df): def test_join(): - ray_df = create_test_dataframe() + ray_df = rdf.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7], + "col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]}) - with pytest.raises(NotImplementedError): - ray_df.join(None) + pandas_df = pd.DataFrame({"col1": [0, 1, 2, 3], "col2": [4, 5, 6, 7], + "col3": [8, 9, 0, 1], "col4": [2, 4, 5, 6]}) + + ray_df2 = rdf.DataFrame({"col5": [0], "col6": [1]}) + + pandas_df2 = pd.DataFrame({"col5": [0], "col6": [1]}) + + join_types = ["left", "right", "outer", "inner"] + for how in join_types: + ray_join = ray_df.join(ray_df2, how=how) + pandas_join = pandas_df.join(pandas_df2, how=how) + ray_df_equals_pandas(ray_join, pandas_join) + + ray_df3 = rdf.DataFrame({"col7": [1, 2, 3, 5, 6, 7, 8]}) + + pandas_df3 = pd.DataFrame({"col7": [1, 2, 3, 5, 6, 7, 8]}) + + join_types = ["left", "outer", "inner"] + for how in join_types: + ray_join = ray_df.join([ray_df2, ray_df3], how=how) + pandas_join = pandas_df.join([pandas_df2, pandas_df3], how=how) + ray_df_equals_pandas(ray_join, pandas_join) def test_kurt(): @@ -1759,10 +1983,7 @@ def test_last_valid_index(ray_df, pandas_df): def test_le(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.le(None) + test_comparison_inter_ops("le") def test_lookup(): @@ -1773,10 +1994,7 @@ def test_lookup(): def test_lt(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.lt(None) + test_comparison_inter_ops("lt") def test_mad(): @@ -1838,10 +2056,7 @@ def test_min(ray_df, pandas_df): def test_mod(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.mod(None) + test_inter_df_math("mod", simple=False) def test_mode(): @@ -1852,24 +2067,15 @@ def test_mode(): def test_mul(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.mul(None) + test_inter_df_math("mul", simple=False) def test_multiply(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.multiply(None) + test_inter_df_math("multiply", simple=False) def test_ne(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.ne(None) + test_comparison_inter_ops("ne") def test_nlargest(): @@ -1949,10 +2155,7 @@ def test_pop(ray_df, pandas_df): def test_pow(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.pow(None) + test_inter_df_math("pow", simple=False) def test_prod(): @@ -1982,10 +2185,7 @@ def test_query(ray_df, pandas_df, funcs): def test_radd(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.radd(None) + test_inter_df_math_right_ops("radd") def test_rank(): @@ -1996,10 +2196,7 @@ def test_rank(): def test_rdiv(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.rdiv(None) + test_inter_df_math_right_ops("rdiv") def test_reindex(): @@ -2311,24 +2508,15 @@ def test_reset_index(ray_df, pandas_df, inplace=False): def test_rfloordiv(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.rfloordiv(None) + test_inter_df_math_right_ops("rfloordiv") def test_rmod(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.rmod(None) + test_inter_df_math_right_ops("rmod") def test_rmul(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.rmul(None) + test_inter_df_math_right_ops("rmul") def test_rolling(): @@ -2345,31 +2533,21 @@ def test_round(ray_df, pd_df): def test_rpow(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.rpow(None) + test_inter_df_math_right_ops("rpow") def test_rsub(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.rsub(None) + test_inter_df_math_right_ops("rsub") def test_rtruediv(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.rtruediv(None) + test_inter_df_math_right_ops("rtruediv") def test_sample(): ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.sample() + assert len(ray_df.sample(n=4)) == 4 + assert len(ray_df.sample(frac=0.5)) == 2 def test_select(): @@ -2481,17 +2659,11 @@ def test_std(ray_df, pandas_df): def test_sub(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.sub(None) + test_inter_df_math("sub", simple=False) def test_subtract(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.subtract(None) + test_inter_df_math("subtract", simple=False) def test_swapaxes(): @@ -2520,118 +2692,6 @@ def test_take(): ray_df.take(None) -def test_to_clipboard(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_clipboard() - - -def test_to_csv(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_csv() - - -def test_to_dense(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_dense() - - -def test_to_dict(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_dict() - - -def test_to_excel(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_excel(None) - - -def test_to_feather(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_feather(None) - - -def test_to_gbq(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_gbq(None, None) - - -def test_to_hdf(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_hdf(None, None) - - -def test_to_html(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_html() - - -def test_to_json(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_json() - - -def test_to_latex(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_latex() - - -def test_to_msgpack(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_msgpack() - - -def test_to_panel(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_panel() - - -def test_to_parquet(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_parquet(None) - - -def test_to_period(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_period() - - -def test_to_pickle(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_pickle(None) - - def test_to_records(): ray_df = create_test_dataframe() @@ -2646,20 +2706,6 @@ def test_to_sparse(): ray_df.to_sparse() -def test_to_sql(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_sql(None, None) - - -def test_to_stata(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.to_stata(None) - - def test_to_string(): ray_df = create_test_dataframe() @@ -2681,18 +2727,16 @@ def test_to_xarray(): ray_df.to_xarray() -def test_transform(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.transform(None) +@pytest.fixture +def test_transform(ray_df, pandas_df): + ray_df_equals_pandas(ray_df.transform(lambda df: df.isna()), + pandas_df.transform(lambda df: df.isna())) + ray_df_equals_pandas(ray_df.transform('isna'), + pandas_df.transform('isna')) def test_truediv(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.truediv(None) + test_inter_df_math("truediv", simple=False) def test_truncate(): @@ -2865,11 +2909,9 @@ def test___round__(): ray_df.__round__() -def test___array__(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.__array__() +@pytest.fixture +def test___array__(ray_df, pandas_df): + assert np.array_equal(ray_df.__array__(), pandas_df.__array__()) def test___array_wrap__(): @@ -2943,10 +2985,7 @@ def test_iat(): def test___rsub__(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.__rsub__(None, None, None) + test_inter_df_math_right_ops("__rsub__") @pytest.fixture @@ -2973,17 +3012,11 @@ def test_is_copy(): def test___itruediv__(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.__itruediv__() + test_inter_df_math("__itruediv__", simple=True) def test___div__(): - ray_df = create_test_dataframe() - - with pytest.raises(NotImplementedError): - ray_df.__div__(None) + test_inter_df_math("__div__", simple=True) def test_at(): diff --git a/python/ray/dataframe/test/test_io.py b/python/ray/dataframe/test/test_io.py index 2bfbf7c43f747..c2ab544beefe2 100644 --- a/python/ray/dataframe/test/test_io.py +++ b/python/ray/dataframe/test/test_io.py @@ -4,9 +4,9 @@ import pytest import numpy as np -import pandas as pd +import pandas from ray.dataframe.utils import to_pandas -import ray.dataframe.io as io +import ray.dataframe as pd import os import sqlite3 @@ -36,13 +36,53 @@ def setup_parquet_file(row_size, force=False): if os.path.exists(TEST_PARQUET_FILENAME) and not force: pass else: - df = pd.DataFrame({ + df = pandas.DataFrame({ 'col1': np.arange(row_size), 'col2': np.arange(row_size) }) df.to_parquet(TEST_PARQUET_FILENAME) +@pytest.fixture +def create_test_ray_dataframe(): + df = pd.DataFrame({'col1': [0, 1, 2, 3], + 'col2': [4, 5, 6, 7], + 'col3': [8, 9, 10, 11], + 'col4': [12, 13, 14, 15], + 'col5': [0, 0, 0, 0]}) + + return df + + +@pytest.fixture +def create_test_pandas_dataframe(): + df = pandas.DataFrame({'col1': [0, 1, 2, 3], + 'col2': [4, 5, 6, 7], + 'col3': [8, 9, 10, 11], + 'col4': [12, 13, 14, 15], + 'col5': [0, 0, 0, 0]}) + + return df + + +@pytest.fixture +def test_files_eq(path1, path2): + with open(path1, 'rb') as file1, open(path2, 'rb') as file2: + file1_content = file1.read() + file2_content = file2.read() + + if file1_content == file2_content: + return True + else: + return False + + +@pytest.fixture +def teardown_test_file(test_path): + if os.path.exists(test_path): + os.remove(test_path) + + @pytest.fixture def teardown_parquet_file(): if os.path.exists(TEST_PARQUET_FILENAME): @@ -54,7 +94,7 @@ def setup_csv_file(row_size, force=False, delimiter=','): if os.path.exists(TEST_CSV_FILENAME) and not force: pass else: - df = pd.DataFrame({ + df = pandas.DataFrame({ 'col1': np.arange(row_size), 'col2': np.arange(row_size) }) @@ -72,7 +112,7 @@ def setup_json_file(row_size, force=False): if os.path.exists(TEST_JSON_FILENAME) and not force: pass else: - df = pd.DataFrame({ + df = pandas.DataFrame({ 'col1': np.arange(row_size), 'col2': np.arange(row_size) }) @@ -90,7 +130,7 @@ def setup_html_file(row_size, force=False): if os.path.exists(TEST_HTML_FILENAME) and not force: pass else: - df = pd.DataFrame({ + df = pandas.DataFrame({ 'col1': np.arange(row_size), 'col2': np.arange(row_size) }) @@ -105,7 +145,7 @@ def teardown_html_file(): @pytest.fixture def setup_clipboard(row_size, force=False): - df = pd.DataFrame({ + df = pandas.DataFrame({ 'col1': np.arange(row_size), 'col2': np.arange(row_size) }) @@ -117,7 +157,7 @@ def setup_excel_file(row_size, force=False): if os.path.exists(TEST_EXCEL_FILENAME) and not force: pass else: - df = pd.DataFrame({ + df = pandas.DataFrame({ 'col1': np.arange(row_size), 'col2': np.arange(row_size) }) @@ -135,7 +175,7 @@ def setup_feather_file(row_size, force=False): if os.path.exists(TEST_FEATHER_FILENAME) and not force: pass else: - df = pd.DataFrame({ + df = pandas.DataFrame({ 'col1': np.arange(row_size), 'col2': np.arange(row_size) }) @@ -153,7 +193,7 @@ def setup_hdf_file(row_size, force=False): if os.path.exists(TEST_HDF_FILENAME) and not force: pass else: - df = pd.DataFrame({ + df = pandas.DataFrame({ 'col1': np.arange(row_size), 'col2': np.arange(row_size) }) @@ -171,7 +211,7 @@ def setup_msgpack_file(row_size, force=False): if os.path.exists(TEST_MSGPACK_FILENAME) and not force: pass else: - df = pd.DataFrame({ + df = pandas.DataFrame({ 'col1': np.arange(row_size), 'col2': np.arange(row_size) }) @@ -189,7 +229,7 @@ def setup_stata_file(row_size, force=False): if os.path.exists(TEST_STATA_FILENAME) and not force: pass else: - df = pd.DataFrame({ + df = pandas.DataFrame({ 'col1': np.arange(row_size), 'col2': np.arange(row_size) }) @@ -207,7 +247,7 @@ def setup_pickle_file(row_size, force=False): if os.path.exists(TEST_PICKLE_FILENAME) and not force: pass else: - df = pd.DataFrame({ + df = pandas.DataFrame({ 'col1': np.arange(row_size), 'col2': np.arange(row_size) }) @@ -225,11 +265,11 @@ def setup_sql_file(conn, force=False): if os.path.exists(TEST_SQL_FILENAME) and not force: pass else: - df = pd.DataFrame({'col1': [0, 1, 2, 3], - 'col2': [4, 5, 6, 7], - 'col3': [8, 9, 10, 11], - 'col4': [12, 13, 14, 15], - 'col5': [0, 0, 0, 0]}) + df = pandas.DataFrame({'col1': [0, 1, 2, 3], + 'col2': [4, 5, 6, 7], + 'col3': [8, 9, 10, 11], + 'col4': [12, 13, 14, 15], + 'col5': [0, 0, 0, 0]}) df.to_sql(TEST_SQL_FILENAME.split(".")[0], conn) @@ -243,9 +283,9 @@ def test_from_parquet_small(): setup_parquet_file(SMALL_ROW_SIZE) - pd_df = pd.read_parquet(TEST_PARQUET_FILENAME) - ray_df = io.read_parquet(TEST_PARQUET_FILENAME) - assert ray_df_equals_pandas(ray_df, pd_df) + pandas_df = pandas.read_parquet(TEST_PARQUET_FILENAME) + ray_df = pd.read_parquet(TEST_PARQUET_FILENAME) + assert ray_df_equals_pandas(ray_df, pandas_df) teardown_parquet_file() @@ -253,10 +293,10 @@ def test_from_parquet_small(): def test_from_parquet_large(): setup_parquet_file(LARGE_ROW_SIZE) - pd_df = pd.read_parquet(TEST_PARQUET_FILENAME) - ray_df = io.read_parquet(TEST_PARQUET_FILENAME) + pandas_df = pandas.read_parquet(TEST_PARQUET_FILENAME) + ray_df = pd.read_parquet(TEST_PARQUET_FILENAME) - assert ray_df_equals_pandas(ray_df, pd_df) + assert ray_df_equals_pandas(ray_df, pandas_df) teardown_parquet_file() @@ -264,10 +304,10 @@ def test_from_parquet_large(): def test_from_csv(): setup_csv_file(SMALL_ROW_SIZE) - pd_df = pd.read_csv(TEST_CSV_FILENAME) - ray_df = io.read_csv(TEST_CSV_FILENAME) + pandas_df = pandas.read_csv(TEST_CSV_FILENAME) + ray_df = pd.read_csv(TEST_CSV_FILENAME) - assert ray_df_equals_pandas(ray_df, pd_df) + assert ray_df_equals_pandas(ray_df, pandas_df) teardown_csv_file() @@ -275,10 +315,10 @@ def test_from_csv(): def test_from_json(): setup_json_file(SMALL_ROW_SIZE) - pd_df = pd.read_json(TEST_JSON_FILENAME) - ray_df = io.read_json(TEST_JSON_FILENAME) + pandas_df = pandas.read_json(TEST_JSON_FILENAME) + ray_df = pd.read_json(TEST_JSON_FILENAME) - assert ray_df_equals_pandas(ray_df, pd_df) + assert ray_df_equals_pandas(ray_df, pandas_df) teardown_json_file() @@ -286,10 +326,10 @@ def test_from_json(): def test_from_html(): setup_html_file(SMALL_ROW_SIZE) - pd_df = pd.read_html(TEST_HTML_FILENAME)[0] - ray_df = io.read_html(TEST_HTML_FILENAME) + pandas_df = pandas.read_html(TEST_HTML_FILENAME)[0] + ray_df = pd.read_html(TEST_HTML_FILENAME) - assert ray_df_equals_pandas(ray_df, pd_df) + assert ray_df_equals_pandas(ray_df, pandas_df) teardown_html_file() @@ -298,19 +338,19 @@ def test_from_html(): def test_from_clipboard(): setup_clipboard(SMALL_ROW_SIZE) - pd_df = pd.read_clipboard() - ray_df = io.read_clipboard() + pandas_df = pandas.read_clipboard() + ray_df = pd.read_clipboard() - assert ray_df_equals_pandas(ray_df, pd_df) + assert ray_df_equals_pandas(ray_df, pandas_df) def test_from_excel(): setup_excel_file(SMALL_ROW_SIZE) - pd_df = pd.read_excel(TEST_EXCEL_FILENAME) - ray_df = io.read_excel(TEST_EXCEL_FILENAME) + pandas_df = pandas.read_excel(TEST_EXCEL_FILENAME) + ray_df = pd.read_excel(TEST_EXCEL_FILENAME) - assert ray_df_equals_pandas(ray_df, pd_df) + assert ray_df_equals_pandas(ray_df, pandas_df) teardown_excel_file() @@ -318,10 +358,10 @@ def test_from_excel(): def test_from_feather(): setup_feather_file(SMALL_ROW_SIZE) - pd_df = pd.read_feather(TEST_FEATHER_FILENAME) - ray_df = io.read_feather(TEST_FEATHER_FILENAME) + pandas_df = pandas.read_feather(TEST_FEATHER_FILENAME) + ray_df = pd.read_feather(TEST_FEATHER_FILENAME) - assert ray_df_equals_pandas(ray_df, pd_df) + assert ray_df_equals_pandas(ray_df, pandas_df) teardown_feather_file() @@ -330,10 +370,10 @@ def test_from_feather(): def test_from_hdf(): setup_hdf_file(SMALL_ROW_SIZE) - pd_df = pd.read_hdf(TEST_HDF_FILENAME, key='test') - ray_df = io.read_hdf(TEST_HDF_FILENAME, key='test') + pandas_df = pandas.read_hdf(TEST_HDF_FILENAME, key='test') + ray_df = pd.read_hdf(TEST_HDF_FILENAME, key='test') - assert ray_df_equals_pandas(ray_df, pd_df) + assert ray_df_equals_pandas(ray_df, pandas_df) teardown_hdf_file() @@ -341,10 +381,10 @@ def test_from_hdf(): def test_from_msgpack(): setup_msgpack_file(SMALL_ROW_SIZE) - pd_df = pd.read_msgpack(TEST_MSGPACK_FILENAME) - ray_df = io.read_msgpack(TEST_MSGPACK_FILENAME) + pandas_df = pandas.read_msgpack(TEST_MSGPACK_FILENAME) + ray_df = pd.read_msgpack(TEST_MSGPACK_FILENAME) - assert ray_df_equals_pandas(ray_df, pd_df) + assert ray_df_equals_pandas(ray_df, pandas_df) teardown_msgpack_file() @@ -352,10 +392,10 @@ def test_from_msgpack(): def test_from_stata(): setup_stata_file(SMALL_ROW_SIZE) - pd_df = pd.read_stata(TEST_STATA_FILENAME) - ray_df = io.read_stata(TEST_STATA_FILENAME) + pandas_df = pandas.read_stata(TEST_STATA_FILENAME) + ray_df = pd.read_stata(TEST_STATA_FILENAME) - assert ray_df_equals_pandas(ray_df, pd_df) + assert ray_df_equals_pandas(ray_df, pandas_df) teardown_stata_file() @@ -363,10 +403,10 @@ def test_from_stata(): def test_from_pickle(): setup_pickle_file(SMALL_ROW_SIZE) - pd_df = pd.read_pickle(TEST_PICKLE_FILENAME) - ray_df = io.read_pickle(TEST_PICKLE_FILENAME) + pandas_df = pandas.read_pickle(TEST_PICKLE_FILENAME) + ray_df = pd.read_pickle(TEST_PICKLE_FILENAME) - assert ray_df_equals_pandas(ray_df, pd_df) + assert ray_df_equals_pandas(ray_df, pandas_df) teardown_pickle_file() @@ -375,17 +415,261 @@ def test_from_sql(): conn = sqlite3.connect(TEST_SQL_FILENAME) setup_sql_file(conn, True) - pd_df = pd.read_sql("select * from test", conn) - ray_df = io.read_sql("select * from test", conn) + pandas_df = pandas.read_sql("select * from test", conn) + ray_df = pd.read_sql("select * from test", conn) - assert ray_df_equals_pandas(ray_df, pd_df) + assert ray_df_equals_pandas(ray_df, pandas_df) teardown_sql_file() @pytest.mark.skip(reason="No SAS write methods in Pandas") def test_from_sas(): - pd_df = pd.read_sas(TEST_SAS_FILENAME) - ray_df = io.read_sas(TEST_SAS_FILENAME) + pandas_df = pandas.read_sas(TEST_SAS_FILENAME) + ray_df = pd.read_sas(TEST_SAS_FILENAME) + + assert ray_df_equals_pandas(ray_df, pandas_df) + + +def test_from_csv_delimiter(): + setup_csv_file(SMALL_ROW_SIZE, delimiter='|') + + pandas_df = pandas.read_csv(TEST_CSV_FILENAME) + ray_df = pd.read_csv(TEST_CSV_FILENAME) + + assert ray_df_equals_pandas(ray_df, pandas_df) + + teardown_csv_file() + + +@pytest.mark.skip(reason="No clipboard on Travis") +def test_to_clipboard(): + ray_df = create_test_ray_dataframe() + pandas_df = create_test_pandas_dataframe() + + ray_df.to_clipboard() + ray_as_clip = pandas.read_clipboard() + + pandas_df.to_clipboard() + pandas_as_clip = pandas.read_clipboard() + + assert(ray_as_clip.equals(pandas_as_clip)) + + +def test_to_csv(): + ray_df = create_test_ray_dataframe() + pandas_df = create_test_pandas_dataframe() + + TEST_CSV_DF_FILENAME = "test_df.csv" + TEST_CSV_pandas_FILENAME = "test_pandas.csv" + + ray_df.to_csv(TEST_CSV_DF_FILENAME) + pandas_df.to_csv(TEST_CSV_pandas_FILENAME) + + assert(test_files_eq(TEST_CSV_DF_FILENAME, + TEST_CSV_pandas_FILENAME)) + + teardown_test_file(TEST_CSV_pandas_FILENAME) + teardown_test_file(TEST_CSV_DF_FILENAME) + + +def test_to_dense(): + ray_df = create_test_ray_dataframe() + + with pytest.raises(NotImplementedError): + ray_df.to_dense() + + +def test_to_dict(): + ray_df = create_test_ray_dataframe() + + with pytest.raises(NotImplementedError): + ray_df.to_dict() + + +def test_to_excel(): + ray_df = create_test_ray_dataframe() + pandas_df = create_test_pandas_dataframe() + + TEST_EXCEL_DF_FILENAME = "test_df.xlsx" + TEST_EXCEL_pandas_FILENAME = "test_pandas.xlsx" + + ray_writer = pandas.ExcelWriter(TEST_EXCEL_DF_FILENAME) + pandas_writer = pandas.ExcelWriter(TEST_EXCEL_pandas_FILENAME) + + ray_df.to_excel(ray_writer) + pandas_df.to_excel(pandas_writer) + + ray_writer.save() + pandas_writer.save() + + assert(test_files_eq(TEST_EXCEL_DF_FILENAME, + TEST_EXCEL_pandas_FILENAME)) + + teardown_test_file(TEST_EXCEL_DF_FILENAME) + teardown_test_file(TEST_EXCEL_pandas_FILENAME) + + +def test_to_feather(): + ray_df = create_test_ray_dataframe() + pandas_df = create_test_pandas_dataframe() + + TEST_FEATHER_DF_FILENAME = "test_df.feather" + TEST_FEATHER_pandas_FILENAME = "test_pandas.feather" + + ray_df.to_feather(TEST_FEATHER_DF_FILENAME) + pandas_df.to_feather(TEST_FEATHER_pandas_FILENAME) + + assert(test_files_eq(TEST_FEATHER_DF_FILENAME, + TEST_FEATHER_pandas_FILENAME)) + + teardown_test_file(TEST_FEATHER_pandas_FILENAME) + teardown_test_file(TEST_FEATHER_DF_FILENAME) + + +def test_to_gbq(): + ray_df = create_test_ray_dataframe() + + TEST_GBQ_DF_FILENAME = "test_df.gbq" + with pytest.raises(NotImplementedError): + ray_df.to_gbq(TEST_GBQ_DF_FILENAME, None) + + +def test_to_html(): + ray_df = create_test_ray_dataframe() + pandas_df = create_test_pandas_dataframe() + + TEST_HTML_DF_FILENAME = "test_df.html" + TEST_HTML_pandas_FILENAME = "test_pandas.html" + + ray_df.to_html(TEST_HTML_DF_FILENAME) + pandas_df.to_html(TEST_HTML_pandas_FILENAME) + + assert(test_files_eq(TEST_HTML_DF_FILENAME, + TEST_HTML_pandas_FILENAME)) + + teardown_test_file(TEST_HTML_pandas_FILENAME) + teardown_test_file(TEST_HTML_DF_FILENAME) + + +def test_to_json(): + ray_df = create_test_ray_dataframe() + pandas_df = create_test_pandas_dataframe() + + TEST_JSON_DF_FILENAME = "test_df.json" + TEST_JSON_pandas_FILENAME = "test_pandas.json" + + ray_df.to_json(TEST_JSON_DF_FILENAME) + pandas_df.to_json(TEST_JSON_pandas_FILENAME) + + assert(test_files_eq(TEST_JSON_DF_FILENAME, + TEST_JSON_pandas_FILENAME)) + + teardown_test_file(TEST_JSON_pandas_FILENAME) + teardown_test_file(TEST_JSON_DF_FILENAME) + + +def test_to_latex(): + ray_df = create_test_ray_dataframe() + + with pytest.raises(NotImplementedError): + ray_df.to_latex() + + +def test_to_msgpack(): + ray_df = create_test_ray_dataframe() + pandas_df = create_test_pandas_dataframe() + + TEST_MSGPACK_DF_FILENAME = "test_df.msgpack" + TEST_MSGPACK_pandas_FILENAME = "test_pandas.msgpack" + + ray_df.to_msgpack(TEST_MSGPACK_DF_FILENAME) + pandas_df.to_msgpack(TEST_MSGPACK_pandas_FILENAME) + + assert(test_files_eq(TEST_MSGPACK_DF_FILENAME, + TEST_MSGPACK_pandas_FILENAME)) + + teardown_test_file(TEST_MSGPACK_pandas_FILENAME) + teardown_test_file(TEST_MSGPACK_DF_FILENAME) + + +def test_to_panel(): + ray_df = create_test_ray_dataframe() + + with pytest.raises(NotImplementedError): + ray_df.to_panel() + + +def test_to_parquet(): + ray_df = create_test_ray_dataframe() + pandas_df = create_test_pandas_dataframe() + + TEST_PARQUET_DF_FILENAME = "test_df.parquet" + TEST_PARQUET_pandas_FILENAME = "test_pandas.parquet" + + ray_df.to_parquet(TEST_PARQUET_DF_FILENAME) + pandas_df.to_parquet(TEST_PARQUET_pandas_FILENAME) + + assert(test_files_eq(TEST_PARQUET_DF_FILENAME, + TEST_PARQUET_pandas_FILENAME)) + + teardown_test_file(TEST_PARQUET_pandas_FILENAME) + teardown_test_file(TEST_PARQUET_DF_FILENAME) + + +def test_to_period(): + ray_df = create_test_ray_dataframe() + + with pytest.raises(NotImplementedError): + ray_df.to_period() + + +def test_to_pickle(): + ray_df = create_test_ray_dataframe() + pandas_df = create_test_pandas_dataframe() + + TEST_PICKLE_DF_FILENAME = "test_df.pkl" + TEST_PICKLE_pandas_FILENAME = "test_pandas.pkl" + + ray_df.to_pickle(TEST_PICKLE_DF_FILENAME) + pandas_df.to_pickle(TEST_PICKLE_pandas_FILENAME) + + assert(test_files_eq(TEST_PICKLE_DF_FILENAME, + TEST_PICKLE_pandas_FILENAME)) + + teardown_test_file(TEST_PICKLE_pandas_FILENAME) + teardown_test_file(TEST_PICKLE_DF_FILENAME) + + +def test_to_sql(): + ray_df = create_test_ray_dataframe() + pandas_df = create_test_pandas_dataframe() + + TEST_SQL_DF_FILENAME = "test_df.sql" + TEST_SQL_pandas_FILENAME = "test_pandas.sql" + + ray_df.to_pickle(TEST_SQL_DF_FILENAME) + pandas_df.to_pickle(TEST_SQL_pandas_FILENAME) + + assert(test_files_eq(TEST_SQL_DF_FILENAME, + TEST_SQL_pandas_FILENAME)) + + teardown_test_file(TEST_SQL_DF_FILENAME) + teardown_test_file(TEST_SQL_pandas_FILENAME) + + +def test_to_stata(): + ray_df = create_test_ray_dataframe() + pandas_df = create_test_pandas_dataframe() + + TEST_STATA_DF_FILENAME = "test_df.stata" + TEST_STATA_pandas_FILENAME = "test_pandas.stata" + + ray_df.to_stata(TEST_STATA_DF_FILENAME) + pandas_df.to_stata(TEST_STATA_pandas_FILENAME) + + assert(test_files_eq(TEST_STATA_DF_FILENAME, + TEST_STATA_pandas_FILENAME)) - assert ray_df_equals_pandas(ray_df, pd_df) + teardown_test_file(TEST_STATA_pandas_FILENAME) + teardown_test_file(TEST_STATA_DF_FILENAME) diff --git a/python/ray/dataframe/utils.py b/python/ray/dataframe/utils.py index 10382984c4cd5..97c166d094134 100644 --- a/python/ray/dataframe/utils.py +++ b/python/ray/dataframe/utils.py @@ -161,8 +161,7 @@ def _map_partitions(func, partitions, *argslists): def _build_columns(df_col, columns): """Build columns and compute lengths for each partition.""" # Columns and width - widths = np.array(ray.get([_deploy_func.remote(lambda df: len(df.columns), - d) + widths = np.array(ray.get([_deploy_func.remote(_get_widths, d) for d in df_col])) dest_indices = [(p_idx, p_sub_idx) for p_idx in range(len(widths)) for p_sub_idx in range(widths[p_idx])] @@ -190,7 +189,7 @@ def _build_index(df_row, index): def _create_block_partitions(partitions, axis=0, length=None): - if length is not None and get_npartitions() > length: + if length is not None and length != 0 and get_npartitions() > length: npartitions = length else: npartitions = get_npartitions() @@ -200,12 +199,16 @@ def _create_block_partitions(partitions, axis=0, length=None): for partition in partitions] # In the case that axis is 1 we have to transpose because we build the - # columns into rows. Fortunately numpy is efficent at this. + # columns into rows. Fortunately numpy is efficient at this. return np.array(x) if axis == 0 else np.array(x).T @ray.remote def create_blocks(df, npartitions, axis): + return create_blocks_helper(df, npartitions, axis) + + +def create_blocks_helper(df, npartitions, axis): # Single partition dataframes don't need to be repartitioned if npartitions == 1: return df @@ -282,3 +285,56 @@ def decorator(cls): return cls return decorator + + +@ray.remote +def _reindex_helper(old_index, new_index, axis, npartitions, *df): + """Reindexes a dataframe to prepare for join/concat. + + Args: + df: The DataFrame partition + old_index: The index/column for this partition. + new_index: The new index/column to assign. + axis: Which axis to reindex over. + + Returns: + A new set of blocks made up of DataFrames. + """ + df = pd.concat(df, axis=axis ^ 1) + if axis == 1: + df.index = old_index + df = df.reindex(new_index, copy=False) + df.reset_index(inplace=True, drop=True) + elif axis == 0: + df.columns = old_index + df = df.reindex(columns=new_index, copy=False) + df.columns = pd.RangeIndex(len(df.columns)) + return create_blocks_helper(df, npartitions, axis) + + +@ray.remote +def _co_op_helper(func, left_columns, right_columns, left_df_len, *zipped): + """Copartition operation where two DataFrames must have aligned indexes. + + NOTE: This function assumes things are already copartitioned. Requires that + row partitions are passed in as blocks. + + Args: + func: The operation to conduct between two DataFrames. + left_columns: The column names for the left DataFrame. + right_columns: The column names for the right DataFrame. + left_df_len: The length of the left. This is used so we can split up + the zipped partitions. + zipped: The DataFrame partitions (in blocks). + + Returns: + A new set of blocks for the partitioned DataFrame. + """ + left = pd.concat(zipped[:left_df_len], axis=1, copy=False) + left.columns = left_columns + + right = pd.concat(zipped[left_df_len:], axis=1, copy=False) + right.columns = right_columns + + new_rows = func(left, right) + return create_blocks_helper(new_rows, left_df_len, 0) diff --git a/python/ray/rllib/README.rst b/python/ray/rllib/README.rst index 29b31e625d728..ea2befb04a2fc 100644 --- a/python/ray/rllib/README.rst +++ b/python/ray/rllib/README.rst @@ -5,18 +5,18 @@ Ray RLlib is an RL execution toolkit built on the Ray distributed execution fram RLlib includes the following reference algorithms: -- `Proximal Policy Optimization (PPO) `__ which - is a proximal variant of `TRPO `__. +- Proximal Policy Optimization (`PPO `__) which is a proximal variant of `TRPO `__. -- `The Asynchronous Advantage Actor-Critic (A3C) `__. +- Policy Gradients (`PG `__). -- `Deep Q Networks (DQN) `__. +- Asynchronous Advantage Actor-Critic (`A3C `__). -- `Ape-X Distributed Prioritized Experience Replay `__. +- Deep Q Networks (`DQN `__). -- Evolution Strategies, as described in `this - paper `__. Our implementation - is adapted from - `here `__. +- Deep Deterministic Policy Gradients (`DDPG `__, `DDPG2 `__). + +- Ape-X Distributed Prioritized Experience Replay, including both `DQN `__ and `DDPG `__ variants. + +- Evolution Strategies (`ES `__), as described in `this paper `__. These algorithms can be run on any OpenAI Gym MDP, including custom ones written and registered by the user. diff --git a/python/ray/rllib/__init__.py b/python/ray/rllib/__init__.py index 7e2145c2c4221..a2441f0b5bf67 100644 --- a/python/ray/rllib/__init__.py +++ b/python/ray/rllib/__init__.py @@ -9,7 +9,7 @@ def _register_all(): for key in ["PPO", "ES", "DQN", "APEX", "A3C", "BC", "PG", "DDPG", - "DDPG2", "APEX_DDPG2", "__fake", "__sigmoid_fake_data", + "DDPG2", "APEX_DDPG", "__fake", "__sigmoid_fake_data", "__parameter_tuning"]: from ray.rllib.agent import get_agent_class register_trainable(key, get_agent_class(key)) diff --git a/python/ray/rllib/agent.py b/python/ray/rllib/agent.py index fd32edf56e2e2..5699022b2a8e3 100644 --- a/python/ray/rllib/agent.py +++ b/python/ray/rllib/agent.py @@ -234,9 +234,12 @@ def get_agent_class(alg): if alg == "DDPG2": from ray.rllib import ddpg2 return ddpg2.DDPG2Agent - elif alg == "APEX_DDPG2": - from ray.rllib import ddpg2 - return ddpg2.ApexDDPG2Agent + elif alg == "DDPG": + from ray.rllib import ddpg + return ddpg.DDPGAgent + elif alg == "APEX_DDPG": + from ray.rllib import ddpg + return ddpg.ApexDDPGAgent elif alg == "PPO": from ray.rllib import ppo return ppo.PPOAgent @@ -258,9 +261,6 @@ def get_agent_class(alg): elif alg == "PG": from ray.rllib import pg return pg.PGAgent - elif alg == "DDPG": - from ray.rllib import ddpg - return ddpg.DDPGAgent elif alg == "script": from ray.tune import script_runner return script_runner.ScriptRunner diff --git a/python/ray/rllib/ddpg/README.md b/python/ray/rllib/ddpg/README.md new file mode 100644 index 0000000000000..93c32b0a25ba6 --- /dev/null +++ b/python/ray/rllib/ddpg/README.md @@ -0,0 +1 @@ +Implementation of deep deterministic policy gradients (https://arxiv.org/abs/1509.02971), including an Ape-X variant. diff --git a/python/ray/rllib/ddpg/__init__.py b/python/ray/rllib/ddpg/__init__.py index 004e0f128323a..932b9f0c82cbd 100644 --- a/python/ray/rllib/ddpg/__init__.py +++ b/python/ray/rllib/ddpg/__init__.py @@ -1,3 +1,8 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from ray.rllib.ddpg.apex import ApexDDPGAgent from ray.rllib.ddpg.ddpg import DDPGAgent, DEFAULT_CONFIG -__all__ = ["DDPGAgent", "DEFAULT_CONFIG"] +__all__ = ["DDPGAgent", "ApexDDPGAgent", "DEFAULT_CONFIG"] diff --git a/python/ray/rllib/ddpg2/apex.py b/python/ray/rllib/ddpg/apex.py similarity index 95% rename from python/ray/rllib/ddpg2/apex.py rename to python/ray/rllib/ddpg/apex.py index 9ace851b5d5f7..c670198c35710 100644 --- a/python/ray/rllib/ddpg2/apex.py +++ b/python/ray/rllib/ddpg/apex.py @@ -2,7 +2,7 @@ from __future__ import division from __future__ import print_function -from ray.rllib.ddpg2.ddpg import DDPG2Agent, DEFAULT_CONFIG as DDPG_CONFIG +from ray.rllib.ddpg.ddpg import DDPGAgent, DEFAULT_CONFIG as DDPG_CONFIG APEX_DDPG_DEFAULT_CONFIG = dict(DDPG_CONFIG, **dict( @@ -28,7 +28,7 @@ )) -class ApexDDPG2Agent(DDPG2Agent): +class ApexDDPGAgent(DDPGAgent): """DDPG variant that uses the Ape-X distributed policy optimizer. By default, this is configured for a large single node (32 cores). For diff --git a/python/ray/rllib/ddpg2/common/__init__.py b/python/ray/rllib/ddpg/common/__init__.py similarity index 100% rename from python/ray/rllib/ddpg2/common/__init__.py rename to python/ray/rllib/ddpg/common/__init__.py diff --git a/python/ray/rllib/ddpg/ddpg.py b/python/ray/rllib/ddpg/ddpg.py index fc79013839107..343b323948b3b 100644 --- a/python/ray/rllib/ddpg/ddpg.py +++ b/python/ray/rllib/ddpg/ddpg.py @@ -2,111 +2,267 @@ from __future__ import division from __future__ import print_function +import pickle +import os + import numpy as np +import tensorflow as tf import ray +from ray.rllib import optimizers +from ray.rllib.ddpg.ddpg_evaluator import DDPGEvaluator from ray.rllib.agent import Agent -from ray.rllib.ddpg.ddpg_evaluator import DDPGEvaluator, RemoteDDPGEvaluator -from ray.rllib.optimizers import LocalSyncReplayOptimizer from ray.tune.result import TrainingResult -DEFAULT_CONFIG = { - # Actor learning rate - "actor_lr": 0.0001, - # Critic learning rate - "critic_lr": 0.001, - # Arguments to pass in to env creator - "env_config": {}, - # MDP Discount factor - "gamma": 0.99, - # Number of steps after which the rollout gets cut - "horizon": 500, - - # Whether to include parameter noise - "noise_add": True, - # Linear decay of exploration policy - "noise_epsilon": 0.0002, - # Parameters for noise process - "noise_parameters": { - "mu": 0, - "sigma": 0.2, - "theta": 0.15, - }, +OPTIMIZER_SHARED_CONFIGS = [ + "buffer_size", "prioritized_replay", "prioritized_replay_alpha", + "prioritized_replay_beta", "prioritized_replay_eps", "sample_batch_size", + "train_batch_size", "learning_starts", "clip_rewards" +] + +DEFAULT_CONFIG = dict( + # === Model === + # Hidden layer sizes of the policy networks + actor_hiddens=[64, 64], + # Hidden layer sizes of the policy networks + critic_hiddens=[64, 64], + # N-step Q learning + n_step=1, + # Config options to pass to the model constructor + model={}, + # Discount factor for the MDP + gamma=0.99, + # Arguments to pass to the env creator + env_config={}, + + # === Exploration === + # Max num timesteps for annealing schedules. Exploration is annealed from + # 1.0 to exploration_fraction over this number of timesteps scaled by + # exploration_fraction + schedule_max_timesteps=100000, + # Number of env steps to optimize for before returning + timesteps_per_iteration=1000, + # Fraction of entire training period over which the exploration rate is + # annealed + exploration_fraction=0.1, + # Final value of random action probability + exploration_final_eps=0.02, + # OU-noise scale + noise_scale=0.1, + # theta + exploration_theta=0.15, + # sigma + exploration_sigma=0.2, + # Update the target network every `target_network_update_freq` steps. + target_network_update_freq=0, + # Update the target by \tau * policy + (1-\tau) * target_policy + tau=0.002, + # Whether to start with random actions instead of noops. + random_starts=True, - # Number of local steps taken for each call to sample - "num_local_steps": 1, - # Number of workers (excluding master) - "num_workers": 0, - - "optimizer": { - # Replay buffer size - "buffer_size": 10000, - # Number of steps in warm-up phase before learning starts - "learning_starts": 500, - # Whether to clip rewards - "clip_rewards": False, - # Whether to use prioritized replay - "prioritized_replay": False, - # Size of batch sampled from replay buffer - "train_batch_size": 64, + # === Replay buffer === + # Size of the replay buffer. Note that if async_updates is set, then + # each worker will have a replay buffer of this size. + buffer_size=50000, + # If True prioritized replay buffer will be used. + prioritized_replay=True, + # Alpha parameter for prioritized replay buffer. + prioritized_replay_alpha=0.6, + # Beta parameter for sampling from prioritized replay buffer. + prioritized_replay_beta=0.4, + # Epsilon to add to the TD errors when updating priorities. + prioritized_replay_eps=1e-6, + # Whether to clip rewards to [-1, 1] prior to adding to the replay buffer. + clip_rewards=True, + + # === Optimization === + # Learning rate for adam optimizer + actor_lr=1e-4, + critic_lr=1e-3, + # If True, use huber loss instead of squared loss for critic network + # Conventionally, no need to clip gradients if using a huber loss + use_huber=False, + # Threshold of a huber loss + huber_threshold=1.0, + # Weights for L2 regularization + l2_reg=1e-6, + # If not None, clip gradients during optimization at this value + grad_norm_clipping=None, + # How many steps of the model to sample before learning starts. + learning_starts=1500, + # Update the replay buffer with this many samples at once. Note that this + # setting applies per-worker if num_workers > 1. + sample_batch_size=1, + # Size of a batched sampled from replay buffer for training. Note that + # if async_updates is set, then each worker returns gradients for a + # batch of this size. + train_batch_size=256, + # Smooth the current average reward over this many previous episodes. + smoothing_num_episodes=100, + + # === Tensorflow === + # Arguments to pass to tensorflow + tf_session_args={ + "device_count": { + "CPU": 2 + }, + "log_device_placement": False, + "allow_soft_placement": True, + "gpu_options": { + "allow_growth": True + }, + "inter_op_parallelism_threads": 1, + "intra_op_parallelism_threads": 1, }, - # Controls how fast target networks move - "tau": 0.001, - # Number of steps taken per training iteration - "train_steps": 600, -} + # === Parallelism === + # Number of workers for collecting samples with. This only makes sense + # to increase if your environment is particularly slow to sample, or if + # you're using the Async or Ape-X optimizers. + num_workers=0, + # Whether to allocate GPUs for workers (if > 0). + num_gpus_per_worker=0, + # Optimizer class to use. + optimizer_class="LocalSyncReplayOptimizer", + # Config to pass to the optimizer. + optimizer_config=dict(), + # Whether to use a distribution of epsilons across workers for exploration. + per_worker_exploration=False, + # Whether to compute priorities on workers. + worker_side_prioritization=False) class DDPGAgent(Agent): _agent_name = "DDPG" + _allow_unknown_subkeys = [ + "model", "optimizer", "tf_session_args", "env_config" + ] _default_config = DEFAULT_CONFIG def _init(self): - self.local_evaluator = DDPGEvaluator( - self.registry, self.env_creator, self.config) + self.local_evaluator = DDPGEvaluator(self.registry, self.env_creator, + self.config, self.logdir, 0) + remote_cls = ray.remote( + num_cpus=1, + num_gpus=self.config["num_gpus_per_worker"])(DDPGEvaluator) self.remote_evaluators = [ - RemoteDDPGEvaluator.remote( - self.registry, self.env_creator, self.config) - for _ in range(self.config["num_workers"])] - self.optimizer = LocalSyncReplayOptimizer( - self.config["optimizer"], self.local_evaluator, + remote_cls.remote(self.registry, self.env_creator, self.config, + self.logdir, i) + for i in range(self.config["num_workers"]) + ] + + for k in OPTIMIZER_SHARED_CONFIGS: + if k not in self.config["optimizer_config"]: + self.config["optimizer_config"][k] = self.config[k] + + self.optimizer = getattr(optimizers, self.config["optimizer_class"])( + self.config["optimizer_config"], self.local_evaluator, self.remote_evaluators) + self.saver = tf.train.Saver(max_to_keep=None) + self.last_target_update_ts = 0 + self.num_target_updates = 0 + + @property + def global_timestep(self): + return self.optimizer.num_steps_sampled + + def update_target_if_needed(self): + if self.global_timestep - self.last_target_update_ts > \ + self.config["target_network_update_freq"]: + self.local_evaluator.update_target() + self.last_target_update_ts = self.global_timestep + self.num_target_updates += 1 + def _train(self): - for _ in range(self.config["train_steps"]): + start_timestep = self.global_timestep + + while (self.global_timestep - start_timestep < + self.config["timesteps_per_iteration"]): + self.optimizer.step() - # update target - if self.optimizer.num_steps_trained > 0: - self.local_evaluator.update_target() - - # generate training result - return self._fetch_metrics() - - def _fetch_metrics(self): - episode_rewards = [] - episode_lengths = [] - if self.config["num_workers"] > 0: - metric_lists = [a.get_completed_rollout_metrics.remote() - for a in self.remote_evaluators] - for metrics in metric_lists: - for episode in ray.get(metrics): - episode_lengths.append(episode.episode_length) - episode_rewards.append(episode.episode_reward) + self.update_target_if_needed() + + self.local_evaluator.set_global_timestep(self.global_timestep) + for e in self.remote_evaluators: + e.set_global_timestep.remote(self.global_timestep) + + return self._train_stats(start_timestep) + + def _train_stats(self, start_timestep): + if self.remote_evaluators: + stats = ray.get([e.stats.remote() for e in self.remote_evaluators]) + else: + stats = self.local_evaluator.stats() + if not isinstance(stats, list): + stats = [stats] + + mean_100ep_reward = 0.0 + mean_100ep_length = 0.0 + num_episodes = 0 + explorations = [] + + if self.config["per_worker_exploration"]: + # Return stats from workers with the lowest 20% of exploration + test_stats = stats[-int(max(1, len(stats) * 0.2)):] else: - metrics = self.local_evaluator.get_completed_rollout_metrics() - for episode in metrics: - episode_lengths.append(episode.episode_length) - episode_rewards.append(episode.episode_reward) + test_stats = stats - avg_reward = (np.mean(episode_rewards)) - avg_length = (np.mean(episode_lengths)) - timesteps = np.sum(episode_lengths) + for s in test_stats: + mean_100ep_reward += s["mean_100ep_reward"] / len(test_stats) + mean_100ep_length += s["mean_100ep_length"] / len(test_stats) + + for s in stats: + num_episodes += s["num_episodes"] + explorations.append(s["exploration"]) + + opt_stats = self.optimizer.stats() result = TrainingResult( - episode_reward_mean=avg_reward, - episode_len_mean=avg_length, - timesteps_this_iter=timesteps, - info={}) + episode_reward_mean=mean_100ep_reward, + episode_len_mean=mean_100ep_length, + episodes_total=num_episodes, + timesteps_this_iter=self.global_timestep - start_timestep, + info=dict({ + "min_exploration": min(explorations), + "max_exploration": max(explorations), + "num_target_updates": self.num_target_updates, + }, **opt_stats)) return result + + def _stop(self): + # workaround for https://github.com/ray-project/ray/issues/1516 + for ev in self.remote_evaluators: + ev.__ray_terminate__.remote(ev._ray_actor_id.id()) + + def _save(self, checkpoint_dir): + checkpoint_path = self.saver.save( + self.local_evaluator.sess, + os.path.join(checkpoint_dir, "checkpoint"), + global_step=self.iteration) + extra_data = [ + self.local_evaluator.save(), + ray.get([e.save.remote() for e in self.remote_evaluators]), + self.optimizer.save(), self.num_target_updates, + self.last_target_update_ts + ] + pickle.dump(extra_data, open(checkpoint_path + ".extra_data", "wb")) + return checkpoint_path + + def _restore(self, checkpoint_path): + self.saver.restore(self.local_evaluator.sess, checkpoint_path) + extra_data = pickle.load(open(checkpoint_path + ".extra_data", "rb")) + self.local_evaluator.restore(extra_data[0]) + ray.get([ + e.restore.remote(d) + for (d, e) in zip(extra_data[1], self.remote_evaluators) + ]) + self.optimizer.restore(extra_data[2]) + self.num_target_updates = extra_data[3] + self.last_target_update_ts = extra_data[4] + + def compute_action(self, observation): + return self.local_evaluator.ddpg_graph.act(self.local_evaluator.sess, + np.array(observation)[None], + 0.0)[0] diff --git a/python/ray/rllib/ddpg/ddpg_evaluator.py b/python/ray/rllib/ddpg/ddpg_evaluator.py index dda3c34797235..5a68c4b583ee5 100644 --- a/python/ray/rllib/ddpg/ddpg_evaluator.py +++ b/python/ray/rllib/ddpg/ddpg_evaluator.py @@ -2,74 +2,185 @@ from __future__ import division from __future__ import print_function +from gym.spaces import Box import numpy as np +import tensorflow as tf import ray -from ray.rllib.ddpg.models import DDPGModel -from ray.rllib.models.catalog import ModelCatalog -from ray.rllib.optimizers import PolicyEvaluator -from ray.rllib.utils.filter import NoFilter -from ray.rllib.utils.process_rollout import process_rollout -from ray.rllib.utils.sampler import SyncSampler +from ray.rllib.utils.error import UnsupportedSpaceException +from ray.rllib.ddpg import models +from ray.rllib.dqn.common.schedules import ConstantSchedule, LinearSchedule +from ray.rllib.optimizers import SampleBatch, PolicyEvaluator +from ray.rllib.utils.compression import pack +from ray.rllib.dqn.dqn_evaluator import adjust_nstep +from ray.rllib.dqn.common.wrappers import wrap_dqn class DDPGEvaluator(PolicyEvaluator): + """The base DDPG Evaluator.""" + + def __init__(self, registry, env_creator, config, logdir, worker_index): + env = env_creator(config["env_config"]) + env = wrap_dqn(registry, env, config["model"], config["random_starts"]) + self.env = env + self.config = config + + # when env.action_space is of Box type, e.g., Pendulum-v0 + # action_space.low is [-2.0], high is [2.0] + # take action by calling, e.g., env.step([3.5]) + if not isinstance(env.action_space, Box): + raise UnsupportedSpaceException( + "Action space {} is not supported for DDPG.".format( + env.action_space)) + + tf_config = tf.ConfigProto(**config["tf_session_args"]) + self.sess = tf.Session(config=tf_config) + self.ddpg_graph = models.DDPGGraph(registry, env, config, logdir) + + # Use either a different `eps` per worker, or a linear schedule. + if config["per_worker_exploration"]: + assert config["num_workers"] > 1, "This requires multiple workers" + self.exploration = ConstantSchedule( + config["noise_scale"] * 0.4 ** + (1 + worker_index / float(config["num_workers"] - 1) * 7)) + else: + self.exploration = LinearSchedule( + schedule_timesteps=int(config["exploration_fraction"] * + config["schedule_max_timesteps"]), + initial_p=config["noise_scale"] * 1.0, + final_p=config["noise_scale"] * + config["exploration_final_eps"]) + + # Initialize the parameters and copy them to the target network. + self.sess.run(tf.global_variables_initializer()) + # hard instead of soft + self.ddpg_graph.update_target(self.sess, 1.0) + self.global_timestep = 0 + self.local_timestep = 0 + + # Note that this encompasses both the policy and Q-value networks and + # their corresponding target networks + self.variables = ray.experimental.TensorFlowVariables( + tf.group(self.ddpg_graph.q_tp0, self.ddpg_graph.q_tp1), self.sess) + + self.episode_rewards = [0.0] + self.episode_lengths = [0.0] + self.saved_mean_reward = None + + self.obs = self.env.reset() + + def set_global_timestep(self, global_timestep): + self.global_timestep = global_timestep - def __init__(self, registry, env_creator, config): - self.env = ModelCatalog.get_preprocessor_as_wrapper( - registry, env_creator(config["env_config"])) - - # contains model, target_model - self.model = DDPGModel(registry, self.env, config) - - self.sampler = SyncSampler( - self.env, self.model.model, NoFilter(), - config["num_local_steps"], horizon=config["horizon"]) + def update_target(self): + self.ddpg_graph.update_target(self.sess) def sample(self): - """Returns a batch of samples.""" - - rollout = self.sampler.get_data() - rollout.data["weights"] = np.ones_like(rollout.data["rewards"]) - - # since each sample is one step, no discounting needs to be applied; - # this does not involve config["gamma"] - samples = process_rollout( - rollout, NoFilter(), - gamma=1.0, use_gae=False) - - return samples - - def update_target(self): - """Updates target critic and target actor.""" - self.model.update_target() + obs, actions, rewards, new_obs, dones = [], [], [], [], [] + for _ in range( + self.config["sample_batch_size"] + self.config["n_step"] - 1): + ob, act, rew, ob1, done = self._step(self.global_timestep) + obs.append(ob) + actions.append(act) + rewards.append(rew) + new_obs.append(ob1) + dones.append(done) + + # N-step Q adjustments + if self.config["n_step"] > 1: + # Adjust for steps lost from truncation + self.local_timestep -= (self.config["n_step"] - 1) + adjust_nstep(self.config["n_step"], self.config["gamma"], obs, + actions, rewards, new_obs, dones) + + batch = SampleBatch({ + "obs": [pack(np.array(o)) for o in obs], + "actions": actions, + "rewards": rewards, + "new_obs": [pack(np.array(o)) for o in new_obs], + "dones": dones, + "weights": np.ones_like(rewards) + }) + assert (batch.count == self.config["sample_batch_size"]) + + # Prioritize on the worker side + if self.config["worker_side_prioritization"]: + td_errors = self.ddpg_graph.compute_td_error( + self.sess, obs, batch["actions"], batch["rewards"], new_obs, + batch["dones"], batch["weights"]) + new_priorities = ( + np.abs(td_errors) + self.config["prioritized_replay_eps"]) + batch.data["weights"] = new_priorities + + return batch def compute_gradients(self, samples): - """Returns critic, actor gradients.""" - return self.model.compute_gradients(samples) + td_err, grads = self.ddpg_graph.compute_gradients( + self.sess, samples["obs"], samples["actions"], samples["rewards"], + samples["new_obs"], samples["dones"], samples["weights"]) + return grads, {"td_error": td_err} def apply_gradients(self, grads): - """Applies gradients to evaluator weights.""" - self.model.apply_gradients(grads) + self.ddpg_graph.apply_gradients(self.sess, grads) def compute_apply(self, samples): - grads, _ = self.compute_gradients(samples) - self.apply_gradients(grads) + td_error = self.ddpg_graph.compute_apply( + self.sess, samples["obs"], samples["actions"], samples["rewards"], + samples["new_obs"], samples["dones"], samples["weights"]) + return {"td_error": td_error} def get_weights(self): - """Returns model weights.""" - return self.model.get_weights() + return self.variables.get_weights() def set_weights(self, weights): - """Sets model weights.""" - self.model.set_weights(weights) - - def get_completed_rollout_metrics(self): - """Returns metrics on previously completed rollouts. - - Calling this clears the queue of completed rollout metrics. - """ - return self.sampler.get_metrics() - - -RemoteDDPGEvaluator = ray.remote(DDPGEvaluator) + self.variables.set_weights(weights) + + def _step(self, global_timestep): + """Takes a single step, and returns the result of the step.""" + action = self.ddpg_graph.act( + self.sess, + np.array(self.obs)[None], + self.exploration.value(global_timestep))[0] + new_obs, rew, done, _ = self.env.step(action) + ret = (self.obs, action, rew, new_obs, float(done)) + self.obs = new_obs + self.episode_rewards[-1] += rew + self.episode_lengths[-1] += 1 + if done: + self.obs = self.env.reset() + self.episode_rewards.append(0.0) + self.episode_lengths.append(0.0) + # reset UO noise for each episode + self.ddpg_graph.reset_noise(self.sess) + + self.local_timestep += 1 + return ret + + def stats(self): + n = self.config["smoothing_num_episodes"] + 1 + mean_100ep_reward = round(np.mean(self.episode_rewards[-n:-1]), 5) + mean_100ep_length = round(np.mean(self.episode_lengths[-n:-1]), 5) + exploration = self.exploration.value(self.global_timestep) + return { + "mean_100ep_reward": mean_100ep_reward, + "mean_100ep_length": mean_100ep_length, + "num_episodes": len(self.episode_rewards), + "exploration": exploration, + "local_timestep": self.local_timestep, + } + + def save(self): + return [ + self.exploration, self.episode_rewards, self.episode_lengths, + self.saved_mean_reward, self.obs, self.global_timestep, + self.local_timestep + ] + + def restore(self, data): + self.exploration = data[0] + self.episode_rewards = data[1] + self.episode_lengths = data[2] + self.saved_mean_reward = data[3] + self.obs = data[4] + self.global_timestep = data[5] + self.local_timestep = data[6] diff --git a/python/ray/rllib/ddpg/models.py b/python/ray/rllib/ddpg/models.py index 20a661a974a03..d58f37dc64174 100644 --- a/python/ray/rllib/ddpg/models.py +++ b/python/ray/rllib/ddpg/models.py @@ -3,239 +3,389 @@ from __future__ import print_function import numpy as np -import tensorflow as tf -from ray.experimental.tfutils import TensorFlowVariables -from ray.rllib.models.ddpgnet import DDPGActor, DDPGCritic -from ray.rllib.ddpg.random_process import OrnsteinUhlenbeckProcess - - -class DDPGModel(): - def __init__(self, registry, env, config): - self.config = config - self.sess = tf.Session() - - with tf.variable_scope("model"): - self.model = DDPGActorCritic( - registry, env, self.config, self.sess) - with tf.variable_scope("target_model"): - self.target_model = DDPGActorCritic( - registry, env, self.config, self.sess) - self._setup_gradients() - self._setup_target_updates() - - self.initialize() - self._initialize_target_weights() - - def initialize(self): - self.sess.run(tf.global_variables_initializer()) - - def _initialize_target_weights(self): - """Set initial target weights to match model weights.""" - a_updates = [] - for var, target_var in zip( - self.model.actor_var_list, self.target_model.actor_var_list): - a_updates.append(tf.assign(target_var, var)) - actor_updates = tf.group(*a_updates) - - c_updates = [] - for var, target_var in zip( - self.model.critic_var_list, self.target_model.critic_var_list): - c_updates.append(tf.assign(target_var, var)) - critic_updates = tf.group(*c_updates) - self.sess.run([actor_updates, critic_updates]) - - def _setup_gradients(self): - """Setup critic and actor gradients.""" - self.critic_grads = tf.gradients( - self.model.critic_loss, self.model.critic_var_list) - c_grads_and_vars = list(zip( - self.critic_grads, self.model.critic_var_list)) - c_opt = tf.train.AdamOptimizer(self.config["critic_lr"]) - self._apply_c_gradients = c_opt.apply_gradients(c_grads_and_vars) - - self.actor_grads = tf.gradients( - -self.model.cn_for_loss, self.model.actor_var_list) - a_grads_and_vars = list(zip( - self.actor_grads, self.model.actor_var_list)) - a_opt = tf.train.AdamOptimizer(self.config["actor_lr"]) - self._apply_a_gradients = a_opt.apply_gradients(a_grads_and_vars) - - def compute_gradients(self, samples): - """ Returns gradient w.r.t. samples.""" - # actor gradients - actor_actions = self.sess.run( - self.model.output_action, - feed_dict={self.model.obs: samples["obs"]} - ) - - actor_feed_dict = { - self.model.obs: samples["obs"], - self.model.output_action: actor_actions, - } - self.actor_grads = [g for g in self.actor_grads if g is not None] - actor_grad = self.sess.run(self.actor_grads, feed_dict=actor_feed_dict) - - # feed samples into target actor - target_Q_act = self.sess.run( - self.target_model.output_action, - feed_dict={self.target_model.obs: samples["new_obs"]} - ) - target_Q_dict = { - self.target_model.obs: samples["new_obs"], - self.target_model.act: target_Q_act, - } - - target_Q = self.sess.run( - self.target_model.critic_eval, feed_dict=target_Q_dict) - - # critic gradients - critic_feed_dict = { - self.model.obs: samples["obs"], - self.model.act: samples["actions"], - self.model.reward: samples["rewards"], - self.model.target_Q: target_Q, - } - self.critic_grads = [g for g in self.critic_grads if g is not None] - critic_grad = self.sess.run( - self.critic_grads, feed_dict=critic_feed_dict) - return (critic_grad, actor_grad), {} - - def apply_gradients(self, grads): - """Applies gradients to evaluator weights.""" - c_grads, a_grads = grads - critic_feed_dict = dict(zip(self.critic_grads, c_grads)) - self.sess.run(self._apply_c_gradients, feed_dict=critic_feed_dict) - actor_feed_dict = dict(zip(self.actor_grads, a_grads)) - self.sess.run(self._apply_a_gradients, feed_dict=actor_feed_dict) - - def get_weights(self): - """Returns model weights, target model weights.""" - return self.model.get_weights(), self.target_model.get_weights() - - def set_weights(self, weights): - """Sets model and target model weights.""" - model_weights, target_model_weights = weights - self.model.set_weights(model_weights) - self.target_model.set_weights(target_model_weights) - - def _setup_target_updates(self): - """Set up target actor and critic updates.""" - a_updates = [] - tau = self.config["tau"] - for var, target_var in zip( - self.model.actor_var_list, self.target_model.actor_var_list): - a_updates.append(tf.assign( - target_var, tau * var + (1. - tau) * target_var)) - actor_updates = tf.group(*a_updates) - - c_updates = [] - for var, target_var in zip( - self.model.critic_var_list, self.target_model.critic_var_list): - c_updates.append(tf.assign( - target_var, tau * var + (1. - tau) * target_var)) - critic_updates = tf.group(*c_updates) - self.target_updates = [actor_updates, critic_updates] - - def update_target(self): - """Updates target critic and target actor.""" - self.sess.run(self.target_updates) - - -class DDPGActorCritic(): - other_output = [] - is_recurrent = False - - def __init__(self, registry, env, config, sess): - self.config = config - self.sess = sess - - obs_space = env.observation_space - ac_space = env.action_space - - self.obs_size = int(np.prod(obs_space.shape)) - self.obs = tf.placeholder(tf.float32, [None, self.obs_size]) - self.ac_size = int(np.prod(ac_space.shape)) - self.act = tf.placeholder(tf.float32, [None, self.ac_size]) - self.action_bound = env.action_space.high - # TODO: change action_bound to make more general - - self._setup_actor_network(obs_space, ac_space) - self._setup_critic_network(obs_space, ac_space) - self._setup_critic_loss(ac_space) - - with tf.variable_scope("critic"): - self.critic_var_list = tf.get_collection( - tf.GraphKeys.TRAINABLE_VARIABLES, - tf.get_variable_scope().name - ) - self.critic_vars = TensorFlowVariables(self.critic_loss, - self.sess) - - with tf.variable_scope("actor"): - self.actor_var_list = tf.get_collection( - tf.GraphKeys.TRAINABLE_VARIABLES, - tf.get_variable_scope().name - ) - self.actor_vars = TensorFlowVariables(self.output_action, - self.sess) - - if (self.config["noise_add"]): - params = self.config["noise_parameters"] - self.rand_process = OrnsteinUhlenbeckProcess(size=self.ac_size, - theta=params["theta"], - mu=params["mu"], - sigma=params["sigma"]) - self.epsilon = 1.0 - - def _setup_critic_loss(self, action_space): - """Sets up critic loss.""" - self.target_Q = tf.placeholder(tf.float32, [None, 1], name="target_q") - - # compare critic eval to critic_target (squared loss) - self.reward = tf.placeholder(tf.float32, [None], name="reward") - self.critic_target = tf.expand_dims(self.reward, 1) + \ - self.config['gamma'] * self.target_Q - self.critic_loss = tf.reduce_mean(tf.square( - self.critic_target - self.critic_eval)) - - def _setup_critic_network(self, obs_space, ac_space): - """Sets up Q network.""" - with tf.variable_scope("critic", reuse=tf.AUTO_REUSE): - self.critic_network = DDPGCritic((self.obs, self.act), 1, {}) - self.critic_eval = self.critic_network.outputs - - with tf.variable_scope("critic", reuse=True): - self.cn_for_loss = DDPGCritic( - (self.obs, self.output_action), 1, {}).outputs - - def _setup_actor_network(self, obs_space, ac_space): - """Sets up actor network.""" - with tf.variable_scope("actor", reuse=tf.AUTO_REUSE): - self.actor_network = DDPGActor( - self.obs, self.ac_size, - options={"action_bound": self.action_bound}) - self.output_action = self.actor_network.outputs - - def get_weights(self): - """Returns critic weights, actor weights.""" - return self.critic_vars.get_weights(), self.actor_vars.get_weights() - - def set_weights(self, weights): - """Sets critic and actor weights.""" - critic_weights, actor_weights = weights - self.critic_vars.set_weights(critic_weights) - self.actor_vars.set_weights(actor_weights) - - def compute(self, ob): - """Returns action, given state.""" - flattened_ob = np.reshape(ob, [-1, np.prod(ob.shape)]) - action = self.sess.run(self.output_action, {self.obs: flattened_ob}) - if (self.config["noise_add"]): - action += self.epsilon * self.rand_process.sample() - if (self.epsilon > 0): - self.epsilon -= self.config["noise_epsilon"] - return action[0], {} - - def value(self, *args): - return 0 +import tensorflow as tf +import tensorflow.contrib.layers as layers + +from ray.rllib.models import ModelCatalog + + +def _build_p_network(registry, inputs, dim_actions, config): + """ + map an observation (i.e., state) to an action where + each entry takes value from (0, 1) due to the sigmoid function + """ + frontend = ModelCatalog.get_model(registry, inputs, 1, config["model"]) + + hiddens = config["actor_hiddens"] + action_out = frontend.last_layer + for hidden in hiddens: + action_out = layers.fully_connected( + action_out, num_outputs=hidden, activation_fn=tf.nn.relu) + # Use sigmoid layer to bound values within (0, 1) + # shape of action_scores is [batch_size, dim_actions] + action_scores = layers.fully_connected( + action_out, num_outputs=dim_actions, activation_fn=tf.nn.sigmoid) + + return action_scores + + +# As a stochastic policy for inference, but a deterministic policy for training +# thus ignore batch_size issue when constructing a stochastic action +def _build_action_network(p_values, low_action, high_action, stochastic, eps, + theta, sigma): + # shape is [None, dim_action] + deterministic_actions = (high_action - low_action) * p_values + low_action + + exploration_sample = tf.get_variable( + name="ornstein_uhlenbeck", + dtype=tf.float32, + initializer=low_action.size * [.0], + trainable=False) + normal_sample = tf.random_normal( + shape=[low_action.size], mean=0.0, stddev=1.0) + exploration_value = tf.assign_add( + exploration_sample, + theta * (.0 - exploration_sample) + sigma * normal_sample) + stochastic_actions = deterministic_actions + eps * ( + high_action - low_action) * exploration_value + + return tf.cond(stochastic, lambda: stochastic_actions, + lambda: deterministic_actions) + + +def _build_q_network(registry, inputs, action_inputs, config): + frontend = ModelCatalog.get_model(registry, inputs, 1, config["model"]) + + hiddens = config["critic_hiddens"] + + q_out = tf.concat([frontend.last_layer, action_inputs], axis=1) + for hidden in hiddens: + q_out = layers.fully_connected( + q_out, num_outputs=hidden, activation_fn=tf.nn.relu) + q_scores = layers.fully_connected(q_out, num_outputs=1, activation_fn=None) + + return q_scores + + +def _huber_loss(x, delta=1.0): + """Reference: https://en.wikipedia.org/wiki/Huber_loss""" + return tf.where( + tf.abs(x) < delta, + tf.square(x) * 0.5, delta * (tf.abs(x) - 0.5 * delta)) + + +def _minimize_and_clip(optimizer, objective, var_list, clip_val=10): + """Minimized `objective` using `optimizer` w.r.t. variables in + `var_list` while ensure the norm of the gradients for each + variable is clipped to `clip_val` + """ + gradients = optimizer.compute_gradients(objective, var_list=var_list) + for i, (grad, var) in enumerate(gradients): + if grad is not None: + gradients[i] = (tf.clip_by_norm(grad, clip_val), var) + return gradients + + +def _scope_vars(scope, trainable_only=False): + """ + Get variables inside a scope + The scope can be specified as a string + + Parameters + ---------- + scope: str or VariableScope + scope in which the variables reside. + trainable_only: bool + whether or not to return only the variables that were marked as + trainable. + + Returns + ------- + vars: [tf.Variable] + list of variables in `scope`. + """ + return tf.get_collection( + tf.GraphKeys.TRAINABLE_VARIABLES + if trainable_only else tf.GraphKeys.VARIABLES, + scope=scope if isinstance(scope, str) else scope.name) + + +class ModelAndLoss(object): + """Holds the model and loss function. + + Both graphs are necessary in order for the multi-gpu SGD implementation + to create towers on each device. + """ + + def __init__(self, registry, dim_actions, low_action, high_action, config, + obs_t, act_t, rew_t, obs_tp1, done_mask, importance_weights): + # p network evaluation + with tf.variable_scope("p_func", reuse=True) as scope: + self.p_t = _build_p_network(registry, obs_t, dim_actions, config) + + # target p network evaluation + with tf.variable_scope("target_p_func") as scope: + self.p_tp1 = _build_p_network(registry, obs_tp1, dim_actions, + config) + self.target_p_func_vars = _scope_vars(scope.name) + + # Action outputs + with tf.variable_scope("a_func", reuse=True): + deterministic_flag = tf.constant(value=False, dtype=tf.bool) + zero_eps = tf.constant(value=.0, dtype=tf.float32) + output_actions = _build_action_network( + self.p_t, low_action, high_action, deterministic_flag, + zero_eps, config["exploration_theta"], + config["exploration_sigma"]) + + output_actions_estimated = _build_action_network( + self.p_tp1, low_action, high_action, deterministic_flag, + zero_eps, config["exploration_theta"], + config["exploration_sigma"]) + + # q network evaluation + with tf.variable_scope("q_func") as scope: + self.q_t = _build_q_network(registry, obs_t, act_t, config) + self.q_func_vars = _scope_vars(scope.name) + with tf.variable_scope("q_func", reuse=True): + self.q_tp0 = _build_q_network(registry, obs_t, output_actions, + config) + + # target q network evalution + with tf.variable_scope("target_q_func") as scope: + self.q_tp1 = _build_q_network(registry, obs_tp1, + output_actions_estimated, config) + self.target_q_func_vars = _scope_vars(scope.name) + + q_t_selected = tf.squeeze(self.q_t, axis=len(self.q_t.shape) - 1) + + q_tp1_best = tf.squeeze( + input=self.q_tp1, axis=len(self.q_tp1.shape) - 1) + q_tp1_best_masked = (1.0 - done_mask) * q_tp1_best + + # compute RHS of bellman equation + q_t_selected_target = ( + rew_t + config["gamma"]**config["n_step"] * q_tp1_best_masked) + + # compute the error (potentially clipped) + self.td_error = q_t_selected - tf.stop_gradient(q_t_selected_target) + if config.get("use_huber"): + errors = _huber_loss(self.td_error, config.get("huber_threshold")) + else: + errors = 0.5 * tf.square(self.td_error) + + weighted_error = tf.reduce_mean(importance_weights * errors) + + self.loss = weighted_error + + # for policy gradient + self.actor_loss = -1.0 * tf.reduce_mean(self.q_tp0) + + +class DDPGGraph(object): + def __init__(self, registry, env, config, logdir): + self.env = env + dim_actions = env.action_space.shape[0] + low_action = env.action_space.low + high_action = env.action_space.high + actor_optimizer = tf.train.AdamOptimizer( + learning_rate=config["actor_lr"]) + critic_optimizer = tf.train.AdamOptimizer( + learning_rate=config["critic_lr"]) + + # Action inputs + self.stochastic = tf.placeholder(tf.bool, (), name="stochastic") + self.eps = tf.placeholder(tf.float32, (), name="eps") + self.cur_observations = tf.placeholder( + tf.float32, shape=(None, ) + env.observation_space.shape) + + # Actor: P (policy) network + p_scope_name = "p_func" + with tf.variable_scope(p_scope_name) as scope: + p_values = _build_p_network(registry, self.cur_observations, + dim_actions, config) + p_func_vars = _scope_vars(scope.name) + + # Action outputs + a_scope_name = "a_func" + with tf.variable_scope(a_scope_name): + self.output_actions = _build_action_network( + p_values, low_action, high_action, self.stochastic, self.eps, + config["exploration_theta"], config["exploration_sigma"]) + + with tf.variable_scope(a_scope_name, reuse=True): + exploration_sample = tf.get_variable(name="ornstein_uhlenbeck") + self.reset_noise_op = tf.assign(exploration_sample, + dim_actions * [.0]) + + # Replay inputs + self.obs_t = tf.placeholder( + tf.float32, + shape=(None, ) + env.observation_space.shape, + name="observation") + self.act_t = tf.placeholder( + tf.float32, shape=(None, ) + env.action_space.shape, name="action") + self.rew_t = tf.placeholder(tf.float32, [None], name="reward") + self.obs_tp1 = tf.placeholder( + tf.float32, shape=(None, ) + env.observation_space.shape) + self.done_mask = tf.placeholder(tf.float32, [None], name="done") + self.importance_weights = tf.placeholder( + tf.float32, [None], name="weight") + + def build_loss(obs_t, act_t, rew_t, obs_tp1, done_mask, + importance_weights): + return ModelAndLoss(registry, dim_actions, low_action, high_action, + config, obs_t, act_t, rew_t, obs_tp1, + done_mask, importance_weights) + + self.loss_inputs = [ + ("obs", self.obs_t), + ("actions", self.act_t), + ("rewards", self.rew_t), + ("new_obs", self.obs_tp1), + ("dones", self.done_mask), + ("weights", self.importance_weights), + ] + + loss_obj = build_loss(self.obs_t, self.act_t, self.rew_t, self.obs_tp1, + self.done_mask, self.importance_weights) + + self.build_loss = build_loss + + actor_loss = loss_obj.actor_loss + weighted_error = loss_obj.loss + q_func_vars = loss_obj.q_func_vars + target_p_func_vars = loss_obj.target_p_func_vars + target_q_func_vars = loss_obj.target_q_func_vars + self.p_t = loss_obj.p_t + self.q_t = loss_obj.q_t + self.q_tp0 = loss_obj.q_tp0 + self.q_tp1 = loss_obj.q_tp1 + self.td_error = loss_obj.td_error + + if config["l2_reg"] is not None: + for var in p_func_vars: + if "bias" not in var.name: + actor_loss += config["l2_reg"] * 0.5 * tf.nn.l2_loss(var) + for var in q_func_vars: + if "bias" not in var.name: + weighted_error += config["l2_reg"] * 0.5 * tf.nn.l2_loss( + var) + + # compute optimization op (potentially with gradient clipping) + if config["grad_norm_clipping"] is not None: + self.actor_grads_and_vars = _minimize_and_clip( + actor_optimizer, + actor_loss, + var_list=p_func_vars, + clip_val=config["grad_norm_clipping"]) + self.critic_grads_and_vars = _minimize_and_clip( + critic_optimizer, + weighted_error, + var_list=q_func_vars, + clip_val=config["grad_norm_clipping"]) + else: + self.actor_grads_and_vars = actor_optimizer.compute_gradients( + actor_loss, var_list=p_func_vars) + self.critic_grads_and_vars = critic_optimizer.compute_gradients( + weighted_error, var_list=q_func_vars) + self.actor_grads_and_vars = [(g, v) + for (g, v) in self.actor_grads_and_vars + if g is not None] + self.critic_grads_and_vars = [(g, v) + for (g, v) in self.critic_grads_and_vars + if g is not None] + self.grads_and_vars = ( + self.actor_grads_and_vars + self.critic_grads_and_vars) + self.grads = [g for (g, v) in self.grads_and_vars] + self.actor_train_expr = actor_optimizer.apply_gradients( + self.actor_grads_and_vars) + self.critic_train_expr = critic_optimizer.apply_gradients( + self.critic_grads_and_vars) + + # update_target_fn will be called periodically to copy Q network to + # target Q network + self.tau_value = config.get("tau") + self.tau = tf.placeholder(tf.float32, (), name="tau") + update_target_expr = [] + for var, var_target in zip( + sorted(q_func_vars, key=lambda v: v.name), + sorted(target_q_func_vars, key=lambda v: v.name)): + update_target_expr.append( + var_target.assign(self.tau * var + + (1.0 - self.tau) * var_target)) + for var, var_target in zip( + sorted(p_func_vars, key=lambda v: v.name), + sorted(target_p_func_vars, key=lambda v: v.name)): + update_target_expr.append( + var_target.assign(self.tau * var + + (1.0 - self.tau) * var_target)) + self.update_target_expr = tf.group(*update_target_expr) + + # support both hard and soft sync + def update_target(self, sess, tau=None): + return sess.run( + self.update_target_expr, + feed_dict={self.tau: tau or self.tau_value}) + + def act(self, sess, obs, eps, stochastic=True): + return sess.run( + self.output_actions, + feed_dict={ + self.cur_observations: obs, + self.stochastic: stochastic, + self.eps: eps + }) + + def compute_gradients(self, sess, obs_t, act_t, rew_t, obs_tp1, done_mask, + importance_weights): + td_err, grads = sess.run( + [self.td_error, self.grads], + feed_dict={ + self.obs_t: obs_t, + self.act_t: act_t, + self.rew_t: rew_t, + self.obs_tp1: obs_tp1, + self.done_mask: done_mask, + self.importance_weights: importance_weights + }) + return td_err, grads + + def compute_td_error(self, sess, obs_t, act_t, rew_t, obs_tp1, done_mask, + importance_weights): + td_err = sess.run( + self.td_error, + feed_dict={ + self.obs_t: [np.array(ob) for ob in obs_t], + self.act_t: act_t, + self.rew_t: rew_t, + self.obs_tp1: [np.array(ob) for ob in obs_tp1], + self.done_mask: done_mask, + self.importance_weights: importance_weights + }) + return td_err + + def apply_gradients(self, sess, grads): + assert len(grads) == len(self.grads_and_vars) + feed_dict = {ph: g for (g, ph) in zip(grads, self.grads)} + sess.run( + [self.critic_train_expr, self.actor_train_expr], + feed_dict=feed_dict) + + def compute_apply(self, sess, obs_t, act_t, rew_t, obs_tp1, done_mask, + importance_weights): + td_err, _, _ = sess.run( + [self.td_error, self.critic_train_expr, self.actor_train_expr], + feed_dict={ + self.obs_t: obs_t, + self.act_t: act_t, + self.rew_t: rew_t, + self.obs_tp1: obs_tp1, + self.done_mask: done_mask, + self.importance_weights: importance_weights + }) + return td_err + + def reset_noise(self, sess): + sess.run(self.reset_noise_op) diff --git a/python/ray/rllib/ddpg2/README.md b/python/ray/rllib/ddpg2/README.md index af64c1530155c..54dc3996b892f 100644 --- a/python/ray/rllib/ddpg2/README.md +++ b/python/ray/rllib/ddpg2/README.md @@ -1 +1 @@ -Code in this package follows the style of dqn. +Alternate DDPG implementation. See also https://github.com/ray-project/ray/tree/master/python/ray/rllib/ddpg. diff --git a/python/ray/rllib/ddpg2/__init__.py b/python/ray/rllib/ddpg2/__init__.py index ece9c54f3d400..a7ace46c145ce 100644 --- a/python/ray/rllib/ddpg2/__init__.py +++ b/python/ray/rllib/ddpg2/__init__.py @@ -1,8 +1,3 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ray.rllib.ddpg2.apex import ApexDDPG2Agent from ray.rllib.ddpg2.ddpg import DDPG2Agent, DEFAULT_CONFIG -__all__ = ["DDPG2Agent", "ApexDDPG2Agent", "DEFAULT_CONFIG"] +__all__ = ["DDPG2Agent", "DEFAULT_CONFIG"] diff --git a/python/ray/rllib/ddpg2/ddpg.py b/python/ray/rllib/ddpg2/ddpg.py index c3bee0cbd26ae..0de2a865f8ea5 100644 --- a/python/ray/rllib/ddpg2/ddpg.py +++ b/python/ray/rllib/ddpg2/ddpg.py @@ -2,267 +2,111 @@ from __future__ import division from __future__ import print_function -import pickle -import os - import numpy as np -import tensorflow as tf import ray -from ray.rllib import optimizers -from ray.rllib.ddpg2.ddpg_evaluator import DDPGEvaluator from ray.rllib.agent import Agent +from ray.rllib.ddpg2.ddpg_evaluator import DDPGEvaluator, RemoteDDPGEvaluator +from ray.rllib.optimizers import LocalSyncReplayOptimizer from ray.tune.result import TrainingResult -OPTIMIZER_SHARED_CONFIGS = [ - "buffer_size", "prioritized_replay", "prioritized_replay_alpha", - "prioritized_replay_beta", "prioritized_replay_eps", "sample_batch_size", - "train_batch_size", "learning_starts", "clip_rewards" -] - -DEFAULT_CONFIG = dict( - # === Model === - # Hidden layer sizes of the policy networks - actor_hiddens=[64, 64], - # Hidden layer sizes of the policy networks - critic_hiddens=[64, 64], - # N-step Q learning - n_step=1, - # Config options to pass to the model constructor - model={}, - # Discount factor for the MDP - gamma=0.99, - # Arguments to pass to the env creator - env_config={}, - - # === Exploration === - # Max num timesteps for annealing schedules. Exploration is annealed from - # 1.0 to exploration_fraction over this number of timesteps scaled by - # exploration_fraction - schedule_max_timesteps=100000, - # Number of env steps to optimize for before returning - timesteps_per_iteration=1000, - # Fraction of entire training period over which the exploration rate is - # annealed - exploration_fraction=0.1, - # Final value of random action probability - exploration_final_eps=0.02, - # OU-noise scale - noise_scale=0.1, - # theta - exploration_theta=0.15, - # sigma - exploration_sigma=0.2, - # Update the target network every `target_network_update_freq` steps. - target_network_update_freq=0, - # Update the target by \tau * policy + (1-\tau) * target_policy - tau=0.002, - # Whether to start with random actions instead of noops. - random_starts=True, - - # === Replay buffer === - # Size of the replay buffer. Note that if async_updates is set, then - # each worker will have a replay buffer of this size. - buffer_size=50000, - # If True prioritized replay buffer will be used. - prioritized_replay=True, - # Alpha parameter for prioritized replay buffer. - prioritized_replay_alpha=0.6, - # Beta parameter for sampling from prioritized replay buffer. - prioritized_replay_beta=0.4, - # Epsilon to add to the TD errors when updating priorities. - prioritized_replay_eps=1e-6, - # Whether to clip rewards to [-1, 1] prior to adding to the replay buffer. - clip_rewards=True, - - # === Optimization === - # Learning rate for adam optimizer - actor_lr=1e-4, - critic_lr=1e-3, - # If True, use huber loss instead of squared loss for critic network - # Conventionally, no need to clip gradients if using a huber loss - use_huber=False, - # Threshold of a huber loss - huber_threshold=1.0, - # Weights for L2 regularization - l2_reg=1e-6, - # If not None, clip gradients during optimization at this value - grad_norm_clipping=None, - # How many steps of the model to sample before learning starts. - learning_starts=1500, - # Update the replay buffer with this many samples at once. Note that this - # setting applies per-worker if num_workers > 1. - sample_batch_size=1, - # Size of a batched sampled from replay buffer for training. Note that - # if async_updates is set, then each worker returns gradients for a - # batch of this size. - train_batch_size=256, - # Smooth the current average reward over this many previous episodes. - smoothing_num_episodes=100, +DEFAULT_CONFIG = { + # Actor learning rate + "actor_lr": 0.0001, + # Critic learning rate + "critic_lr": 0.001, + # Arguments to pass in to env creator + "env_config": {}, + # MDP Discount factor + "gamma": 0.99, + # Number of steps after which the rollout gets cut + "horizon": 500, + + # Whether to include parameter noise + "noise_add": True, + # Linear decay of exploration policy + "noise_epsilon": 0.0002, + # Parameters for noise process + "noise_parameters": { + "mu": 0, + "sigma": 0.2, + "theta": 0.15, + }, - # === Tensorflow === - # Arguments to pass to tensorflow - tf_session_args={ - "device_count": { - "CPU": 2 - }, - "log_device_placement": False, - "allow_soft_placement": True, - "gpu_options": { - "allow_growth": True - }, - "inter_op_parallelism_threads": 1, - "intra_op_parallelism_threads": 1, + # Number of local steps taken for each call to sample + "num_local_steps": 1, + # Number of workers (excluding master) + "num_workers": 0, + + "optimizer": { + # Replay buffer size + "buffer_size": 10000, + # Number of steps in warm-up phase before learning starts + "learning_starts": 500, + # Whether to clip rewards + "clip_rewards": False, + # Whether to use prioritized replay + "prioritized_replay": False, + # Size of batch sampled from replay buffer + "train_batch_size": 64, }, - # === Parallelism === - # Number of workers for collecting samples with. This only makes sense - # to increase if your environment is particularly slow to sample, or if - # you're using the Async or Ape-X optimizers. - num_workers=0, - # Whether to allocate GPUs for workers (if > 0). - num_gpus_per_worker=0, - # Optimizer class to use. - optimizer_class="LocalSyncReplayOptimizer", - # Config to pass to the optimizer. - optimizer_config=dict(), - # Whether to use a distribution of epsilons across workers for exploration. - per_worker_exploration=False, - # Whether to compute priorities on workers. - worker_side_prioritization=False) + # Controls how fast target networks move + "tau": 0.001, + # Number of steps taken per training iteration + "train_steps": 600, +} class DDPG2Agent(Agent): _agent_name = "DDPG2" - _allow_unknown_subkeys = [ - "model", "optimizer", "tf_session_args", "env_config" - ] _default_config = DEFAULT_CONFIG def _init(self): - self.local_evaluator = DDPGEvaluator(self.registry, self.env_creator, - self.config, self.logdir, 0) - remote_cls = ray.remote( - num_cpus=1, - num_gpus=self.config["num_gpus_per_worker"])(DDPGEvaluator) + self.local_evaluator = DDPGEvaluator( + self.registry, self.env_creator, self.config) self.remote_evaluators = [ - remote_cls.remote(self.registry, self.env_creator, self.config, - self.logdir, i) - for i in range(self.config["num_workers"]) - ] - - for k in OPTIMIZER_SHARED_CONFIGS: - if k not in self.config["optimizer_config"]: - self.config["optimizer_config"][k] = self.config[k] - - self.optimizer = getattr(optimizers, self.config["optimizer_class"])( - self.config["optimizer_config"], self.local_evaluator, + RemoteDDPGEvaluator.remote( + self.registry, self.env_creator, self.config) + for _ in range(self.config["num_workers"])] + self.optimizer = LocalSyncReplayOptimizer( + self.config["optimizer"], self.local_evaluator, self.remote_evaluators) - self.saver = tf.train.Saver(max_to_keep=None) - self.last_target_update_ts = 0 - self.num_target_updates = 0 - - @property - def global_timestep(self): - return self.optimizer.num_steps_sampled - - def update_target_if_needed(self): - if self.global_timestep - self.last_target_update_ts > \ - self.config["target_network_update_freq"]: - self.local_evaluator.update_target() - self.last_target_update_ts = self.global_timestep - self.num_target_updates += 1 - def _train(self): - start_timestep = self.global_timestep - - while (self.global_timestep - start_timestep < - self.config["timesteps_per_iteration"]): - + for _ in range(self.config["train_steps"]): self.optimizer.step() - self.update_target_if_needed() - - self.local_evaluator.set_global_timestep(self.global_timestep) - for e in self.remote_evaluators: - e.set_global_timestep.remote(self.global_timestep) - - return self._train_stats(start_timestep) - - def _train_stats(self, start_timestep): - if self.remote_evaluators: - stats = ray.get([e.stats.remote() for e in self.remote_evaluators]) - else: - stats = self.local_evaluator.stats() - if not isinstance(stats, list): - stats = [stats] - - mean_100ep_reward = 0.0 - mean_100ep_length = 0.0 - num_episodes = 0 - explorations = [] - - if self.config["per_worker_exploration"]: - # Return stats from workers with the lowest 20% of exploration - test_stats = stats[-int(max(1, len(stats) * 0.2)):] + # update target + if self.optimizer.num_steps_trained > 0: + self.local_evaluator.update_target() + + # generate training result + return self._fetch_metrics() + + def _fetch_metrics(self): + episode_rewards = [] + episode_lengths = [] + if self.config["num_workers"] > 0: + metric_lists = [a.get_completed_rollout_metrics.remote() + for a in self.remote_evaluators] + for metrics in metric_lists: + for episode in ray.get(metrics): + episode_lengths.append(episode.episode_length) + episode_rewards.append(episode.episode_reward) else: - test_stats = stats + metrics = self.local_evaluator.get_completed_rollout_metrics() + for episode in metrics: + episode_lengths.append(episode.episode_length) + episode_rewards.append(episode.episode_reward) - for s in test_stats: - mean_100ep_reward += s["mean_100ep_reward"] / len(test_stats) - mean_100ep_length += s["mean_100ep_length"] / len(test_stats) - - for s in stats: - num_episodes += s["num_episodes"] - explorations.append(s["exploration"]) - - opt_stats = self.optimizer.stats() + avg_reward = (np.mean(episode_rewards)) + avg_length = (np.mean(episode_lengths)) + timesteps = np.sum(episode_lengths) result = TrainingResult( - episode_reward_mean=mean_100ep_reward, - episode_len_mean=mean_100ep_length, - episodes_total=num_episodes, - timesteps_this_iter=self.global_timestep - start_timestep, - info=dict({ - "min_exploration": min(explorations), - "max_exploration": max(explorations), - "num_target_updates": self.num_target_updates, - }, **opt_stats)) + episode_reward_mean=avg_reward, + episode_len_mean=avg_length, + timesteps_this_iter=timesteps, + info={}) return result - - def _stop(self): - # workaround for https://github.com/ray-project/ray/issues/1516 - for ev in self.remote_evaluators: - ev.__ray_terminate__.remote(ev._ray_actor_id.id()) - - def _save(self, checkpoint_dir): - checkpoint_path = self.saver.save( - self.local_evaluator.sess, - os.path.join(checkpoint_dir, "checkpoint"), - global_step=self.iteration) - extra_data = [ - self.local_evaluator.save(), - ray.get([e.save.remote() for e in self.remote_evaluators]), - self.optimizer.save(), self.num_target_updates, - self.last_target_update_ts - ] - pickle.dump(extra_data, open(checkpoint_path + ".extra_data", "wb")) - return checkpoint_path - - def _restore(self, checkpoint_path): - self.saver.restore(self.local_evaluator.sess, checkpoint_path) - extra_data = pickle.load(open(checkpoint_path + ".extra_data", "rb")) - self.local_evaluator.restore(extra_data[0]) - ray.get([ - e.restore.remote(d) - for (d, e) in zip(extra_data[1], self.remote_evaluators) - ]) - self.optimizer.restore(extra_data[2]) - self.num_target_updates = extra_data[3] - self.last_target_update_ts = extra_data[4] - - def compute_action(self, observation): - return self.local_evaluator.ddpg_graph.act(self.local_evaluator.sess, - np.array(observation)[None], - 0.0)[0] diff --git a/python/ray/rllib/ddpg2/ddpg_evaluator.py b/python/ray/rllib/ddpg2/ddpg_evaluator.py index e177a37a13a26..8a5ab5ed3f3ad 100644 --- a/python/ray/rllib/ddpg2/ddpg_evaluator.py +++ b/python/ray/rllib/ddpg2/ddpg_evaluator.py @@ -2,185 +2,74 @@ from __future__ import division from __future__ import print_function -from gym.spaces import Box import numpy as np -import tensorflow as tf import ray -from ray.rllib.utils.error import UnsupportedSpaceException -from ray.rllib.ddpg2 import models -from ray.rllib.dqn.common.schedules import ConstantSchedule, LinearSchedule -from ray.rllib.optimizers import SampleBatch, PolicyEvaluator -from ray.rllib.utils.compression import pack -from ray.rllib.dqn.dqn_evaluator import adjust_nstep -from ray.rllib.dqn.common.wrappers import wrap_dqn +from ray.rllib.ddpg2.models import DDPGModel +from ray.rllib.models.catalog import ModelCatalog +from ray.rllib.optimizers import PolicyEvaluator +from ray.rllib.utils.filter import NoFilter +from ray.rllib.utils.process_rollout import process_rollout +from ray.rllib.utils.sampler import SyncSampler class DDPGEvaluator(PolicyEvaluator): - """The base DDPG Evaluator.""" - - def __init__(self, registry, env_creator, config, logdir, worker_index): - env = env_creator(config["env_config"]) - env = wrap_dqn(registry, env, config["model"], config["random_starts"]) - self.env = env - self.config = config - - # when env.action_space is of Box type, e.g., Pendulum-v0 - # action_space.low is [-2.0], high is [2.0] - # take action by calling, e.g., env.step([3.5]) - if not isinstance(env.action_space, Box): - raise UnsupportedSpaceException( - "Action space {} is not supported for DDPG.".format( - env.action_space)) - - tf_config = tf.ConfigProto(**config["tf_session_args"]) - self.sess = tf.Session(config=tf_config) - self.ddpg_graph = models.DDPGGraph(registry, env, config, logdir) - - # Use either a different `eps` per worker, or a linear schedule. - if config["per_worker_exploration"]: - assert config["num_workers"] > 1, "This requires multiple workers" - self.exploration = ConstantSchedule( - config["noise_scale"] * 0.4 ** - (1 + worker_index / float(config["num_workers"] - 1) * 7)) - else: - self.exploration = LinearSchedule( - schedule_timesteps=int(config["exploration_fraction"] * - config["schedule_max_timesteps"]), - initial_p=config["noise_scale"] * 1.0, - final_p=config["noise_scale"] * - config["exploration_final_eps"]) - - # Initialize the parameters and copy them to the target network. - self.sess.run(tf.global_variables_initializer()) - # hard instead of soft - self.ddpg_graph.update_target(self.sess, 1.0) - self.global_timestep = 0 - self.local_timestep = 0 - - # Note that this encompasses both the policy and Q-value networks and - # their corresponding target networks - self.variables = ray.experimental.TensorFlowVariables( - tf.group(self.ddpg_graph.q_tp0, self.ddpg_graph.q_tp1), self.sess) - - self.episode_rewards = [0.0] - self.episode_lengths = [0.0] - self.saved_mean_reward = None - - self.obs = self.env.reset() - - def set_global_timestep(self, global_timestep): - self.global_timestep = global_timestep - def update_target(self): - self.ddpg_graph.update_target(self.sess) + def __init__(self, registry, env_creator, config): + self.env = ModelCatalog.get_preprocessor_as_wrapper( + registry, env_creator(config["env_config"])) + + # contains model, target_model + self.model = DDPGModel(registry, self.env, config) + + self.sampler = SyncSampler( + self.env, self.model.model, NoFilter(), + config["num_local_steps"], horizon=config["horizon"]) def sample(self): - obs, actions, rewards, new_obs, dones = [], [], [], [], [] - for _ in range( - self.config["sample_batch_size"] + self.config["n_step"] - 1): - ob, act, rew, ob1, done = self._step(self.global_timestep) - obs.append(ob) - actions.append(act) - rewards.append(rew) - new_obs.append(ob1) - dones.append(done) - - # N-step Q adjustments - if self.config["n_step"] > 1: - # Adjust for steps lost from truncation - self.local_timestep -= (self.config["n_step"] - 1) - adjust_nstep(self.config["n_step"], self.config["gamma"], obs, - actions, rewards, new_obs, dones) - - batch = SampleBatch({ - "obs": [pack(np.array(o)) for o in obs], - "actions": actions, - "rewards": rewards, - "new_obs": [pack(np.array(o)) for o in new_obs], - "dones": dones, - "weights": np.ones_like(rewards) - }) - assert (batch.count == self.config["sample_batch_size"]) - - # Prioritize on the worker side - if self.config["worker_side_prioritization"]: - td_errors = self.ddpg_graph.compute_td_error( - self.sess, obs, batch["actions"], batch["rewards"], new_obs, - batch["dones"], batch["weights"]) - new_priorities = ( - np.abs(td_errors) + self.config["prioritized_replay_eps"]) - batch.data["weights"] = new_priorities - - return batch + """Returns a batch of samples.""" + + rollout = self.sampler.get_data() + rollout.data["weights"] = np.ones_like(rollout.data["rewards"]) + + # since each sample is one step, no discounting needs to be applied; + # this does not involve config["gamma"] + samples = process_rollout( + rollout, NoFilter(), + gamma=1.0, use_gae=False) + + return samples + + def update_target(self): + """Updates target critic and target actor.""" + self.model.update_target() def compute_gradients(self, samples): - td_err, grads = self.ddpg_graph.compute_gradients( - self.sess, samples["obs"], samples["actions"], samples["rewards"], - samples["new_obs"], samples["dones"], samples["weights"]) - return grads, {"td_error": td_err} + """Returns critic, actor gradients.""" + return self.model.compute_gradients(samples) def apply_gradients(self, grads): - self.ddpg_graph.apply_gradients(self.sess, grads) + """Applies gradients to evaluator weights.""" + self.model.apply_gradients(grads) def compute_apply(self, samples): - td_error = self.ddpg_graph.compute_apply( - self.sess, samples["obs"], samples["actions"], samples["rewards"], - samples["new_obs"], samples["dones"], samples["weights"]) - return {"td_error": td_error} + grads, _ = self.compute_gradients(samples) + self.apply_gradients(grads) def get_weights(self): - return self.variables.get_weights() + """Returns model weights.""" + return self.model.get_weights() def set_weights(self, weights): - self.variables.set_weights(weights) - - def _step(self, global_timestep): - """Takes a single step, and returns the result of the step.""" - action = self.ddpg_graph.act( - self.sess, - np.array(self.obs)[None], - self.exploration.value(global_timestep))[0] - new_obs, rew, done, _ = self.env.step(action) - ret = (self.obs, action, rew, new_obs, float(done)) - self.obs = new_obs - self.episode_rewards[-1] += rew - self.episode_lengths[-1] += 1 - if done: - self.obs = self.env.reset() - self.episode_rewards.append(0.0) - self.episode_lengths.append(0.0) - # reset UO noise for each episode - self.ddpg_graph.reset_noise(self.sess) - - self.local_timestep += 1 - return ret - - def stats(self): - n = self.config["smoothing_num_episodes"] + 1 - mean_100ep_reward = round(np.mean(self.episode_rewards[-n:-1]), 5) - mean_100ep_length = round(np.mean(self.episode_lengths[-n:-1]), 5) - exploration = self.exploration.value(self.global_timestep) - return { - "mean_100ep_reward": mean_100ep_reward, - "mean_100ep_length": mean_100ep_length, - "num_episodes": len(self.episode_rewards), - "exploration": exploration, - "local_timestep": self.local_timestep, - } - - def save(self): - return [ - self.exploration, self.episode_rewards, self.episode_lengths, - self.saved_mean_reward, self.obs, self.global_timestep, - self.local_timestep - ] - - def restore(self, data): - self.exploration = data[0] - self.episode_rewards = data[1] - self.episode_lengths = data[2] - self.saved_mean_reward = data[3] - self.obs = data[4] - self.global_timestep = data[5] - self.local_timestep = data[6] + """Sets model weights.""" + self.model.set_weights(weights) + + def get_completed_rollout_metrics(self): + """Returns metrics on previously completed rollouts. + + Calling this clears the queue of completed rollout metrics. + """ + return self.sampler.get_metrics() + + +RemoteDDPGEvaluator = ray.remote(DDPGEvaluator) diff --git a/python/ray/rllib/ddpg2/models.py b/python/ray/rllib/ddpg2/models.py index d58f37dc64174..e785f518f5419 100644 --- a/python/ray/rllib/ddpg2/models.py +++ b/python/ray/rllib/ddpg2/models.py @@ -3,389 +3,239 @@ from __future__ import print_function import numpy as np - import tensorflow as tf -import tensorflow.contrib.layers as layers - -from ray.rllib.models import ModelCatalog - - -def _build_p_network(registry, inputs, dim_actions, config): - """ - map an observation (i.e., state) to an action where - each entry takes value from (0, 1) due to the sigmoid function - """ - frontend = ModelCatalog.get_model(registry, inputs, 1, config["model"]) - - hiddens = config["actor_hiddens"] - action_out = frontend.last_layer - for hidden in hiddens: - action_out = layers.fully_connected( - action_out, num_outputs=hidden, activation_fn=tf.nn.relu) - # Use sigmoid layer to bound values within (0, 1) - # shape of action_scores is [batch_size, dim_actions] - action_scores = layers.fully_connected( - action_out, num_outputs=dim_actions, activation_fn=tf.nn.sigmoid) - - return action_scores - - -# As a stochastic policy for inference, but a deterministic policy for training -# thus ignore batch_size issue when constructing a stochastic action -def _build_action_network(p_values, low_action, high_action, stochastic, eps, - theta, sigma): - # shape is [None, dim_action] - deterministic_actions = (high_action - low_action) * p_values + low_action - - exploration_sample = tf.get_variable( - name="ornstein_uhlenbeck", - dtype=tf.float32, - initializer=low_action.size * [.0], - trainable=False) - normal_sample = tf.random_normal( - shape=[low_action.size], mean=0.0, stddev=1.0) - exploration_value = tf.assign_add( - exploration_sample, - theta * (.0 - exploration_sample) + sigma * normal_sample) - stochastic_actions = deterministic_actions + eps * ( - high_action - low_action) * exploration_value - - return tf.cond(stochastic, lambda: stochastic_actions, - lambda: deterministic_actions) - - -def _build_q_network(registry, inputs, action_inputs, config): - frontend = ModelCatalog.get_model(registry, inputs, 1, config["model"]) - - hiddens = config["critic_hiddens"] - - q_out = tf.concat([frontend.last_layer, action_inputs], axis=1) - for hidden in hiddens: - q_out = layers.fully_connected( - q_out, num_outputs=hidden, activation_fn=tf.nn.relu) - q_scores = layers.fully_connected(q_out, num_outputs=1, activation_fn=None) - - return q_scores - - -def _huber_loss(x, delta=1.0): - """Reference: https://en.wikipedia.org/wiki/Huber_loss""" - return tf.where( - tf.abs(x) < delta, - tf.square(x) * 0.5, delta * (tf.abs(x) - 0.5 * delta)) - - -def _minimize_and_clip(optimizer, objective, var_list, clip_val=10): - """Minimized `objective` using `optimizer` w.r.t. variables in - `var_list` while ensure the norm of the gradients for each - variable is clipped to `clip_val` - """ - gradients = optimizer.compute_gradients(objective, var_list=var_list) - for i, (grad, var) in enumerate(gradients): - if grad is not None: - gradients[i] = (tf.clip_by_norm(grad, clip_val), var) - return gradients - - -def _scope_vars(scope, trainable_only=False): - """ - Get variables inside a scope - The scope can be specified as a string - - Parameters - ---------- - scope: str or VariableScope - scope in which the variables reside. - trainable_only: bool - whether or not to return only the variables that were marked as - trainable. - - Returns - ------- - vars: [tf.Variable] - list of variables in `scope`. - """ - return tf.get_collection( - tf.GraphKeys.TRAINABLE_VARIABLES - if trainable_only else tf.GraphKeys.VARIABLES, - scope=scope if isinstance(scope, str) else scope.name) - - -class ModelAndLoss(object): - """Holds the model and loss function. - - Both graphs are necessary in order for the multi-gpu SGD implementation - to create towers on each device. - """ - - def __init__(self, registry, dim_actions, low_action, high_action, config, - obs_t, act_t, rew_t, obs_tp1, done_mask, importance_weights): - # p network evaluation - with tf.variable_scope("p_func", reuse=True) as scope: - self.p_t = _build_p_network(registry, obs_t, dim_actions, config) - - # target p network evaluation - with tf.variable_scope("target_p_func") as scope: - self.p_tp1 = _build_p_network(registry, obs_tp1, dim_actions, - config) - self.target_p_func_vars = _scope_vars(scope.name) - - # Action outputs - with tf.variable_scope("a_func", reuse=True): - deterministic_flag = tf.constant(value=False, dtype=tf.bool) - zero_eps = tf.constant(value=.0, dtype=tf.float32) - output_actions = _build_action_network( - self.p_t, low_action, high_action, deterministic_flag, - zero_eps, config["exploration_theta"], - config["exploration_sigma"]) - - output_actions_estimated = _build_action_network( - self.p_tp1, low_action, high_action, deterministic_flag, - zero_eps, config["exploration_theta"], - config["exploration_sigma"]) - - # q network evaluation - with tf.variable_scope("q_func") as scope: - self.q_t = _build_q_network(registry, obs_t, act_t, config) - self.q_func_vars = _scope_vars(scope.name) - with tf.variable_scope("q_func", reuse=True): - self.q_tp0 = _build_q_network(registry, obs_t, output_actions, - config) - - # target q network evalution - with tf.variable_scope("target_q_func") as scope: - self.q_tp1 = _build_q_network(registry, obs_tp1, - output_actions_estimated, config) - self.target_q_func_vars = _scope_vars(scope.name) - - q_t_selected = tf.squeeze(self.q_t, axis=len(self.q_t.shape) - 1) - - q_tp1_best = tf.squeeze( - input=self.q_tp1, axis=len(self.q_tp1.shape) - 1) - q_tp1_best_masked = (1.0 - done_mask) * q_tp1_best - - # compute RHS of bellman equation - q_t_selected_target = ( - rew_t + config["gamma"]**config["n_step"] * q_tp1_best_masked) - - # compute the error (potentially clipped) - self.td_error = q_t_selected - tf.stop_gradient(q_t_selected_target) - if config.get("use_huber"): - errors = _huber_loss(self.td_error, config.get("huber_threshold")) - else: - errors = 0.5 * tf.square(self.td_error) - - weighted_error = tf.reduce_mean(importance_weights * errors) - - self.loss = weighted_error - - # for policy gradient - self.actor_loss = -1.0 * tf.reduce_mean(self.q_tp0) - - -class DDPGGraph(object): - def __init__(self, registry, env, config, logdir): - self.env = env - dim_actions = env.action_space.shape[0] - low_action = env.action_space.low - high_action = env.action_space.high - actor_optimizer = tf.train.AdamOptimizer( - learning_rate=config["actor_lr"]) - critic_optimizer = tf.train.AdamOptimizer( - learning_rate=config["critic_lr"]) - - # Action inputs - self.stochastic = tf.placeholder(tf.bool, (), name="stochastic") - self.eps = tf.placeholder(tf.float32, (), name="eps") - self.cur_observations = tf.placeholder( - tf.float32, shape=(None, ) + env.observation_space.shape) - - # Actor: P (policy) network - p_scope_name = "p_func" - with tf.variable_scope(p_scope_name) as scope: - p_values = _build_p_network(registry, self.cur_observations, - dim_actions, config) - p_func_vars = _scope_vars(scope.name) - - # Action outputs - a_scope_name = "a_func" - with tf.variable_scope(a_scope_name): - self.output_actions = _build_action_network( - p_values, low_action, high_action, self.stochastic, self.eps, - config["exploration_theta"], config["exploration_sigma"]) - - with tf.variable_scope(a_scope_name, reuse=True): - exploration_sample = tf.get_variable(name="ornstein_uhlenbeck") - self.reset_noise_op = tf.assign(exploration_sample, - dim_actions * [.0]) - - # Replay inputs - self.obs_t = tf.placeholder( - tf.float32, - shape=(None, ) + env.observation_space.shape, - name="observation") - self.act_t = tf.placeholder( - tf.float32, shape=(None, ) + env.action_space.shape, name="action") - self.rew_t = tf.placeholder(tf.float32, [None], name="reward") - self.obs_tp1 = tf.placeholder( - tf.float32, shape=(None, ) + env.observation_space.shape) - self.done_mask = tf.placeholder(tf.float32, [None], name="done") - self.importance_weights = tf.placeholder( - tf.float32, [None], name="weight") - - def build_loss(obs_t, act_t, rew_t, obs_tp1, done_mask, - importance_weights): - return ModelAndLoss(registry, dim_actions, low_action, high_action, - config, obs_t, act_t, rew_t, obs_tp1, - done_mask, importance_weights) - - self.loss_inputs = [ - ("obs", self.obs_t), - ("actions", self.act_t), - ("rewards", self.rew_t), - ("new_obs", self.obs_tp1), - ("dones", self.done_mask), - ("weights", self.importance_weights), - ] - - loss_obj = build_loss(self.obs_t, self.act_t, self.rew_t, self.obs_tp1, - self.done_mask, self.importance_weights) - - self.build_loss = build_loss - - actor_loss = loss_obj.actor_loss - weighted_error = loss_obj.loss - q_func_vars = loss_obj.q_func_vars - target_p_func_vars = loss_obj.target_p_func_vars - target_q_func_vars = loss_obj.target_q_func_vars - self.p_t = loss_obj.p_t - self.q_t = loss_obj.q_t - self.q_tp0 = loss_obj.q_tp0 - self.q_tp1 = loss_obj.q_tp1 - self.td_error = loss_obj.td_error - - if config["l2_reg"] is not None: - for var in p_func_vars: - if "bias" not in var.name: - actor_loss += config["l2_reg"] * 0.5 * tf.nn.l2_loss(var) - for var in q_func_vars: - if "bias" not in var.name: - weighted_error += config["l2_reg"] * 0.5 * tf.nn.l2_loss( - var) - - # compute optimization op (potentially with gradient clipping) - if config["grad_norm_clipping"] is not None: - self.actor_grads_and_vars = _minimize_and_clip( - actor_optimizer, - actor_loss, - var_list=p_func_vars, - clip_val=config["grad_norm_clipping"]) - self.critic_grads_and_vars = _minimize_and_clip( - critic_optimizer, - weighted_error, - var_list=q_func_vars, - clip_val=config["grad_norm_clipping"]) - else: - self.actor_grads_and_vars = actor_optimizer.compute_gradients( - actor_loss, var_list=p_func_vars) - self.critic_grads_and_vars = critic_optimizer.compute_gradients( - weighted_error, var_list=q_func_vars) - self.actor_grads_and_vars = [(g, v) - for (g, v) in self.actor_grads_and_vars - if g is not None] - self.critic_grads_and_vars = [(g, v) - for (g, v) in self.critic_grads_and_vars - if g is not None] - self.grads_and_vars = ( - self.actor_grads_and_vars + self.critic_grads_and_vars) - self.grads = [g for (g, v) in self.grads_and_vars] - self.actor_train_expr = actor_optimizer.apply_gradients( - self.actor_grads_and_vars) - self.critic_train_expr = critic_optimizer.apply_gradients( - self.critic_grads_and_vars) - - # update_target_fn will be called periodically to copy Q network to - # target Q network - self.tau_value = config.get("tau") - self.tau = tf.placeholder(tf.float32, (), name="tau") - update_target_expr = [] - for var, var_target in zip( - sorted(q_func_vars, key=lambda v: v.name), - sorted(target_q_func_vars, key=lambda v: v.name)): - update_target_expr.append( - var_target.assign(self.tau * var + - (1.0 - self.tau) * var_target)) - for var, var_target in zip( - sorted(p_func_vars, key=lambda v: v.name), - sorted(target_p_func_vars, key=lambda v: v.name)): - update_target_expr.append( - var_target.assign(self.tau * var + - (1.0 - self.tau) * var_target)) - self.update_target_expr = tf.group(*update_target_expr) - - # support both hard and soft sync - def update_target(self, sess, tau=None): - return sess.run( - self.update_target_expr, - feed_dict={self.tau: tau or self.tau_value}) - - def act(self, sess, obs, eps, stochastic=True): - return sess.run( - self.output_actions, - feed_dict={ - self.cur_observations: obs, - self.stochastic: stochastic, - self.eps: eps - }) - - def compute_gradients(self, sess, obs_t, act_t, rew_t, obs_tp1, done_mask, - importance_weights): - td_err, grads = sess.run( - [self.td_error, self.grads], - feed_dict={ - self.obs_t: obs_t, - self.act_t: act_t, - self.rew_t: rew_t, - self.obs_tp1: obs_tp1, - self.done_mask: done_mask, - self.importance_weights: importance_weights - }) - return td_err, grads - - def compute_td_error(self, sess, obs_t, act_t, rew_t, obs_tp1, done_mask, - importance_weights): - td_err = sess.run( - self.td_error, - feed_dict={ - self.obs_t: [np.array(ob) for ob in obs_t], - self.act_t: act_t, - self.rew_t: rew_t, - self.obs_tp1: [np.array(ob) for ob in obs_tp1], - self.done_mask: done_mask, - self.importance_weights: importance_weights - }) - return td_err - - def apply_gradients(self, sess, grads): - assert len(grads) == len(self.grads_and_vars) - feed_dict = {ph: g for (g, ph) in zip(grads, self.grads)} - sess.run( - [self.critic_train_expr, self.actor_train_expr], - feed_dict=feed_dict) - - def compute_apply(self, sess, obs_t, act_t, rew_t, obs_tp1, done_mask, - importance_weights): - td_err, _, _ = sess.run( - [self.td_error, self.critic_train_expr, self.actor_train_expr], - feed_dict={ - self.obs_t: obs_t, - self.act_t: act_t, - self.rew_t: rew_t, - self.obs_tp1: obs_tp1, - self.done_mask: done_mask, - self.importance_weights: importance_weights - }) - return td_err - - def reset_noise(self, sess): - sess.run(self.reset_noise_op) + +from ray.experimental.tfutils import TensorFlowVariables +from ray.rllib.models.ddpgnet import DDPGActor, DDPGCritic +from ray.rllib.ddpg2.random_process import OrnsteinUhlenbeckProcess + + +class DDPGModel(): + def __init__(self, registry, env, config): + self.config = config + self.sess = tf.Session() + + with tf.variable_scope("model"): + self.model = DDPGActorCritic( + registry, env, self.config, self.sess) + with tf.variable_scope("target_model"): + self.target_model = DDPGActorCritic( + registry, env, self.config, self.sess) + self._setup_gradients() + self._setup_target_updates() + + self.initialize() + self._initialize_target_weights() + + def initialize(self): + self.sess.run(tf.global_variables_initializer()) + + def _initialize_target_weights(self): + """Set initial target weights to match model weights.""" + a_updates = [] + for var, target_var in zip( + self.model.actor_var_list, self.target_model.actor_var_list): + a_updates.append(tf.assign(target_var, var)) + actor_updates = tf.group(*a_updates) + + c_updates = [] + for var, target_var in zip( + self.model.critic_var_list, self.target_model.critic_var_list): + c_updates.append(tf.assign(target_var, var)) + critic_updates = tf.group(*c_updates) + self.sess.run([actor_updates, critic_updates]) + + def _setup_gradients(self): + """Setup critic and actor gradients.""" + self.critic_grads = tf.gradients( + self.model.critic_loss, self.model.critic_var_list) + c_grads_and_vars = list(zip( + self.critic_grads, self.model.critic_var_list)) + c_opt = tf.train.AdamOptimizer(self.config["critic_lr"]) + self._apply_c_gradients = c_opt.apply_gradients(c_grads_and_vars) + + self.actor_grads = tf.gradients( + -self.model.cn_for_loss, self.model.actor_var_list) + a_grads_and_vars = list(zip( + self.actor_grads, self.model.actor_var_list)) + a_opt = tf.train.AdamOptimizer(self.config["actor_lr"]) + self._apply_a_gradients = a_opt.apply_gradients(a_grads_and_vars) + + def compute_gradients(self, samples): + """ Returns gradient w.r.t. samples.""" + # actor gradients + actor_actions = self.sess.run( + self.model.output_action, + feed_dict={self.model.obs: samples["obs"]} + ) + + actor_feed_dict = { + self.model.obs: samples["obs"], + self.model.output_action: actor_actions, + } + self.actor_grads = [g for g in self.actor_grads if g is not None] + actor_grad = self.sess.run(self.actor_grads, feed_dict=actor_feed_dict) + + # feed samples into target actor + target_Q_act = self.sess.run( + self.target_model.output_action, + feed_dict={self.target_model.obs: samples["new_obs"]} + ) + target_Q_dict = { + self.target_model.obs: samples["new_obs"], + self.target_model.act: target_Q_act, + } + + target_Q = self.sess.run( + self.target_model.critic_eval, feed_dict=target_Q_dict) + + # critic gradients + critic_feed_dict = { + self.model.obs: samples["obs"], + self.model.act: samples["actions"], + self.model.reward: samples["rewards"], + self.model.target_Q: target_Q, + } + self.critic_grads = [g for g in self.critic_grads if g is not None] + critic_grad = self.sess.run( + self.critic_grads, feed_dict=critic_feed_dict) + return (critic_grad, actor_grad), {} + + def apply_gradients(self, grads): + """Applies gradients to evaluator weights.""" + c_grads, a_grads = grads + critic_feed_dict = dict(zip(self.critic_grads, c_grads)) + self.sess.run(self._apply_c_gradients, feed_dict=critic_feed_dict) + actor_feed_dict = dict(zip(self.actor_grads, a_grads)) + self.sess.run(self._apply_a_gradients, feed_dict=actor_feed_dict) + + def get_weights(self): + """Returns model weights, target model weights.""" + return self.model.get_weights(), self.target_model.get_weights() + + def set_weights(self, weights): + """Sets model and target model weights.""" + model_weights, target_model_weights = weights + self.model.set_weights(model_weights) + self.target_model.set_weights(target_model_weights) + + def _setup_target_updates(self): + """Set up target actor and critic updates.""" + a_updates = [] + tau = self.config["tau"] + for var, target_var in zip( + self.model.actor_var_list, self.target_model.actor_var_list): + a_updates.append(tf.assign( + target_var, tau * var + (1. - tau) * target_var)) + actor_updates = tf.group(*a_updates) + + c_updates = [] + for var, target_var in zip( + self.model.critic_var_list, self.target_model.critic_var_list): + c_updates.append(tf.assign( + target_var, tau * var + (1. - tau) * target_var)) + critic_updates = tf.group(*c_updates) + self.target_updates = [actor_updates, critic_updates] + + def update_target(self): + """Updates target critic and target actor.""" + self.sess.run(self.target_updates) + + +class DDPGActorCritic(): + other_output = [] + is_recurrent = False + + def __init__(self, registry, env, config, sess): + self.config = config + self.sess = sess + + obs_space = env.observation_space + ac_space = env.action_space + + self.obs_size = int(np.prod(obs_space.shape)) + self.obs = tf.placeholder(tf.float32, [None, self.obs_size]) + self.ac_size = int(np.prod(ac_space.shape)) + self.act = tf.placeholder(tf.float32, [None, self.ac_size]) + self.action_bound = env.action_space.high + # TODO: change action_bound to make more general + + self._setup_actor_network(obs_space, ac_space) + self._setup_critic_network(obs_space, ac_space) + self._setup_critic_loss(ac_space) + + with tf.variable_scope("critic"): + self.critic_var_list = tf.get_collection( + tf.GraphKeys.TRAINABLE_VARIABLES, + tf.get_variable_scope().name + ) + self.critic_vars = TensorFlowVariables(self.critic_loss, + self.sess) + + with tf.variable_scope("actor"): + self.actor_var_list = tf.get_collection( + tf.GraphKeys.TRAINABLE_VARIABLES, + tf.get_variable_scope().name + ) + self.actor_vars = TensorFlowVariables(self.output_action, + self.sess) + + if (self.config["noise_add"]): + params = self.config["noise_parameters"] + self.rand_process = OrnsteinUhlenbeckProcess(size=self.ac_size, + theta=params["theta"], + mu=params["mu"], + sigma=params["sigma"]) + self.epsilon = 1.0 + + def _setup_critic_loss(self, action_space): + """Sets up critic loss.""" + self.target_Q = tf.placeholder(tf.float32, [None, 1], name="target_q") + + # compare critic eval to critic_target (squared loss) + self.reward = tf.placeholder(tf.float32, [None], name="reward") + self.critic_target = tf.expand_dims(self.reward, 1) + \ + self.config['gamma'] * self.target_Q + self.critic_loss = tf.reduce_mean(tf.square( + self.critic_target - self.critic_eval)) + + def _setup_critic_network(self, obs_space, ac_space): + """Sets up Q network.""" + with tf.variable_scope("critic", reuse=tf.AUTO_REUSE): + self.critic_network = DDPGCritic((self.obs, self.act), 1, {}) + self.critic_eval = self.critic_network.outputs + + with tf.variable_scope("critic", reuse=True): + self.cn_for_loss = DDPGCritic( + (self.obs, self.output_action), 1, {}).outputs + + def _setup_actor_network(self, obs_space, ac_space): + """Sets up actor network.""" + with tf.variable_scope("actor", reuse=tf.AUTO_REUSE): + self.actor_network = DDPGActor( + self.obs, self.ac_size, + options={"action_bound": self.action_bound}) + self.output_action = self.actor_network.outputs + + def get_weights(self): + """Returns critic weights, actor weights.""" + return self.critic_vars.get_weights(), self.actor_vars.get_weights() + + def set_weights(self, weights): + """Sets critic and actor weights.""" + critic_weights, actor_weights = weights + self.critic_vars.set_weights(critic_weights) + self.actor_vars.set_weights(actor_weights) + + def compute(self, ob): + """Returns action, given state.""" + flattened_ob = np.reshape(ob, [-1, np.prod(ob.shape)]) + action = self.sess.run(self.output_action, {self.obs: flattened_ob}) + if (self.config["noise_add"]): + action += self.epsilon * self.rand_process.sample() + if (self.epsilon > 0): + self.epsilon -= self.config["noise_epsilon"] + return action[0], {} + + def value(self, *args): + return 0 diff --git a/python/ray/rllib/ddpg/random_process.py b/python/ray/rllib/ddpg2/random_process.py similarity index 100% rename from python/ray/rllib/ddpg/random_process.py rename to python/ray/rllib/ddpg2/random_process.py diff --git a/python/ray/rllib/optimizers/local_sync_replay.py b/python/ray/rllib/optimizers/local_sync_replay.py index 5ba8b6f9aed82..ac430c6a1619e 100644 --- a/python/ray/rllib/optimizers/local_sync_replay.py +++ b/python/ray/rllib/optimizers/local_sync_replay.py @@ -9,6 +9,7 @@ PrioritizedReplayBuffer from ray.rllib.optimizers.policy_optimizer import PolicyOptimizer from ray.rllib.optimizers.sample_batch import SampleBatch +from ray.rllib.utils.compression import pack_if_needed from ray.rllib.utils.filter import RunningStat from ray.rllib.utils.timer import TimerStat @@ -64,7 +65,8 @@ def step(self): batch = self.local_evaluator.sample() for row in batch.rows(): self.replay_buffer.add( - row["obs"], row["actions"], row["rewards"], row["new_obs"], + pack_if_needed(row["obs"]), row["actions"], row["rewards"], + pack_if_needed(row["new_obs"]), row["dones"], row["weights"]) if len(self.replay_buffer) >= self.replay_starts: diff --git a/python/ray/rllib/test/test_checkpoint_restore.py b/python/ray/rllib/test/test_checkpoint_restore.py index f19eecd2ca3e2..9e583c877bb96 100644 --- a/python/ray/rllib/test/test_checkpoint_restore.py +++ b/python/ray/rllib/test/test_checkpoint_restore.py @@ -22,7 +22,7 @@ def get_mean_action(alg, obs): CONFIGS = { "ES": {"episodes_per_batch": 10, "timesteps_per_batch": 100}, "DQN": {}, - "DDPG2": {"noise_scale": 0.0}, + "DDPG": {"noise_scale": 0.0}, "PPO": {"num_sgd_iter": 5, "timesteps_per_batch": 1000}, "A3C": {"use_lstm": False}, } @@ -30,7 +30,7 @@ def get_mean_action(alg, obs): def test(use_object_store, alg_name): cls = get_agent_class(alg_name) - if alg_name == "DDPG2": + if alg_name == "DDPG": alg1 = cls(config=CONFIGS[name], env="Pendulum-v0") alg2 = cls(config=CONFIGS[name], env="Pendulum-v0") else: @@ -48,7 +48,7 @@ def test(use_object_store, alg_name): alg2.restore(alg1.save()) for _ in range(10): - if alg_name == "DDPG2": + if alg_name == "DDPG": obs = np.random.uniform(size=3) else: obs = np.random.uniform(size=4) @@ -59,9 +59,8 @@ def test(use_object_store, alg_name): if __name__ == "__main__": - # https://github.com/ray-project/ray/issues/1062 for enabling ES test too for use_object_store in [False, True]: - for name in ["ES", "DQN", "DDPG2", "PPO", "A3C"]: + for name in ["ES", "DQN", "DDPG", "PPO", "A3C"]: test(use_object_store, name) print("All checkpoint restore tests passed!") diff --git a/python/ray/rllib/test/test_supported_spaces.py b/python/ray/rllib/test/test_supported_spaces.py index 5fa46f559fd6d..2e41c85a0233e 100644 --- a/python/ray/rllib/test/test_supported_spaces.py +++ b/python/ray/rllib/test/test_supported_spaces.py @@ -114,7 +114,7 @@ class ModelSupportedSpaces(unittest.TestCase): def testAll(self): ray.init() stats = {} - check_support("DDPG2", {"timesteps_per_iteration": 1}, stats) + check_support("DDPG", {"timesteps_per_iteration": 1}, stats) check_support("DQN", {"timesteps_per_iteration": 1}, stats) check_support( "A3C", {"num_workers": 1, "optimizer": {"grads_per_step": 1}}, diff --git a/python/ray/rllib/tuned_examples/mountaincarcontinuous-apex-ddpg2.yaml b/python/ray/rllib/tuned_examples/mountaincarcontinuous-apex-ddpg.yaml similarity index 66% rename from python/ray/rllib/tuned_examples/mountaincarcontinuous-apex-ddpg2.yaml rename to python/ray/rllib/tuned_examples/mountaincarcontinuous-apex-ddpg.yaml index 7e5af40ab4e05..82947d872f0f7 100644 --- a/python/ray/rllib/tuned_examples/mountaincarcontinuous-apex-ddpg2.yaml +++ b/python/ray/rllib/tuned_examples/mountaincarcontinuous-apex-ddpg.yaml @@ -1,12 +1,7 @@ # This can be expected to reach 90 reward within ~1.5-2.5m timesteps / ~150-250 seconds on a K40 GPU -mountaincarcontinuous-apex-ddpg-2: +mountaincarcontinuous-apex-ddpg: env: MountainCarContinuous-v0 - run: APEX_DDPG2 - trial_resources: - cpu: 1 - gpu: 1 - extra_cpu: - eval: 4 + spec.config.num_workers + run: APEX_DDPG stop: episode_reward_mean: 90 config: diff --git a/python/ray/rllib/tuned_examples/mountaincarcontinuous-ddpg2.yaml b/python/ray/rllib/tuned_examples/mountaincarcontinuous-ddpg.yaml similarity index 85% rename from python/ray/rllib/tuned_examples/mountaincarcontinuous-ddpg2.yaml rename to python/ray/rllib/tuned_examples/mountaincarcontinuous-ddpg.yaml index 157e7e3a7abba..0a330bb5b57b0 100644 --- a/python/ray/rllib/tuned_examples/mountaincarcontinuous-ddpg2.yaml +++ b/python/ray/rllib/tuned_examples/mountaincarcontinuous-ddpg.yaml @@ -1,9 +1,7 @@ # can expect improvement to 90 reward in ~12-24k timesteps -mountaincarcontinuous-ddpg-2: +mountaincarcontinuous-ddpg: env: MountainCarContinuous-v0 - run: DDPG2 - trial_resources: - cpu: 6 + run: DDPG stop: episode_reward_mean: 90 config: diff --git a/python/ray/rllib/tuned_examples/pendulum-apex-ddpg2.yaml b/python/ray/rllib/tuned_examples/pendulum-apex-ddpg.yaml similarity index 67% rename from python/ray/rllib/tuned_examples/pendulum-apex-ddpg2.yaml rename to python/ray/rllib/tuned_examples/pendulum-apex-ddpg.yaml index ec4361e7a230e..f7a7c71f62f0f 100644 --- a/python/ray/rllib/tuned_examples/pendulum-apex-ddpg2.yaml +++ b/python/ray/rllib/tuned_examples/pendulum-apex-ddpg.yaml @@ -1,12 +1,7 @@ # This can be expected to reach -160 reward within 2.5 timesteps / ~250 seconds on a K40 GPU -pendulum-apex-ddpg-2: +pendulum-apex-ddpg: env: Pendulum-v0 - run: APEX_DDPG2 - trial_resources: - cpu: 1 - gpu: 1 - extra_cpu: - eval: 4 + spec.config.num_workers + run: APEX_DDPG stop: episode_reward_mean: -160 config: diff --git a/python/ray/rllib/tuned_examples/pendulum-ddpg.yaml b/python/ray/rllib/tuned_examples/pendulum-ddpg.yaml new file mode 100644 index 0000000000000..2166989d8080a --- /dev/null +++ b/python/ray/rllib/tuned_examples/pendulum-ddpg.yaml @@ -0,0 +1,11 @@ +# can expect improvement to -160 reward in ~30k timesteps +pendulum-ddpg: + env: Pendulum-v0 + run: DDPG + stop: + episode_reward_mean: -160 + config: + use_huber: True + random_starts: False + clip_rewards: False + exploration_fraction: 0.1 diff --git a/python/ray/rllib/tuned_examples/pendulum-ddpg2.yaml b/python/ray/rllib/tuned_examples/pendulum-ddpg2.yaml deleted file mode 100644 index 43327705cd2ab..0000000000000 --- a/python/ray/rllib/tuned_examples/pendulum-ddpg2.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# can expect improvement to -160 reward in ~30-40k timesteps -pendulum-ddpg-2: - env: Pendulum-v0 - run: DDPG2 - trial_resources: - cpu: 6 - gpu: 1 - stop: - episode_reward_mean: -160 - config: - use_huber: True - random_starts: False - clip_rewards: False - exploration_fraction: 0.4 - model: - fcnet_hiddens: [] diff --git a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml index b25180ff0d057..840f6d963c4e7 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml @@ -2,9 +2,11 @@ pendulum-ddpg: env: Pendulum-v0 run: DDPG stop: - episode_reward_mean: -100 - time_total_s: 600 - trial_resources: - cpu: 1 + episode_reward_mean: -160 + time_total_s: 900 config: - num_workers: 1 + use_huber: True + random_starts: False + clip_rewards: False + exploration_fraction: 0.1 + smoothing_num_episodes: 10 diff --git a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg2.yaml b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg2.yaml index c60d098726b81..eaf33eb7e5e44 100644 --- a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg2.yaml +++ b/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg2.yaml @@ -1,16 +1,8 @@ -pendulum-ddpg-2: +pendulum-ddpg2: env: Pendulum-v0 run: DDPG2 - trial_resources: - cpu: 2 stop: - episode_reward_mean: -160 - time_total_s: 900 + episode_reward_mean: -100 + time_total_s: 600 config: - use_huber: True - random_starts: False - clip_rewards: False - exploration_fraction: 0.4 - model: - fcnet_hiddens: [] - smoothing_num_episodes: 10 + num_workers: 1 diff --git a/python/ray/rllib/utils/compression.py b/python/ray/rllib/utils/compression.py index 24176285bbb64..dee8d875df3c3 100644 --- a/python/ray/rllib/utils/compression.py +++ b/python/ray/rllib/utils/compression.py @@ -28,6 +28,12 @@ def pack(data): return data +def pack_if_needed(data): + if isinstance(data, np.ndarray): + data = pack(data) + return data + + def unpack(data): if LZ4_ENABLED: data = base64.b64decode(data) diff --git a/src/common/cmake/Common.cmake b/src/common/cmake/Common.cmake index 9e12ab9cc0bb1..b46bdeeb1b499 100644 --- a/src/common/cmake/Common.cmake +++ b/src/common/cmake/Common.cmake @@ -10,7 +10,12 @@ if(UNIX AND NOT APPLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -rdynamic") endif() -set(FLATBUFFERS_VERSION "1.7.1") +# The following is needed because in CentOS, the lib directory is named lib64 +if(EXISTS "/etc/redhat-release" AND CMAKE_SIZEOF_VOID_P EQUAL 8) + set(LIB_SUFFIX 64) +endif() + +set(FLATBUFFERS_VERSION "1.9.0") set(FLATBUFFERS_PREFIX "${CMAKE_BINARY_DIR}/flatbuffers_ep-prefix/src/flatbuffers_ep-install") if (NOT TARGET flatbuffers_ep) @@ -19,13 +24,14 @@ if (NOT TARGET flatbuffers_ep) CMAKE_ARGS "-DCMAKE_CXX_FLAGS=-fPIC" "-DCMAKE_INSTALL_PREFIX:PATH=${FLATBUFFERS_PREFIX}" - "-DFLATBUFFERS_BUILD_TESTS=OFF") + "-DFLATBUFFERS_BUILD_TESTS=OFF" + "-DCMAKE_BUILD_TYPE=RELEASE") endif() set(FBS_DEPENDS flatbuffers_ep) set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_PREFIX}/include") -set(FLATBUFFERS_STATIC_LIB "${FLATBUFFERS_PREFIX}/lib/libflatbuffers.a") +set(FLATBUFFERS_STATIC_LIB "${FLATBUFFERS_PREFIX}/lib${LIB_SUFFIX}/libflatbuffers.a") set(FLATBUFFERS_COMPILER "${FLATBUFFERS_PREFIX}/bin/flatc") message(STATUS "Flatbuffers include dir: ${FLATBUFFERS_INCLUDE_DIR}") diff --git a/src/common/lib/python/common_extension.cc b/src/common/lib/python/common_extension.cc index 230a576e60c6b..82e8112817d3b 100644 --- a/src/common/lib/python/common_extension.cc +++ b/src/common/lib/python/common_extension.cc @@ -195,8 +195,8 @@ static PyObject *PyObjectID_richcompare(PyObjectID *self, static PyObject *PyObjectID_redis_shard_hash(PyObjectID *self) { /* NOTE: The hash function used here must match the one in get_redis_context * in src/common/state/redis.cc. Changes to the hash function should only be - * made through UniqueIDHasher in src/common/common.h */ - UniqueIDHasher hash; + * made through std::hash in src/common/common.h */ + std::hash hash; return PyLong_FromSize_t(hash(self->object_id)); } diff --git a/src/common/state/redis.cc b/src/common/state/redis.cc index 62d814a2669b1..e167510ede10f 100644 --- a/src/common/state/redis.cc +++ b/src/common/state/redis.cc @@ -80,13 +80,13 @@ redisAsyncContext *get_redis_context(DBHandle *db, UniqueID id) { /* NOTE: The hash function used here must match the one in * PyObjectID_redis_shard_hash in src/common/lib/python/common_extension.cc. * Changes to the hash function should only be made through - * UniqueIDHasher in src/common/common.h */ - UniqueIDHasher index; + * std::hash in src/common/common.h */ + std::hash index; return db->contexts[index(id) % db->contexts.size()]; } redisAsyncContext *get_redis_subscribe_context(DBHandle *db, UniqueID id) { - UniqueIDHasher index; + std::hash index; return db->subscribe_contexts[index(id) % db->subscribe_contexts.size()]; } diff --git a/src/common/state/redis.h b/src/common/state/redis.h index dc879eb8290b6..164069740d3e1 100644 --- a/src/common/state/redis.h +++ b/src/common/state/redis.h @@ -48,7 +48,7 @@ struct DBHandle { int64_t db_index; /** Cache for the IP addresses of db clients. This is an unordered map mapping * client IDs to addresses. */ - std::unordered_map db_client_cache; + std::unordered_map db_client_cache; /** Redis context for synchronous connections. This should only be used very * rarely, it is not asynchronous. */ redisContext *sync_context; diff --git a/src/global_scheduler/global_scheduler.cc b/src/global_scheduler/global_scheduler.cc index 3dcb1259393fb..db97e76eb38ce 100644 --- a/src/global_scheduler/global_scheduler.cc +++ b/src/global_scheduler/global_scheduler.cc @@ -234,11 +234,9 @@ void add_local_scheduler(GlobalSchedulerState *state, handle_new_local_scheduler(state, state->policy_state, db_client_id); } -std::unordered_map::iterator -remove_local_scheduler( +std::unordered_map::iterator remove_local_scheduler( GlobalSchedulerState *state, - std::unordered_map::iterator - it) { + std::unordered_map::iterator it) { RAY_CHECK(it != state->local_schedulers.end()); DBClientID local_scheduler_id = it->first; it = state->local_schedulers.erase(it); diff --git a/src/global_scheduler/global_scheduler.h b/src/global_scheduler/global_scheduler.h index 30a64b2fdb1f9..1945593932f89 100644 --- a/src/global_scheduler/global_scheduler.h +++ b/src/global_scheduler/global_scheduler.h @@ -55,18 +55,15 @@ typedef struct { ray::gcs::AsyncGcsClient gcs_client; /** A hash table mapping local scheduler ID to the local schedulers that are * connected to Redis. */ - std::unordered_map - local_schedulers; + std::unordered_map local_schedulers; /** The state managed by the scheduling policy. */ GlobalSchedulerPolicyState *policy_state; /** The plasma_manager ip:port -> local_scheduler_db_client_id association. */ std::unordered_map plasma_local_scheduler_map; /** The local_scheduler_db_client_id -> plasma_manager ip:port association. */ - std::unordered_map - local_scheduler_plasma_map; + std::unordered_map local_scheduler_plasma_map; /** Objects cached by this global scheduler instance. */ - std::unordered_map - scheduler_object_info_table; + std::unordered_map scheduler_object_info_table; /** An array of tasks that haven't been scheduled yet. */ std::vector pending_tasks; } GlobalSchedulerState; diff --git a/src/local_scheduler/local_scheduler.cc b/src/local_scheduler/local_scheduler.cc index 241d3e4fe7a93..279c2bb8b8621 100644 --- a/src/local_scheduler/local_scheduler.cc +++ b/src/local_scheduler/local_scheduler.cc @@ -1052,8 +1052,8 @@ void handle_set_actor_frontier(LocalSchedulerState *state, ActorFrontier const &frontier) { /* Parse the ActorFrontier flatbuffer. */ ActorID actor_id = from_flatbuf(*frontier.actor_id()); - std::unordered_map task_counters; - std::unordered_map frontier_dependencies; + std::unordered_map task_counters; + std::unordered_map frontier_dependencies; for (size_t i = 0; i < frontier.handle_ids()->size(); ++i) { ActorID handle_id = from_flatbuf(*frontier.handle_ids()->Get(i)); task_counters[handle_id] = frontier.task_counters()->Get(i); diff --git a/src/local_scheduler/local_scheduler_algorithm.cc b/src/local_scheduler/local_scheduler_algorithm.cc index 223b27ca56982..d16b46f7d0d64 100644 --- a/src/local_scheduler/local_scheduler_algorithm.cc +++ b/src/local_scheduler/local_scheduler_algorithm.cc @@ -53,13 +53,12 @@ typedef struct { * handle. This is used to guarantee execution of tasks on actors in the * order that the tasks were submitted, per handle. Tasks from different * handles to the same actor may be interleaved. */ - std::unordered_map task_counters; + std::unordered_map task_counters; /** These are the execution dependencies that make up the frontier of the * actor's runnable tasks. For each actor handle, we store the object ID * that represents the execution dependency for the next runnable task * submitted by that handle. */ - std::unordered_map - frontier_dependencies; + std::unordered_map frontier_dependencies; /** The return value of the most recently executed task. The next task to * execute should take this as an execution dependency at dispatch time. Set * to nil if there are no execution dependencies (e.g., this is the first @@ -85,12 +84,12 @@ struct SchedulingAlgorithmState { /** This is a hash table from actor ID to information about that actor. In * particular, a queue of tasks that are waiting to execute on that actor. * This is only used for actors that exist locally. */ - std::unordered_map local_actor_infos; + std::unordered_map local_actor_infos; /** This is a set of the IDs of the actors that have tasks waiting to run. * The purpose is to make it easier to dispatch tasks without looping over * all of the actors. Note that this is an optimization and is not strictly * necessary. */ - std::unordered_set actors_with_pending_tasks; + std::unordered_set actors_with_pending_tasks; /** A vector of actor tasks that have been submitted but this local scheduler * doesn't know which local scheduler is responsible for them, so cannot * assign them to the correct local scheduler yet. Whenever a notification @@ -112,13 +111,13 @@ struct SchedulingAlgorithmState { std::vector blocked_workers; /** A hash map of the objects that are available in the local Plasma store. * The key is the object ID. This information could be a little stale. */ - std::unordered_map local_objects; + std::unordered_map local_objects; /** A hash map of the objects that are not available locally. These are * currently being fetched by this local scheduler. The key is the object * ID. Every local_scheduler_fetch_timeout_milliseconds, a Plasma fetch * request will be sent the object IDs in this table. Each entry also holds * an array of queued tasks that are dependent on it. */ - std::unordered_map remote_objects; + std::unordered_map remote_objects; }; SchedulingAlgorithmState *SchedulingAlgorithmState_init(void) { @@ -809,7 +808,7 @@ int rerun_actor_creation_tasks_timeout_handler(event_loop *loop, // Create a set of the dummy object IDs for the actor creation tasks to // reconstruct. - std::unordered_set actor_dummy_objects; + std::unordered_set actor_dummy_objects; for (auto const &execution_spec : state->algorithm_state->cached_submitted_actor_tasks) { ObjectID actor_creation_dummy_object_id = @@ -1805,9 +1804,9 @@ void print_worker_info(const char *message, << " blocked"; } -std::unordered_map -get_actor_task_counters(SchedulingAlgorithmState *algorithm_state, - ActorID actor_id) { +std::unordered_map get_actor_task_counters( + SchedulingAlgorithmState *algorithm_state, + ActorID actor_id) { RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); return algorithm_state->local_actor_infos[actor_id].task_counters; } @@ -1815,8 +1814,7 @@ get_actor_task_counters(SchedulingAlgorithmState *algorithm_state, void set_actor_task_counters( SchedulingAlgorithmState *algorithm_state, ActorID actor_id, - const std::unordered_map - &task_counters) { + const std::unordered_map &task_counters) { RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); /* Overwrite the current task counters for the actor. This is necessary * during reconstruction when resuming from a checkpoint so that we can @@ -1860,7 +1858,7 @@ void set_actor_task_counters( } } -std::unordered_map get_actor_frontier( +std::unordered_map get_actor_frontier( SchedulingAlgorithmState *algorithm_state, ActorID actor_id) { RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); @@ -1871,8 +1869,7 @@ void set_actor_frontier( LocalSchedulerState *state, SchedulingAlgorithmState *algorithm_state, ActorID actor_id, - const std::unordered_map - &frontier_dependencies) { + const std::unordered_map &frontier_dependencies) { RAY_CHECK(algorithm_state->local_actor_infos.count(actor_id) != 0); auto entry = algorithm_state->local_actor_infos[actor_id]; entry.frontier_dependencies = frontier_dependencies; diff --git a/src/local_scheduler/local_scheduler_algorithm.h b/src/local_scheduler/local_scheduler_algorithm.h index 6ad0558c37d7a..9238d5db58e55 100644 --- a/src/local_scheduler/local_scheduler_algorithm.h +++ b/src/local_scheduler/local_scheduler_algorithm.h @@ -362,9 +362,9 @@ void print_worker_info(const char *message, * @return A map from handle ID to the number of tasks submitted by that handle * that have executed so far. */ -std::unordered_map -get_actor_task_counters(SchedulingAlgorithmState *algorithm_state, - ActorID actor_id); +std::unordered_map get_actor_task_counters( + SchedulingAlgorithmState *algorithm_state, + ActorID actor_id); /** * Set the number of tasks, per actor handle, that have been executed on an @@ -381,8 +381,7 @@ get_actor_task_counters(SchedulingAlgorithmState *algorithm_state, void set_actor_task_counters( SchedulingAlgorithmState *algorithm_state, ActorID actor_id, - const std::unordered_map - &task_counters); + const std::unordered_map &task_counters); /** * Get the actor's frontier of task dependencies. @@ -395,7 +394,7 @@ void set_actor_task_counters( * @return A map from handle ID to execution dependency for the earliest * runnable task submitted through that handle. */ -std::unordered_map get_actor_frontier( +std::unordered_map get_actor_frontier( SchedulingAlgorithmState *algorithm_state, ActorID actor_id); @@ -414,8 +413,7 @@ void set_actor_frontier( LocalSchedulerState *state, SchedulingAlgorithmState *algorithm_state, ActorID actor_id, - const std::unordered_map - &frontier_dependencies); + const std::unordered_map &frontier_dependencies); /** The following methods are for testing purposes only. */ #ifdef LOCAL_SCHEDULER_TEST diff --git a/src/local_scheduler/local_scheduler_shared.h b/src/local_scheduler/local_scheduler_shared.h index 762518a328714..013cf7a785a01 100644 --- a/src/local_scheduler/local_scheduler_shared.h +++ b/src/local_scheduler/local_scheduler_shared.h @@ -48,16 +48,16 @@ struct LocalSchedulerState { std::list workers; /** A set of driver IDs corresponding to drivers that have been removed. This * is used to make sure we don't execute any tasks belong to dead drivers. */ - std::unordered_set removed_drivers; + std::unordered_set removed_drivers; /** A set of actors IDs corresponding to local actors that have been removed. * This ensures we can reject any tasks destined for dead actors. */ - std::unordered_set removed_actors; + std::unordered_set removed_actors; /** List of the process IDs for child processes (workers) started by the * local scheduler that have not sent a REGISTER_PID message yet. */ std::vector child_pids; /** A hash table mapping actor IDs to the db_client_id of the local scheduler * that is responsible for the actor. */ - std::unordered_map actor_mapping; + std::unordered_map actor_mapping; /** The handle to the database. */ DBHandle *db; /** The handle to the GCS (modern version of the above). */ diff --git a/src/plasma/plasma_manager.cc b/src/plasma/plasma_manager.cc index 6da9e28dbf293..8529e16102f7f 100644 --- a/src/plasma/plasma_manager.cc +++ b/src/plasma/plasma_manager.cc @@ -221,20 +221,20 @@ struct PlasmaManagerState { int port; /** Unordered map of outstanding fetch requests. The key is the object ID. The * value is the data needed to perform the fetch. */ - std::unordered_map fetch_requests; + std::unordered_map fetch_requests; /** Unordered map of outstanding wait requests. The key is the object ID. The * value is the vector of wait requests that are waiting for the object to * arrive locally. */ - std::unordered_map, UniqueIDHasher> + std::unordered_map> object_wait_requests_local; /** Unordered map of outstanding wait requests. The key is the object ID. The * value is the vector of wait requests that are waiting for the object to * be available somewhere in the system. */ - std::unordered_map, UniqueIDHasher> + std::unordered_map> object_wait_requests_remote; /** Initialize an empty unordered set for the cache of local available object. */ - std::unordered_set local_available_objects; + std::unordered_set local_available_objects; /** The time (in milliseconds since the Unix epoch) when the most recent * heartbeat was sent. */ int64_t previous_heartbeat_time; @@ -247,7 +247,7 @@ struct PlasmaManagerState { * object is removed. If object transfers between managers is parallelized, * then all objects being received from a remote manager will need to be * removed if the connection to the remote manager fails. */ - std::unordered_set receives_in_progress; + std::unordered_set receives_in_progress; }; PlasmaManagerState *g_manager_state = NULL; @@ -265,8 +265,7 @@ struct ClientConnection { /* A set of object IDs which are queued in the transfer_queue and waiting to * be sent. This is used to avoid sending the same object ID to the same * manager multiple times. */ - std::unordered_map - pending_object_transfers; + std::unordered_map pending_object_transfers; /** Buffer used to receive transfers (data fetches) we want to ignore */ PlasmaRequestBuffer *ignore_buffer; /** File descriptor for the socket connected to the other @@ -317,7 +316,7 @@ bool ClientConnection_request_finished(ClientConnection *client_conn) { return client_conn->cursor == -1; } -std::unordered_map, UniqueIDHasher> & +std::unordered_map> & object_wait_requests_from_type(PlasmaManagerState *manager_state, int type) { /* We use different types of hash tables for different requests. */ RAY_CHECK(type == plasma::PLASMA_QUERY_LOCAL || diff --git a/src/plasma/plasma_protocol.cc b/src/plasma/plasma_protocol.cc index 1f97d93989f38..42fa84cc36e64 100644 --- a/src/plasma/plasma_protocol.cc +++ b/src/plasma/plasma_protocol.cc @@ -370,11 +370,10 @@ Status ReadGetRequest(uint8_t *data, return Status::OK(); } -Status SendGetReply( - int sock, - ObjectID object_ids[], - std::unordered_map &plasma_objects, - int64_t num_objects) { +Status SendGetReply(int sock, + ObjectID object_ids[], + std::unordered_map &plasma_objects, + int64_t num_objects) { flatbuffers::FlatBufferBuilder fbb; std::vector objects; diff --git a/src/ray/gcs/tables.h b/src/ray/gcs/tables.h index e46ccbddfb431..d5c4df088aa79 100644 --- a/src/ray/gcs/tables.h +++ b/src/ray/gcs/tables.h @@ -517,7 +517,7 @@ class ClientTable : private Log { /// The callback to call when a client is removed. ClientTableCallback client_removed_callback_; /// A cache for information about all clients. - std::unordered_map client_cache_; + std::unordered_map client_cache_; }; } // namespace gcs diff --git a/src/ray/id.cc b/src/ray/id.cc index e872aa29419f3..4d9634623a78b 100644 --- a/src/ray/id.cc +++ b/src/ray/id.cc @@ -81,6 +81,12 @@ bool UniqueID::operator==(const UniqueID &rhs) const { return std::memcmp(data(), rhs.data(), kUniqueIDSize) == 0; } +size_t UniqueID::hash() const { + size_t result; + std::memcpy(&result, id_, sizeof(size_t)); + return result; +} + std::ostream &operator<<(std::ostream &os, const UniqueID &id) { os << id.hex(); return os; diff --git a/src/ray/id.h b/src/ray/id.h index 51aa52f937de1..beb15fc5e6aa2 100644 --- a/src/ray/id.h +++ b/src/ray/id.h @@ -19,6 +19,7 @@ class RAY_EXPORT UniqueID { static UniqueID from_random(); static UniqueID from_binary(const std::string &binary); static const UniqueID nil(); + size_t hash() const; bool is_nil() const; bool operator==(const UniqueID &rhs) const; const uint8_t *data() const; @@ -35,15 +36,6 @@ class RAY_EXPORT UniqueID { static_assert(std::is_standard_layout::value, "UniqueID must be standard"); -struct UniqueIDHasher { - // ID hashing function. - size_t operator()(const UniqueID &id) const { - size_t result; - std::memcpy(&result, id.data(), sizeof(size_t)); - return result; - } -}; - std::ostream &operator<<(std::ostream &os, const UniqueID &id); typedef UniqueID TaskID; @@ -98,4 +90,15 @@ int64_t ComputeObjectIndex(const ObjectID &object_id); } // namespace ray +namespace std { +template <> +struct hash<::ray::UniqueID> { + size_t operator()(const ::ray::UniqueID &id) const { return id.hash(); } +}; + +template <> +struct hash { + size_t operator()(const ::ray::UniqueID &id) const { return id.hash(); } +}; +} #endif // RAY_ID_H_ diff --git a/src/ray/object_manager/connection_pool.h b/src/ray/object_manager/connection_pool.h index 4ce2133d426b4..15774a28798cc 100644 --- a/src/ray/object_manager/connection_pool.h +++ b/src/ray/object_manager/connection_pool.h @@ -91,11 +91,10 @@ class ConnectionPool { private: /// A container type that maps ClientID to a connection type. using SenderMapType = - std::unordered_map>, - ray::UniqueIDHasher>; + std::unordered_map>>; using ReceiverMapType = - std::unordered_map>, - ray::UniqueIDHasher>; + std::unordered_map>>; /// Adds a receiver for ClientID to the given map. void Add(ReceiverMapType &conn_map, const ClientID &client_id, diff --git a/src/ray/object_manager/object_buffer_pool.cc b/src/ray/object_manager/object_buffer_pool.cc index beab8473650ea..1ab9069bb36cd 100644 --- a/src/ray/object_manager/object_buffer_pool.cc +++ b/src/ray/object_manager/object_buffer_pool.cc @@ -51,7 +51,7 @@ std::pair ObjectBufferPool::Ge RAY_CHECK(object_buffer.metadata->data() == object_buffer.data->data() + object_buffer.data->size()); RAY_CHECK(data_size == static_cast(object_buffer.data->size() + - object_buffer.metadata_size)); + object_buffer.metadata->size())); auto *data = const_cast(object_buffer.data->data()); uint64_t num_chunks = GetNumChunks(data_size); get_buffer_state_.emplace( diff --git a/src/ray/object_manager/object_buffer_pool.h b/src/ray/object_manager/object_buffer_pool.h index 3edc1be30da75..4030e09e7d461 100644 --- a/src/ray/object_manager/object_buffer_pool.h +++ b/src/ray/object_manager/object_buffer_pool.h @@ -179,11 +179,9 @@ class ObjectBufferPool { /// Determines the maximum chunk size to be transferred by a single thread. const uint64_t chunk_size_; /// The state of a buffer that's currently being used. - std::unordered_map - get_buffer_state_; + std::unordered_map get_buffer_state_; /// The state of a buffer that's currently being used. - std::unordered_map - create_buffer_state_; + std::unordered_map create_buffer_state_; /// Plasma client pool. plasma::PlasmaClient store_client_; diff --git a/src/ray/object_manager/object_directory.cc b/src/ray/object_manager/object_directory.cc index 77f9a3a03250c..e7a6c8504c4ac 100644 --- a/src/ray/object_manager/object_directory.cc +++ b/src/ray/object_manager/object_directory.cc @@ -77,7 +77,7 @@ void ObjectDirectory::GetLocationsComplete( return; } // Build the set of current locations based on the entries in the log. - std::unordered_set locations; + std::unordered_set locations; for (auto entry : location_entries) { ClientID client_id = ClientID::from_binary(entry.manager); if (!entry.is_eviction) { diff --git a/src/ray/object_manager/object_directory.h b/src/ray/object_manager/object_directory.h index df326125cd2cf..7eca8c550526a 100644 --- a/src/ray/object_manager/object_directory.h +++ b/src/ray/object_manager/object_directory.h @@ -123,7 +123,7 @@ class ObjectDirectory : public ObjectDirectoryInterface { const std::vector &location_entries); /// Maintain map of in-flight GetLocation requests. - std::unordered_map existing_requests_; + std::unordered_map existing_requests_; /// Reference to the gcs client. std::shared_ptr gcs_client_; }; diff --git a/src/ray/object_manager/object_manager.h b/src/ray/object_manager/object_manager.h index 1dad93f60c77a..117a3073d4148 100644 --- a/src/ray/object_manager/object_manager.h +++ b/src/ray/object_manager/object_manager.h @@ -179,12 +179,11 @@ class ObjectManager { ConnectionPool connection_pool_; /// Timeout for failed pull requests. - std::unordered_map, - UniqueIDHasher> + std::unordered_map> pull_requests_; /// Cache of locally available objects. - std::unordered_map local_objects_; + std::unordered_map local_objects_; /// Handle starting, running, and stopping asio io_service. void StartIOService(); diff --git a/src/ray/object_manager/test/object_manager_stress_test.cc b/src/ray/object_manager/test/object_manager_stress_test.cc index 8019725e4e767..350b37c4caed9 100644 --- a/src/ray/object_manager/test/object_manager_stress_test.cc +++ b/src/ray/object_manager/test/object_manager_stress_test.cc @@ -296,9 +296,9 @@ class StressTestObjectManager : public TestObjectManagerBase { plasma::ObjectBuffer object_buffer_2 = GetObject(client2, object_id_2); uint8_t *data_1 = const_cast(object_buffer_1.data->data()); uint8_t *data_2 = const_cast(object_buffer_2.data->data()); - ASSERT_EQ(object_buffer_1.data->size(), object_buffer_2.data_size); - ASSERT_EQ(object_buffer_1.metadata_size, object_buffer_2.metadata_size); - int64_t total_size = object_buffer_1.data->size() + object_buffer_1.metadata_size; + ASSERT_EQ(object_buffer_1.data->size(), object_buffer_2.data->size()); + ASSERT_EQ(object_buffer_1.metadata->size(), object_buffer_2.metadata->size()); + int64_t total_size = object_buffer_1.data->size() + object_buffer_1.metadata->size(); RAY_LOG(DEBUG) << "total_size " << total_size; for (int i = -1; ++i < total_size;) { ASSERT_TRUE(data_1[i] == data_2[i]); diff --git a/src/ray/raylet/actor_registration.cc b/src/ray/raylet/actor_registration.cc index c1e6303fb637f..efb5699e619cd 100644 --- a/src/ray/raylet/actor_registration.cc +++ b/src/ray/raylet/actor_registration.cc @@ -23,7 +23,7 @@ const ObjectID ActorRegistration::GetExecutionDependency() const { return execution_dependency_; } -const std::unordered_map +const std::unordered_map &ActorRegistration::GetFrontier() const { return frontier_; } diff --git a/src/ray/raylet/actor_registration.h b/src/ray/raylet/actor_registration.h index 486be2719c17f..e5721b686f45b 100644 --- a/src/ray/raylet/actor_registration.h +++ b/src/ray/raylet/actor_registration.h @@ -63,8 +63,7 @@ class ActorRegistration { /// /// \return The actor frontier, a map from handle ID to execution state for /// that handle. - const std::unordered_map &GetFrontier() - const; + const std::unordered_map &GetFrontier() const; /// Extend the frontier of the actor by a single task. This should be called /// whenever the actor executes a task. @@ -86,7 +85,7 @@ class ActorRegistration { /// The execution frontier of the actor, which represents which tasks have /// executed so far and which tasks may execute next, based on execution /// dependencies. This is indexed by handle. - std::unordered_map frontier_; + std::unordered_map frontier_; }; } // namespace raylet diff --git a/src/ray/raylet/lineage_cache.cc b/src/ray/raylet/lineage_cache.cc index 11aef6007b2b3..592c26481c245 100644 --- a/src/ray/raylet/lineage_cache.cc +++ b/src/ray/raylet/lineage_cache.cc @@ -27,9 +27,8 @@ const TaskID LineageEntry::GetEntryId() const { return task_.GetTaskSpecification().TaskId(); } -const std::unordered_set LineageEntry::GetParentTaskIds() - const { - std::unordered_set parent_ids; +const std::unordered_set LineageEntry::GetParentTaskIds() const { + std::unordered_set parent_ids; // A task's parents are the tasks that created its arguments. auto dependencies = task_.GetDependencies(); for (auto &dependency : dependencies) { @@ -104,8 +103,7 @@ boost::optional Lineage::PopEntry(const UniqueID &task_id) { } } -const std::unordered_map - &Lineage::GetEntries() const { +const std::unordered_map &Lineage::GetEntries() const { return entries_; } diff --git a/src/ray/raylet/lineage_cache.h b/src/ray/raylet/lineage_cache.h index 3b40f8d8858f5..44b9b62f4776e 100644 --- a/src/ray/raylet/lineage_cache.h +++ b/src/ray/raylet/lineage_cache.h @@ -76,7 +76,7 @@ class LineageEntry { /// that created its arguments. /// /// \return The IDs of the parent entries. - const std::unordered_set GetParentTaskIds() const; + const std::unordered_set GetParentTaskIds() const; /// Get the task data. /// @@ -85,7 +85,6 @@ class LineageEntry { Task &TaskDataMutable(); - private: /// The current state of this entry according to its status in the GCS. GcsStatus status_; /// The task data to be written to the GCS. This is nullptr if the entry is @@ -139,8 +138,7 @@ class Lineage { /// Get all entries in the lineage. /// /// \return A const reference to the lineage entries. - const std::unordered_map &GetEntries() - const; + const std::unordered_map &GetEntries() const; /// Serialize this lineage to a ForwardTaskRequest flatbuffer. /// @@ -153,7 +151,7 @@ class Lineage { private: /// The lineage entries. - std::unordered_map entries_; + std::unordered_map entries_; }; /// \class LineageCache @@ -226,13 +224,13 @@ class LineageCache { // which tasks are flushable, to avoid iterating over tasks that are in // UNCOMMITTED_READY, but that have dependencies that have not been committed // yet. - std::unordered_set uncommitted_ready_tasks_; + std::unordered_set uncommitted_ready_tasks_; /// All tasks and objects that we are responsible for writing back to the /// GCS, and the tasks and objects in their lineage. Lineage lineage_; /// The tasks that we've subscribed to notifications for from the pubsub /// storage system. We will receive a notification for these tasks on commit. - std::unordered_set subscribed_tasks_; + std::unordered_set subscribed_tasks_; }; } // namespace raylet diff --git a/src/ray/raylet/lineage_cache_test.cc b/src/ray/raylet/lineage_cache_test.cc index de89a5b57ea65..9a51a3cf93219 100644 --- a/src/ray/raylet/lineage_cache_test.cc +++ b/src/ray/raylet/lineage_cache_test.cc @@ -68,21 +68,17 @@ class MockGcs : public gcs::TableInterface, callbacks_.clear(); } - const std::unordered_map, UniqueIDHasher> - &TaskTable() const { + const std::unordered_map> &TaskTable() const { return task_table_; } - const std::unordered_set &SubscribedTasks() const { - return subscribed_tasks_; - } + const std::unordered_set &SubscribedTasks() const { return subscribed_tasks_; } private: - std::unordered_map, UniqueIDHasher> - task_table_; + std::unordered_map> task_table_; std::vector> callbacks_; gcs::raylet::TaskTable::WriteCallback notification_callback_; - std::unordered_set subscribed_tasks_; + std::unordered_set subscribed_tasks_; }; class LineageCacheTest : public ::testing::Test { diff --git a/src/ray/raylet/mock_gcs_client.cc b/src/ray/raylet/mock_gcs_client.cc index c8c370227de30..69b197899b293 100644 --- a/src/ray/raylet/mock_gcs_client.cc +++ b/src/ray/raylet/mock_gcs_client.cc @@ -31,7 +31,7 @@ ray::Status ObjectTable::Add(const ObjectID &object_id, const ClientID &client_i const DoneCallback &done_callback) { if (client_lookup.count(object_id) == 0) { RAY_LOG(DEBUG) << "Add ObjectID set " << object_id; - client_lookup[object_id] = std::unordered_set(); + client_lookup[object_id] = std::unordered_set(); } else if (client_lookup[object_id].count(client_id) != 0) { return ray::Status::KeyError("ClientID already exists."); } diff --git a/src/ray/raylet/mock_gcs_client.h b/src/ray/raylet/mock_gcs_client.h index b519e6fea1817..f84b57dbc3636 100644 --- a/src/ray/raylet/mock_gcs_client.h +++ b/src/ray/raylet/mock_gcs_client.h @@ -31,9 +31,7 @@ class ObjectTable { private: std::vector empty_set_; - std::unordered_map, - UniqueIDHasher> - client_lookup; + std::unordered_map> client_lookup; }; class ClientInformation { @@ -53,7 +51,7 @@ class ClientInformation { class ClientTable { public: - typedef std::unordered_map info_type; + typedef std::unordered_map info_type; using ClientIDsCallback = std::function)>; using SingleInfoCallback = std::function; diff --git a/src/ray/raylet/monitor.h b/src/ray/raylet/monitor.h index 3b383160e101b..21bad9b3e4476 100644 --- a/src/ray/raylet/monitor.h +++ b/src/ray/raylet/monitor.h @@ -44,9 +44,9 @@ class Monitor { boost::asio::deadline_timer heartbeat_timer_; /// For each Raylet that we receive a heartbeat from, the number of ticks /// that may pass before the Raylet will be declared dead. - std::unordered_map heartbeats_; + std::unordered_map heartbeats_; /// The Raylets that have been marked as dead in the client table. - std::unordered_set dead_clients_; + std::unordered_set dead_clients_; }; } // namespace raylet diff --git a/src/ray/raylet/node_manager.cc b/src/ray/raylet/node_manager.cc index 14105e0576a33..82740f1f5985e 100644 --- a/src/ray/raylet/node_manager.cc +++ b/src/ray/raylet/node_manager.cc @@ -1,15 +1,38 @@ #include "ray/raylet/node_manager.h" #include "common_protocol.h" +#include "local_scheduler/format/local_scheduler_generated.h" #include "ray/raylet/format/node_manager_generated.h" namespace { +#define RAY_CHECK_ENUM(x, y) \ + static_assert(static_cast(x) == static_cast(y), "protocol mismatch") + +// Check consistency between client and server protocol. +RAY_CHECK_ENUM(protocol::MessageType_SubmitTask, MessageType_SubmitTask); +RAY_CHECK_ENUM(protocol::MessageType_TaskDone, MessageType_TaskDone); +RAY_CHECK_ENUM(protocol::MessageType_EventLogMessage, MessageType_EventLogMessage); +RAY_CHECK_ENUM(protocol::MessageType_RegisterClientRequest, + MessageType_RegisterClientRequest); +RAY_CHECK_ENUM(protocol::MessageType_RegisterClientReply, + MessageType_RegisterClientReply); +RAY_CHECK_ENUM(protocol::MessageType_DisconnectClient, MessageType_DisconnectClient); +RAY_CHECK_ENUM(protocol::MessageType_GetTask, MessageType_GetTask); +RAY_CHECK_ENUM(protocol::MessageType_ExecuteTask, MessageType_ExecuteTask); +RAY_CHECK_ENUM(protocol::MessageType_ReconstructObject, MessageType_ReconstructObject); +RAY_CHECK_ENUM(protocol::MessageType_NotifyUnblocked, MessageType_NotifyUnblocked); +RAY_CHECK_ENUM(protocol::MessageType_PutObject, MessageType_PutObject); +RAY_CHECK_ENUM(protocol::MessageType_GetActorFrontierRequest, + MessageType_GetActorFrontierRequest); +RAY_CHECK_ENUM(protocol::MessageType_GetActorFrontierReply, + MessageType_GetActorFrontierReply); +RAY_CHECK_ENUM(protocol::MessageType_SetActorFrontier, MessageType_SetActorFrontier); + /// A helper function to determine whether a given actor task has already been executed /// according to the given actor registry. Returns true if the task is a duplicate. bool CheckDuplicateActorTask( - const std::unordered_map - &actor_registry, + const std::unordered_map &actor_registry, const ray::raylet::TaskSpecification &spec) { auto actor_entry = actor_registry.find(spec.ActorId()); RAY_CHECK(actor_entry != actor_registry.end()); @@ -238,7 +261,7 @@ void NodeManager::HandleActorCreation(const ActorID &actor_id, // Dequeue any methods that were submitted before the actor's location was // known. const auto &methods = local_queues_.GetUncreatedActorMethods(); - std::unordered_set created_actor_method_ids; + std::unordered_set created_actor_method_ids; for (const auto &method : methods) { if (method.GetTaskSpecification().ActorId() == actor_id) { created_actor_method_ids.insert(method.GetTaskSpecification().TaskId()); @@ -458,7 +481,7 @@ void NodeManager::ScheduleTasks() { } // Extract decision for this local scheduler. - std::unordered_set local_task_ids; + std::unordered_set local_task_ids; // Iterate over (taskid, clientid) pairs, extract tasks assigned to the local node. for (const auto &task_schedule : policy_decision) { TaskID task_id = task_schedule.first; diff --git a/src/ray/raylet/node_manager.h b/src/ray/raylet/node_manager.h index e63aa1f406239..3cf77327d08dd 100644 --- a/src/ray/raylet/node_manager.h +++ b/src/ray/raylet/node_manager.h @@ -100,7 +100,7 @@ class NodeManager { /// The resources local to this node. const SchedulingResources local_resources_; // TODO(atumanov): Add resource information from other nodes. - std::unordered_map cluster_resource_map_; + std::unordered_map cluster_resource_map_; /// A pool of workers. WorkerPool worker_pool_; /// A set of queues to maintain tasks. @@ -114,9 +114,8 @@ class NodeManager { /// The lineage cache for the GCS object and task tables. LineageCache lineage_cache_; std::vector remote_clients_; - std::unordered_map - remote_server_connections_; - std::unordered_map actor_registry_; + std::unordered_map remote_server_connections_; + std::unordered_map actor_registry_; }; } // namespace raylet diff --git a/src/ray/raylet/scheduling_policy.cc b/src/ray/raylet/scheduling_policy.cc index 0ec8f3d08254a..4d13fdc4c9bf3 100644 --- a/src/ray/raylet/scheduling_policy.cc +++ b/src/ray/raylet/scheduling_policy.cc @@ -9,12 +9,11 @@ namespace raylet { SchedulingPolicy::SchedulingPolicy(const SchedulingQueue &scheduling_queue) : scheduling_queue_(scheduling_queue), gen_(rd_()) {} -std::unordered_map SchedulingPolicy::Schedule( - const std::unordered_map - &cluster_resources, +std::unordered_map SchedulingPolicy::Schedule( + const std::unordered_map &cluster_resources, const ClientID &local_client_id, const std::vector &others) { // The policy decision to be returned. - std::unordered_map decision; + std::unordered_map decision; // TODO(atumanov): protect DEBUG code blocks with ifdef DEBUG RAY_LOG(DEBUG) << "[Schedule] cluster resource map: "; for (const auto &client_resource_pair : cluster_resources) { diff --git a/src/ray/raylet/scheduling_policy.h b/src/ray/raylet/scheduling_policy.h index f049cfc228ae8..6785f189f6834 100644 --- a/src/ray/raylet/scheduling_policy.h +++ b/src/ray/raylet/scheduling_policy.h @@ -28,9 +28,8 @@ class SchedulingPolicy { /// \param cluster_resources: a set of cluster resources representing /// configured and current resource capacity on each node. /// \return Scheduling decision, mapping tasks to node managers for placement. - std::unordered_map Schedule( - const std::unordered_map - &cluster_resources, + std::unordered_map Schedule( + const std::unordered_map &cluster_resources, const ClientID &local_client_id, const std::vector &others); /// \brief SchedulingPolicy destructor. diff --git a/src/ray/raylet/scheduling_queue.cc b/src/ray/raylet/scheduling_queue.cc index 1ff6963cb7905..f8cd9d785766c 100644 --- a/src/ray/raylet/scheduling_queue.cc +++ b/src/ray/raylet/scheduling_queue.cc @@ -36,8 +36,7 @@ const std::list &SchedulingQueue::GetReadyMethods() const { // Helper function to remove tasks in the given set of task_ids from a // queue, and append them to the given vector removed_tasks. -void removeTasksFromQueue(std::list &queue, - std::unordered_set &task_ids, +void removeTasksFromQueue(std::list &queue, std::unordered_set &task_ids, std::vector &removed_tasks) { for (auto it = queue.begin(); it != queue.end();) { auto task_id = task_ids.find(it->GetTaskSpecification().TaskId()); @@ -58,8 +57,7 @@ void queueTasks(std::list &queue, const std::vector &tasks) { } } -std::vector SchedulingQueue::RemoveTasks( - std::unordered_set task_ids) { +std::vector SchedulingQueue::RemoveTasks(std::unordered_set task_ids) { // List of removed tasks to be returned. std::vector removed_tasks; diff --git a/src/ray/raylet/scheduling_queue.h b/src/ray/raylet/scheduling_queue.h index 068bfceb61aa9..7da3050f3bcf0 100644 --- a/src/ray/raylet/scheduling_queue.h +++ b/src/ray/raylet/scheduling_queue.h @@ -77,7 +77,7 @@ class SchedulingQueue { /// \param tasks The set of task IDs to remove from the queue. The /// corresponding tasks must be contained in the queue. /// \return A vector of the tasks that were removed. - std::vector RemoveTasks(std::unordered_set tasks); + std::vector RemoveTasks(std::unordered_set tasks); /// Queue tasks that are destined for actors that have not yet been created. /// diff --git a/src/ray/raylet/task_dependency_manager.h b/src/ray/raylet/task_dependency_manager.h index e45a826b187ac..b336ed4db6091 100644 --- a/src/ray/raylet/task_dependency_manager.h +++ b/src/ray/raylet/task_dependency_manager.h @@ -73,14 +73,12 @@ class TaskDependencyManager { ObjectManager &object_manager_; /// A mapping from task ID of each subscribed task to its list of /// dependencies. - std::unordered_map, UniqueIDHasher> - task_dependencies_; + std::unordered_map> task_dependencies_; // A mapping from object ID of each object that is not locally available to // the list of subscribed tasks that are dependent on it. - std::unordered_map, UniqueIDHasher> - remote_object_dependencies_; + std::unordered_map> remote_object_dependencies_; // The set of locally available objects. - std::unordered_set local_objects_; + std::unordered_set local_objects_; // The callback to call when a subscribed task becomes ready. std::function task_ready_callback_; }; diff --git a/src/ray/raylet/worker_pool.h b/src/ray/raylet/worker_pool.h index 5cb5bb2bf8989..8b6ef1e54d24a 100644 --- a/src/ray/raylet/worker_pool.h +++ b/src/ray/raylet/worker_pool.h @@ -103,7 +103,7 @@ class WorkerPool { /// The pool of idle workers. std::list> pool_; /// The pool of idle actor workers. - std::unordered_map, UniqueIDHasher> actor_pool_; + std::unordered_map> actor_pool_; /// All workers that have registered and are still connected, including both /// idle and executing. // TODO(swang): Make this a map to make GetRegisteredWorker faster. diff --git a/thirdparty/scripts/build_arrow.sh b/thirdparty/scripts/build_arrow.sh index 94020eb7289a4..f2fbb5c1d438e 100755 --- a/thirdparty/scripts/build_arrow.sh +++ b/thirdparty/scripts/build_arrow.sh @@ -44,14 +44,10 @@ if [[ ! -d $TP_DIR/../python/ray/pyarrow_files/pyarrow ]]; then pushd $TP_DIR/build/arrow git fetch origin master - # The PR for this commit is https://github.com/apache/arrow/pull/1874. We + # The PR for this commit is https://github.com/apache/arrow/pull/1939. We # include the link here to make it easier to find the right commit because # Arrow often rewrites git history and invalidates certain commits. - git checkout 0f87c12d45250ee763ac8c43b7e57e8f06a0b9f3 - - # Revert https://github.com/apache/arrow/pull/1807, which unfortunately - # introduces the issue in https://issues.apache.org/jira/browse/ARROW-2448. - git revert --no-commit cf396867df6f1f93948c69ce10ceb0f95e399242 + git checkout 5f9cf9c96709f92e9ac4828cf3e106a165576ce7 cd cpp if [ ! -d "build" ]; then diff --git a/thirdparty/scripts/build_flatbuffers.sh b/thirdparty/scripts/build_flatbuffers.sh index 59f14a05eaf14..539e06f4a7c45 100755 --- a/thirdparty/scripts/build_flatbuffers.sh +++ b/thirdparty/scripts/build_flatbuffers.sh @@ -7,7 +7,7 @@ set -e TP_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)/../ -FLATBUFFERS_VERSION=1.7.1 +FLATBUFFERS_VERSION=1.9.0 # Download and compile flatbuffers if it isn't already present. if [ ! -d $TP_DIR/pkg/flatbuffers ]; then @@ -20,6 +20,7 @@ if [ ! -d $TP_DIR/pkg/flatbuffers ]; then # Compile flatbuffers. pushd flatbuffers-$FLATBUFFERS_VERSION cmake -DCMAKE_CXX_FLAGS=-fPIC \ + -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX:PATH=$TP_DIR/pkg/flatbuffers \ -DFLATBUFFERS_BUILD_TESTS=OFF make -j5