Skip to content

Commit bc65ff4

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents 0c90785 + 0ac3d98 commit bc65ff4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+2686
-1761
lines changed

asv_bench/benchmarks/algorithms.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,17 @@ def setup(self):
1818
self.float = pd.Float64Index(np.random.randn(N).repeat(5))
1919

2020
# Convenience naming.
21-
self.checked_add = pd.core.nanops._checked_add_with_arr
21+
self.checked_add = pd.core.algorithms.checked_add_with_arr
2222

2323
self.arr = np.arange(1000000)
2424
self.arrpos = np.arange(1000000)
2525
self.arrneg = np.arange(-1000000, 0)
2626
self.arrmixed = np.array([1, -1]).repeat(500000)
2727
self.strings = tm.makeStringIndex(100000)
2828

29+
self.arr_nan = np.random.choice([True, False], size=1000000)
30+
self.arrmixed_nan = np.random.choice([True, False], size=1000000)
31+
2932
# match
3033
self.uniques = tm.makeStringIndex(1000).values
3134
self.all = self.uniques.repeat(10)
@@ -69,6 +72,16 @@ def time_add_overflow_neg_arr(self):
6972
def time_add_overflow_mixed_arr(self):
7073
self.checked_add(self.arr, self.arrmixed)
7174

75+
def time_add_overflow_first_arg_nan(self):
76+
self.checked_add(self.arr, self.arrmixed, arr_mask=self.arr_nan)
77+
78+
def time_add_overflow_second_arg_nan(self):
79+
self.checked_add(self.arr, self.arrmixed, b_mask=self.arrmixed_nan)
80+
81+
def time_add_overflow_both_arg_nan(self):
82+
self.checked_add(self.arr, self.arrmixed, arr_mask=self.arr_nan,
83+
b_mask=self.arrmixed_nan)
84+
7285

7386
class Hashing(object):
7487
goal_time = 0.2

asv_bench/benchmarks/io_bench.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def setup(self, compression, engine):
153153
# The Python 2 C parser can't read bz2 from open files.
154154
raise NotImplementedError
155155
try:
156-
import boto
156+
import s3fs
157157
except ImportError:
158158
# Skip these benchmarks if `s3fs` is not installed.
159159
raise NotImplementedError

asv_bench/benchmarks/join_merge.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,12 +302,19 @@ def setup(self):
302302
self.df1 = self.df1.sort_values('time')
303303
self.df2 = self.df2.sort_values('time')
304304

305+
self.df1['time32'] = np.int32(self.df1.time)
306+
self.df2['time32'] = np.int32(self.df2.time)
307+
305308
self.df1a = self.df1[['time', 'value1']]
306309
self.df2a = self.df2[['time', 'value2']]
307310
self.df1b = self.df1[['time', 'key', 'value1']]
308311
self.df2b = self.df2[['time', 'key', 'value2']]
309312
self.df1c = self.df1[['time', 'key2', 'value1']]
310313
self.df2c = self.df2[['time', 'key2', 'value2']]
314+
self.df1d = self.df1[['time32', 'value1']]
315+
self.df2d = self.df2[['time32', 'value2']]
316+
self.df1e = self.df1[['time', 'key', 'key2', 'value1']]
317+
self.df2e = self.df2[['time', 'key', 'key2', 'value2']]
311318

312319
def time_noby(self):
313320
merge_asof(self.df1a, self.df2a, on='time')
@@ -318,6 +325,12 @@ def time_by_object(self):
318325
def time_by_int(self):
319326
merge_asof(self.df1c, self.df2c, on='time', by='key2')
320327

328+
def time_on_int32(self):
329+
merge_asof(self.df1d, self.df2d, on='time32')
330+
331+
def time_multiby(self):
332+
merge_asof(self.df1e, self.df2e, on='time', by=['key', 'key2'])
333+
321334

322335
#----------------------------------------------------------------------
323336
# data alignment

asv_bench/benchmarks/series_methods.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,28 @@ def setup(self):
88
self.dr = pd.date_range(
99
start=datetime(2015,10,26),
1010
end=datetime(2016,1,1),
11-
freq='10s'
12-
) # ~500k long
11+
freq='50s'
12+
) # ~100k long
1313

1414
def time_series_constructor_no_data_datetime_index(self):
1515
Series(data=None, index=self.dr)
1616

1717

18+
class series_constructor_dict_data_datetime_index(object):
19+
goal_time = 0.2
20+
21+
def setup(self):
22+
self.dr = pd.date_range(
23+
start=datetime(2015, 10, 26),
24+
end=datetime(2016, 1, 1),
25+
freq='50s'
26+
) # ~100k long
27+
self.data = {d: v for d, v in zip(self.dr, range(len(self.dr)))}
28+
29+
def time_series_constructor_no_data_datetime_index(self):
30+
Series(data=self.data, index=self.dr)
31+
32+
1833
class series_isin_int64(object):
1934
goal_time = 0.2
2035

ci/requirements-2.7-64.run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ sqlalchemy
1111
lxml=3.2.1
1212
scipy
1313
xlsxwriter
14-
boto
14+
s3fs
1515
bottleneck
1616
html5lib
1717
beautiful-soup

ci/requirements-2.7.run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ sqlalchemy=0.9.6
1111
lxml=3.2.1
1212
scipy
1313
xlsxwriter=0.4.6
14-
boto=2.36.0
14+
s3fs
1515
bottleneck
1616
psycopg2=2.5.2
1717
patsy

ci/requirements-2.7_SLOW.run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ numexpr
1313
pytables
1414
sqlalchemy
1515
lxml
16-
boto
16+
s3fs
1717
bottleneck
1818
psycopg2
1919
pymysql

ci/requirements-3.5.run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ sqlalchemy
1717
pymysql
1818
psycopg2
1919
xarray
20-
boto
20+
s3fs
2121

2222
# incompat with conda ATM
2323
# beautiful-soup

ci/requirements-3.5_OSX.run

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ matplotlib
1212
jinja2
1313
bottleneck
1414
xarray
15-
boto
15+
s3fs
1616

1717
# incompat with conda ATM
1818
# beautiful-soup

doc/source/install.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ Optional Dependencies
262262
* `XlsxWriter <https://pypi.python.org/pypi/XlsxWriter>`__: Alternative Excel writer
263263

264264
* `Jinja2 <http://jinja.pocoo.org/>`__: Template engine for conditional HTML formatting.
265-
* `boto <https://pypi.python.org/pypi/boto>`__: necessary for Amazon S3 access.
265+
* `s3fs <http://s3fs.readthedocs.io/>`__: necessary for Amazon S3 access (s3fs >= 0.0.7).
266266
* `blosc <https://pypi.python.org/pypi/blosc>`__: for msgpack compression using ``blosc``
267267
* One of `PyQt4
268268
<http://www.riverbankcomputing.com/software/pyqt/download>`__, `PySide

doc/source/io.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1487,6 +1487,23 @@ options include:
14871487
Specifying any of the above options will produce a ``ParserWarning`` unless the
14881488
python engine is selected explicitly using ``engine='python'``.
14891489

1490+
Reading remote files
1491+
''''''''''''''''''''
1492+
1493+
You can pass in a URL to a CSV file:
1494+
1495+
.. code-block:: python
1496+
1497+
df = pd.read_csv('https://download.bls.gov/pub/time.series/cu/cu.item',
1498+
sep='\t')
1499+
1500+
S3 URLs are handled as well:
1501+
1502+
.. code-block:: python
1503+
1504+
df = pd.read_csv('s3://pandas-test/tips.csv')
1505+
1506+
14901507
Writing out Data
14911508
''''''''''''''''
14921509

doc/source/whatsnew/v0.19.2.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,15 @@ Performance Improvements
2222
~~~~~~~~~~~~~~~~~~~~~~~~
2323

2424
- Improved performance of ``.replace()`` (:issue:`12745`)
25+
- Improved performance of ``Series`` creation with a datetime index and dictionary data (:issue:`14894`)
2526

2627
.. _whatsnew_0192.enhancements.other:
2728

2829
Other Enhancements
2930
~~~~~~~~~~~~~~~~~~
3031

3132
- ``pd.merge_asof()`` gained ``left_index``/``right_index`` and ``left_by``/``right_by`` arguments (:issue:`14253`)
33+
- ``pd.merge_asof()`` can take multiple columns in ``by`` parameter and has specialized dtypes for better performance (:issue:`13936`)
3234

3335

3436

@@ -39,10 +41,13 @@ Bug Fixes
3941

4042
- Compat with ``dateutil==2.6.0``; segfault reported in the testing suite (:issue:`14621`)
4143
- Allow ``nanoseconds`` in ``Timestamp.replace`` as a kwarg (:issue:`14621`)
44+
- Bug in ``pd.read_csv`` in which aliasing was being done for ``na_values`` when passed in as a dictionary (:issue:`14203`)
45+
- Bug in ``pd.read_csv`` in which column indices for a dict-like ``na_values`` were not being respected (:issue:`14203`)
4246
- Bug in ``pd.read_csv`` where reading files fails, if the number of headers is equal to the number of lines in the file (:issue:`14515`)
4347
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when multi-char delimiters were not being respected with quotes (:issue:`14582`)
4448
- Fix bugs (:issue:`14734`, :issue:`13654`) in ``pd.read_sas`` and ``pandas.io.sas.sas7bdat.SAS7BDATReader`` that caused problems when reading a SAS file incrementally.
4549
- Bug in ``pd.read_csv`` for the Python engine in which an unhelpful error message was being raised when ``skipfooter`` was not being respected by Python's CSV library (:issue:`13879`)
50+
- Bug in ``.fillna()`` in which timezone aware datetime64 values were incorrectly rounded (:issue:`14872`)
4651

4752

4853
- Bug in ``.groupby(..., sort=True)`` of a non-lexsorted MultiIndex when grouping with multiple levels (:issue:`14776`)

doc/source/whatsnew/v0.20.0.txt

Lines changed: 102 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,27 @@ Strings passed to ``DataFrame.groupby()`` as the ``by`` parameter may now refere
6464

6565
df.groupby(['second', 'A']).sum()
6666

67+
.. _whatsnew_0200.enhancements.compressed_urls:
68+
69+
Better support for compressed URLs in ``read_csv``
70+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
71+
72+
The compression code was refactored (:issue:`12688`). As a result, reading
73+
dataframes from URLs in :func:`read_csv` or :func:`read_table` now supports
74+
additional compression methods: ``xz``, ``bz2``, and ``zip`` (:issue:`14570`).
75+
Previously, only ``gzip`` compression was supported. By default, compression of
76+
URLs and paths are now both inferred using their file extensions. Additionally,
77+
support for bz2 compression in the python 2 c-engine improved (:issue:`14874`).
78+
79+
.. ipython:: python
80+
url = 'https://github.com/{repo}/raw/{branch}/{path}'.format(
81+
repo = 'pandas-dev/pandas',
82+
branch = 'master',
83+
path = 'pandas/io/tests/parser/data/salaries.csv.bz2',
84+
)
85+
df = pd.read_table(url, compression='infer') # default, infer compression
86+
df = pd.read_table(url, compression='bz2') # explicitly specify compression
87+
df.head(2)
6788

6889
.. _whatsnew_0200.enhancements.other:
6990

@@ -85,14 +106,92 @@ Other enhancements
85106
- ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
86107
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
87108

109+
- ``.select_dtypes()`` now allows `datetimetz` to generically select datetimes with tz (:issue:`14910`)
110+
88111

89112
.. _whatsnew_0200.api_breaking:
90113

91114
Backwards incompatible API changes
92115
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
93116

94-
.. _whatsnew_0200.api:
117+
.. _whatsnew.api_breaking.index_map
118+
119+
Map on Index types now return other Index types
120+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
121+
122+
- ``map`` on an ``Index`` now returns an ``Index``, not a numpy array (:issue:`12766`)
123+
124+
.. ipython:: python
125+
126+
idx = Index([1, 2])
127+
idx
128+
mi = MultiIndex.from_tuples([(1, 2), (2, 4)])
129+
mi
130+
131+
Previous Behavior:
132+
133+
.. code-block:: ipython
134+
135+
In [5]: idx.map(lambda x: x * 2)
136+
Out[5]: array([2, 4])
137+
138+
In [6]: idx.map(lambda x: (x, x * 2))
139+
Out[6]: array([(1, 2), (2, 4)], dtype=object)
140+
141+
In [7]: mi.map(lambda x: x)
142+
Out[7]: array([(1, 2), (2, 4)], dtype=object)
143+
144+
In [8]: mi.map(lambda x: x[0])
145+
Out[8]: array([1, 2])
146+
147+
New Behavior:
148+
149+
.. ipython:: python
150+
151+
idx.map(lambda x: x * 2)
152+
153+
idx.map(lambda x: (x, x * 2))
95154

155+
mi.map(lambda x: x)
156+
157+
mi.map(lambda x: x[0])
158+
159+
160+
- ``map`` on a Series with datetime64 values may return int64 dtypes rather than int32
161+
162+
.. ipython:: python
163+
164+
s = Series(date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H').tz_localize('Asia/Tokyo'))
165+
s
166+
167+
Previous Behavior:
168+
169+
.. code-block:: ipython
170+
171+
In [9]: s.map(lambda x: x.hour)
172+
Out[9]:
173+
0 0
174+
1 1
175+
2 2
176+
dtype: int32
177+
178+
179+
New Behavior:
180+
181+
.. ipython:: python
182+
183+
s.map(lambda x: x.hour)
184+
185+
.. _whatsnew_0200.s3:
186+
187+
S3 File Handling
188+
^^^^^^^^^^^^^^^^
189+
190+
pandas now uses `s3fs <http://s3fs.readthedocs.io/>`_ for handling S3 connections. This shouldn't break
191+
any code. However, since s3fs is not a required dependency, you will need to install it separately (like boto
192+
in prior versions of pandas) (:issue:`11915`).
193+
194+
.. _whatsnew_0200.api:
96195

97196
- ``CParserError`` has been renamed to ``ParserError`` in ``pd.read_csv`` and will be removed in the future (:issue:`12665`)
98197
- ``SparseArray.cumsum()`` and ``SparseSeries.cumsum()`` will now always return ``SparseArray`` and ``SparseSeries`` respectively (:issue:`12855`)
@@ -103,7 +202,6 @@ Backwards incompatible API changes
103202
Other API Changes
104203
^^^^^^^^^^^^^^^^^
105204

106-
107205
.. _whatsnew_0200.deprecations:
108206

109207
Deprecations
@@ -144,6 +242,8 @@ Performance Improvements
144242
Bug Fixes
145243
~~~~~~~~~
146244

245+
- Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`)
246+
- Bug in ``DataFrame`` construction in which unsigned 64-bit integer elements were being converted to objects (:issue:`14881`)
147247
- Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises an error with ``astype()`` for Series and DataFrames (:issue:`14265`)
148248

149249

@@ -158,5 +258,4 @@ Bug Fixes
158258

159259

160260

161-
162261
- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)

0 commit comments

Comments
 (0)