Document chunked arrays (#2102)

davidbrochart · web-flow · commit 5b37b8455108 · 2020-11-06T00:43:04.000+01:00
Add xchunked_array documentation
diff --git a/docs/source/api/chunked_array.rst b/docs/source/api/chunked_array.rst
@@ -0,0 +1,13 @@
+.. Copyright (c) 2016, Johan Mabille, Sylvain Corlay and Wolf Vollprecht
+
+   Distributed under the terms of the BSD 3-Clause License.
+
+   The full license is in the file LICENSE, distributed with this software.
+
+chunked_array
+=============
+
+Defined in ``xtensor/xchunked_array.hpp``
+
+.. doxygenfunction:: xt::chunked_array
+   :project: xtensor
diff --git a/docs/source/api/container_index.rst b/docs/source/api/container_index.rst
@@ -18,6 +18,7 @@ xexpression API is actually implemented in ``xstrided_container`` and ``xcontain
    xiterable
    xarray
    xarray_adaptor
+   chunked_array
    xtensor
    xtensor_adaptor
    xfixed
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -87,6 +87,7 @@ for details.
    view
    quickref/iterator
    quickref/manipulation
+   quickref/chunked_arrays
 
 .. toctree::
    :caption: API REFERENCE
diff --git a/docs/source/quickref/basic.rst b/docs/source/quickref/basic.rst
@@ -13,6 +13,7 @@ Tensor types
 - ``xarray<T>``: tensor that can be reshaped to any number of dimensions.
 - ``xtensor<T, N>``: tensor with a number of dimensions set to ``N`` at compile time.
 - ``xtensor_fixed<T, xshape<I, J, K>>``: tensor whose shape is fixed at compile time.
+- ``xchunked_array<CS>``: chunked array using the ``CS`` chunk storage.
 
 .. note::
 
@@ -28,7 +29,7 @@ Tensor with dynamic shape:
 
     #include "xarray.hpp"
 
-    xt::xarray<double>::shape_type shape = {2, 3}; 
+    xt::xarray<double>::shape_type shape = {2, 3};
     xt::xarray<double> a0(shape);
     xt::xarray<double> a1(shape, 2.5);
     xt::xarray<double> a2 = {{1., 2., 3.}, {4., 5., 6.}};
@@ -40,12 +41,12 @@ Tensor with static number of dimensions:
 
     #include "xtensor.hpp"
 
-    xt::xtensor<double, 2>::shape_type shape = {2, 3}; 
+    xt::xtensor<double, 2>::shape_type shape = {2, 3};
     xt::xtensor<double, 2> a0(shape);
     xt::xtensor<double, 2> a1(shape, 2.5);
     xt::xtensor<double, 2> a2 = {{1., 2., 3.}, {4., 5., 6.}};
     auto a3 = xt::xtensor<double, 2>::from_shape(shape);
-    
+
 Tensor with fixed shape:
 
 .. code::
@@ -54,6 +55,16 @@ Tensor with fixed shape:
 
     xt::xtensor_fixed<double, xt::xshape<2, 3>> a0 = {{1., 2., 3.}, {4., 5., 6.}};
 
+In-memory chunked tensor with dynamic shape:
+
+.. code::
+
+    #include "xtensor/xchunked_array.hpp"
+
+    std::vector<std::size_t> shape = {10, 10, 10};
+    std::vector<std::size_t> chunk_shape = {2, 3, 4};
+    auto a = xt::chunked_array<double>(shape, chunk_shape);
+
 Output
 ------
 
@@ -234,4 +245,3 @@ The underlying 1D data buffer can be accessed with the ``data`` method:
     a.data()[4] = 8.;
     std::cout << a << std::endl;
     // Outputs {{1., 2., 3.}, {8., 5., 6.}}
-    
diff --git a/docs/source/quickref/chunked_arrays.rst b/docs/source/quickref/chunked_arrays.rst
@@ -0,0 +1,69 @@
+.. Copyright (c) 2016, Johan Mabille, Sylvain Corlay and Wolf Vollprecht
+
+   Distributed under the terms of the BSD 3-Clause License.
+
+   The full license is in the file LICENSE, distributed with this software.
+
+Chunked arrays
+==============
+
+Motivation
+----------
+
+Arrays can be very large and may not fit in memory. In this case, you may not be
+able to use an in-memory array such as an ``xarray``. A solution to this problem
+is to cut up the large array into many small arrays, called chunks. Not only do
+the chunks fit comfortably in memory, but they can also be processed in
+parallel, including in a distributed environment (although this is not supported
+yet).
+
+Formats for the storage of arrays such as `Zarr <https://zarr.readthedocs.io>`_
+specifically target chunked arrays. Such formats are becoming increasingly
+popular in the field of big data, since the chunks can be stored in the cloud.
+
+In-memory chunked arrays
+------------------------
+
+An in-memory chunked array may not look very useful at first sight, since each
+chunk (and thus the whole array) is held in memory. It means that it cannot
+work with very large arrays, but it may be used to parallelize an algorithm, by
+processing several chunks at the same time.
+
+An in-memory chunked array has the following type:
+
+.. code::
+
+    #include "xtensor/xchunked_array.hpp"
+
+    using data_type = double;
+    // don't use this code:
+    using inmemory_chunked_array = xt::xchunked_array<xt::xarray<xt::xarray<data_type>>>;
+
+But you should not directly use this type to create a chunked array. Instead,
+use the ``chunked_array`` factory function:
+
+.. code::
+
+    #include "xtensor/xchunked_array.hpp"
+
+    std::vector<std::size_t> shape = {10, 10, 10};
+    std::vector<std::size_t> chunk_shape = {2, 3, 4};
+    auto a = xt::chunked_array<double>(shape, chunk_shape);
+    // a is an in-memory chunked array
+    // each chunk is an xarray<double>, and chunks are held in an xarray
+    // thus a is an xarray of xarray<double> elements
+    a(3, 9, 2) = 1.;  // this will address the chunk of index (1, 3, 0)
+                      // and in this chunk, the element of index (1, 0, 2)
+
+Chunked arrays implement the full semantics of ``xarray``, including lazy
+evaluation.
+
+Stored chunked arrays
+---------------------
+
+These are arrays whose chunks are stored on a file system, allowing for
+persistence of data. In particular, they are used as a building block for the
+`xtensor-zarr <https://github.com/xtensor-stack/xtensor-zarr>`_ library.
+
+For further details, please refer to the documentation
+of `xtensor-io <https://xtensor-io.readthedocs.io>`_.
diff --git a/include/xtensor/xchunked_array.hpp b/include/xtensor/xchunked_array.hpp
@@ -220,13 +220,52 @@ namespace xt
     template<class E>
     constexpr bool is_chunked(const xexpression<E>& e);
 
+    /**
+     * Creates an in-memory chunked array.
+     * This function returns an uninitialized ``xchunked_array<xarray<xarray<T>>, EXT>``.
+     *
+     * @tparam T The type of the elements (e.g. double)
+     * @tparam L The layout_type of the array
+     * @tparam EXT The type of the array extension (default: empty_extension)
+     *
+     * @param shape The shape of the array
+     * @param chunk_shape The shape of a chunk
+     * @param chunk_memory_layout The layout of each chunk (default: XTENSOR_DEFAULT_LAYOUT)
+     *
+     * @return a ``xchunked_array<xarray<xarray<T>>, EXT>`` with the given shape, chunk shape and memory layout.
+     */
     template <class T, layout_type L = XTENSOR_DEFAULT_LAYOUT, class EXT = empty_extension, class S>
     xchunked_array<xarray<xarray<T>>, EXT> chunked_array(S&& shape, S&& chunk_shape, layout_type chunk_memory_layout = XTENSOR_DEFAULT_LAYOUT);
 
+    /**
+     * Creates an in-memory chunked array.
+     * This function returns a ``xchunked_array<xarray<xarray<T>>, EXT>`` initialized from an expression, where ``T`` is the value type of the expression.
+     *
+     * @tparam L The layout_type of the array
+     * @tparam EXT The type of the array extension (default: empty_extension)
+     *
+     * @param e The expression to initialize the chunked array from
+     * @param chunk_shape The shape of a chunk
+     * @param chunk_memory_layout The layout of each chunk (default: XTENSOR_DEFAULT_LAYOUT)
+     *
+     * @return a ``xchunked_array<xarray<xarray<T>>, EXT>`` from the given expression, with the given chunk shape and memory layout.
+     */
     template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class EXT = empty_extension, class E, class S>
     xchunked_array<xarray<xarray<typename E::value_type>>, EXT>
     chunked_array(const xexpression<E>& e, S&& chunk_shape, layout_type chunk_memory_layout = XTENSOR_DEFAULT_LAYOUT);
 
+    /**
+     * Creates an in-memory chunked array.
+     * This function returns a ``xchunked_array<xarray<xarray<T>>, EXT>`` initialized from an expression, where ``T`` is the value type of the expression.
+     *
+     * @tparam L The layout_type of the array
+     * @tparam EXT The type of the array extension (default: empty_extension)
+     *
+     * @param e The expression to initialize the chunked array from
+     * @param chunk_memory_layout The layout of each chunk (default: XTENSOR_DEFAULT_LAYOUT)
+     *
+     * @return a ``xchunked_array<xarray<xarray<T>>, EXT>`` from the given expression, with the expression's chunk shape and the given memory layout.
+     */
     template <layout_type L = XTENSOR_DEFAULT_LAYOUT, class EXT = empty_extension, class E>
     xchunked_array<xarray<xarray<typename E::value_type>>, EXT>
     chunked_array(const xexpression<E>&e, layout_type chunk_memory_layout = XTENSOR_DEFAULT_LAYOUT);
@@ -398,7 +437,7 @@ namespace xt
         }
         return this->derived_cast();
     }
-    
+
     template <class D>
     template <class E>
     inline auto xchunked_semantic<D>::operator=(const xexpression<E>& e) -> derived_type&
@@ -407,7 +446,7 @@ namespace xt
         get_assigner(d.chunks()).build_and_assign_temporary(e, d);
         return d;
     }
-    
+
     template <class D>
     template <class CS>
     inline auto xchunked_semantic<D>::get_assigner(const CS&) const -> xchunked_assigner<temporary_type, CS>