diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9d6af3c7b9917..0048488be14fc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -66,8 +66,6 @@ _chained_assignment_msg, ) from pandas.util._decorators import ( - Appender, - Substitution, deprecate_nonkeyword_arguments, doc, set_module, @@ -153,7 +151,6 @@ ) from pandas.core.generic import ( NDFrame, - make_doc, ) from pandas.core.indexers import check_key_length from pandas.core.indexes.api import ( @@ -200,9 +197,7 @@ format as fmt, ) from pandas.io.formats.info import ( - INFO_DOCSTRING, DataFrameInfo, - frame_sub_kwargs, ) import pandas.plotting @@ -1302,17 +1297,6 @@ def to_string( encoding: str | None = ..., ) -> None: ... - @Substitution( - header_type="bool or list of str", - header="Write out the column names. If a list of columns " - "is given, it is assumed to be aliases for the " - "column names", - col_space_type="int, list or dict of int", - col_space="The minimum width of each column. If a list of ints is given " - "every integers corresponds with one column. If a dict is given, the key " - "references the column, while the value defines the space to use.", - ) - @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) def to_string( self, buf: FilePath | WriteBuffer[str] | None = None, @@ -1338,7 +1322,65 @@ def to_string( ) -> str | None: """ Render a DataFrame to a console-friendly tabular output. - %(shared_params)s + + Parameters + ---------- + buf : str, Path or StringIO-like, optional, default None + Buffer to write to. If None, the output is returned as a string. + columns : array-like, optional, default None + The subset of columns to write. Writes all columns by default. + col_space : int, list or dict of int, optional + The minimum width of each column. + If a list of ints is given every integers corresponds with one column. + If a dict is given, the key references the column, + while the value defines the space to use. 
+ header : bool or list of str, optional + Write out the column names. If a list of columns is given, + it is assumed to be aliases for the column names. + index : bool, optional, default True + Whether to print index (row) labels. + na_rep : str, optional, default 'NaN' + String representation of ``NaN`` to use. + formatters : list, tuple or dict of one-param. functions, optional + Formatter functions to apply to columns' elements by position or + name. + The result of each function must be a unicode string. + List/tuple must be of length equal to the number of columns. + float_format : one-parameter function, optional, default None + Formatter function to apply to columns' elements if they are + floats. This function must return a unicode string and will be + applied only to the non-``NaN`` elements, with ``NaN`` being + handled by ``na_rep``. + sparsify : bool, optional, default True + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row. + index_names : bool, optional, default True + Prints the names of the indexes. + justify : str, default None + How to justify the column labels. If None uses the option from + the print configuration (controlled by set_option), 'right' out + of the box. Valid values are + + * left + * right + * center + * justify + * justify-all + * start + * end + * inherit + * match-parent + * initial + * unset. + max_rows : int, optional + Maximum number of rows to display in the console. + max_cols : int, optional + Maximum number of columns to display in the console. + show_dimensions : bool, default False + Display DataFrame dimensions (number of rows by number of columns). + decimal : str, default '.' + Character recognized as decimal separator, e.g. ',' in Europe. + line_width : int, optional Width to wrap a line in characters. min_rows : int, optional @@ -1348,7 +1390,13 @@ def to_string( Max width to truncate each column in characters. By default, no limit. 
encoding : str, default "utf-8" Set character encoding. - %(returns)s + + Returns + ------- + str or None + If buf is None, returns the result as a string. Otherwise returns + None. + See Also -------- to_html : Convert DataFrame to HTML. @@ -1358,7 +1406,7 @@ def to_string( >>> d = {"col1": [1, 2, 3], "col2": [4, 5, 6]} >>> df = pd.DataFrame(d) >>> print(df.to_string()) - col1 col2 + col1 col2 0 1 4 1 2 5 2 3 6 @@ -2655,10 +2703,6 @@ def _from_arrays( ) return cls._from_mgr(mgr, axes=mgr.axes) - @doc( - storage_options=_shared_docs["storage_options"], - compression_options=_shared_docs["compression_options"] % "path", - ) def to_stata( self, path: FilePath | WriteBuffer[bytes], @@ -2706,7 +2750,7 @@ def to_stata( variable_labels : dict Dictionary containing columns as keys and variable labels as values. Each label must be 80 characters or smaller. - version : {{114, 117, 118, 119, None}}, default 114 + version : {114, 117, 118, 119, None}, default 114 Version to use in the output dta file. Set to None to let pandas decide between 118 or 119 formats depending on the number of columns in the frame. Version 114 can be read by Stata 10 and @@ -2728,9 +2772,34 @@ def to_stata( format. Only available if version is 117. Storing strings in the StrL format can produce smaller dta files if strings have more than 8 characters and values are repeated. - {compression_options} + compression : str or dict, default 'infer' + For on-the-fly compression of the output data. If 'infer' and 'path' is + path-like, then detect compression from the following extensions: '.gz', + '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' + (otherwise no compression). + Set to ``None`` for no compression. 
+ Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, + ``'xz'``, ``'tar'``} and other key-value pairs are forwarded to + ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or + ``tarfile.TarFile``, respectively. + As an example, the following could be passed for faster compression + and to create a reproducible gzip archive: + ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. + + .. versionadded:: 1.5.0 + Added support for `.tar` files. - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + `_. value_labels : dict of dicts Dictionary containing columns as keys and dictionaries of column value @@ -2991,7 +3060,6 @@ def to_parquet( **kwargs, ) -> None: ... - @doc(storage_options=_shared_docs["storage_options"]) def to_parquet( self, path: FilePath | WriteBuffer[bytes] | None = None, @@ -3019,7 +3087,7 @@ def to_parquet( object implementing a binary ``write()`` function. If None, the result is returned as bytes. If a string or path, it will be used as Root Directory path when writing a partitioned dataset. - engine : {{'auto', 'pyarrow', 'fastparquet'}}, default 'auto' + engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto' Parquet library to use. If 'auto', then the option ``io.parquet.engine`` is used. The default ``io.parquet.engine`` behavior is to try 'pyarrow', falling back to 'fastparquet' if @@ -3039,7 +3107,15 @@ def to_parquet( Column names by which to partition the dataset. 
Columns are partitioned in the order they are given. Must be None if path is not a string. - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + `_. filesystem : fsspec or pyarrow filesystem, default None Filesystem object to use when reading the parquet file. Only implemented @@ -3081,7 +3157,7 @@ def to_parquet( Examples -------- - >>> df = pd.DataFrame(data={{"col1": [1, 2], "col2": [3, 4]}}) + >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) >>> df.to_parquet("df.parquet.gzip", compression="gzip") # doctest: +SKIP >>> pd.read_parquet("df.parquet.gzip") # doctest: +SKIP col1 col2 @@ -3289,14 +3365,6 @@ def to_html( encoding: str | None = ..., ) -> str: ... - @Substitution( - header_type="bool", - header="Whether to print column labels, default True", - col_space_type="str or int, list or dict of int or str", - col_space="The minimum width of each column in CSS length " - "units. An int is assumed to be px units.", - ) - @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) def to_html( self, buf: FilePath | WriteBuffer[str] | None = None, @@ -3326,7 +3394,62 @@ def to_html( ) -> str | None: """ Render a DataFrame as an HTML table. - %(shared_params)s + + Parameters + ---------- + buf : str, Path or StringIO-like, optional, default None + Buffer to write to. If None, the output is returned as a string. + columns : array-like, optional, default None + The subset of columns to write. Writes all columns by default. 
+ col_space : str or int, list or dict of int or str, optional + The minimum width of each column in CSS length units. + An int is assumed to be px units. + header : bool, optional + Whether to print column labels, default True. + index : bool, optional, default True + Whether to print index (row) labels. + na_rep : str, optional, default 'NaN' + String representation of ``NaN`` to use. + formatters : list, tuple or dict of one-param. functions, optional + Formatter functions to apply to columns' elements by position or + name. + The result of each function must be a unicode string. + List/tuple must be of length equal to the number of columns. + float_format : one-parameter function, optional, default None + Formatter function to apply to columns' elements if they are + floats. This function must return a unicode string and will be + applied only to the non-``NaN`` elements, with ``NaN`` being + handled by ``na_rep``. + sparsify : bool, optional, default True + Set to False for a DataFrame with a hierarchical index to print + every multiindex key at each row. + index_names : bool, optional, default True + Prints the names of the indexes. + justify : str, default None + How to justify the column labels. If None uses the option from + the print configuration (controlled by set_option), 'right' out + of the box. Valid values are + + * left + * right + * center + * justify + * justify-all + * start + * end + * inherit + * match-parent + * initial + * unset. + max_rows : int, optional + Maximum number of rows to display in the console. + max_cols : int, optional + Maximum number of columns to display in the console. + show_dimensions : bool, default False + Display DataFrame dimensions (number of rows by number of columns). + decimal : str, default '.' + Character recognized as decimal separator, e.g. ',' in Europe. + bold_rows : bool, default True Make the row labels bold in the output. 
classes : str or list or tuple, default None @@ -3348,79 +3471,85 @@ def to_html( Convert URLs to HTML links. encoding : str, default "utf-8" Set character encoding. - %(returns)s - See Also - -------- - to_string : Convert DataFrame to a string. - Examples - -------- - >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) - >>> html_string = df.to_html() - >>> print(html_string) - - - - - - - - - - - - - - - - - - - - -
col1col2
014
123
- - HTML output - - +----+-----+-----+ - | |col1 |col2 | - +====+=====+=====+ - |0 |1 |4 | - +----+-----+-----+ - |1 |2 |3 | - +----+-----+-----+ + Returns + ------- + str or None + If buf is None, returns the result as a string. Otherwise returns + None. - >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) - >>> html_string = df.to_html(index=False) - >>> print(html_string) - - - - - - - - - - - - - - - - - -
col1col2
14
23
- - HTML output - - +-----+-----+ - |col1 |col2 | - +=====+=====+ - |1 |4 | - +-----+-----+ - |2 |3 | - +-----+-----+ + See Also + -------- + to_string : Convert DataFrame to a string. + + Examples + -------- + >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) + >>> html_string = df.to_html() + >>> print(html_string) + + + + + + + + + + + + + + + + + + + + +
col1col2
014
123
+ + HTML output + + +----+-----+-----+ + | |col1 |col2 | + +====+=====+=====+ + |0 |1 |4 | + +----+-----+-----+ + |1 |2 |3 | + +----+-----+-----+ + + >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) + >>> html_string = df.to_html(index=False) + >>> print(html_string) + + + + + + + + + + + + + + + + + +
col1col2
14
23
+ + HTML output + + +-----+-----+ + |col1 |col2 | + +=====+=====+ + |1 |4 | + +-----+-----+ + |2 |3 | + +-----+-----+ """ if justify is not None and justify not in fmt.VALID_JUSTIFY_PARAMETERS: raise ValueError("Invalid value for justify parameter") @@ -3499,10 +3628,6 @@ def to_xml( storage_options: StorageOptions | None = ..., ) -> None: ... - @doc( - storage_options=_shared_docs["storage_options"], - compression_options=_shared_docs["compression_options"] % "path_or_buffer", - ) def to_xml( self, path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, @@ -3555,7 +3680,7 @@ def to_xml( Default namespaces should be given empty string key. For example, :: - namespaces = {{"": "https://example.com"}} + namespaces = {"": "https://example.com"} prefix : str, optional Namespace prefix to be used for every element and/or attribute @@ -3568,7 +3693,7 @@ def to_xml( pretty_print : bool, default True Whether output should be pretty printed with indentation and line breaks. - parser : {{'lxml','etree'}}, default 'lxml' + parser : {'lxml','etree'}, default 'lxml' Parser module to use for building of tree. Only 'lxml' and 'etree' are supported. With 'lxml', the ability to use XSLT stylesheet is supported. @@ -3578,9 +3703,35 @@ def to_xml( layout of elements and attributes from original output. This argument requires ``lxml`` to be installed. Only XSLT 1.0 scripts and not later versions is currently supported. - {compression_options} + compression : str or dict, default 'infer' + For on-the-fly compression of the output data. If 'infer' + and 'path_or_buffer' is path-like, + then detect compression from the following extensions: '.gz', + '.bz2', '.zip', '.xz', '.zst', '.tar', '.tar.gz', '.tar.xz' or '.tar.bz2' + (otherwise no compression). + Set to ``None`` for no compression. 
+ Can also be a dict with key ``'method'`` set + to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``, ``'xz'``, + ``'tar'``} and other key-value pairs are forwarded to + ``zipfile.ZipFile``, ``gzip.GzipFile``, + ``bz2.BZ2File``, ``zstandard.ZstdCompressor``, ``lzma.LZMAFile`` or + ``tarfile.TarFile``, respectively. + As an example, the following could be passed for faster + compression and to create a reproducible gzip archive: + ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``. + + .. versionadded:: 1.5.0 + Added support for `.tar` files. - {storage_options} + storage_options : dict, optional + Extra options that make sense for a particular storage connection, e.g. + host, port, username, password, etc. For HTTP(S) URLs the key-value pairs + are forwarded to ``urllib.request.Request`` as header options. For other + URLs (e.g. starting with "s3://", and "gcs://") the key-value pairs are + forwarded to ``fsspec.open``. Please see ``fsspec`` and ``urllib`` for more + details, and for more examples on storage options refer `here + `_. Returns ------- @@ -3634,7 +3785,7 @@ def to_xml( >>> df.to_xml( - ... namespaces={{"doc": "https://example.com"}}, prefix="doc" + ... namespaces={"doc": "https://example.com"}, prefix="doc" ... ) # doctest: +SKIP @@ -3658,7 +3809,6 @@ def to_xml( """ - from pandas.io.formats.xml import ( EtreeXMLFormatter, LxmlXMLFormatter, @@ -3760,7 +3910,6 @@ def to_iceberg( ) # ---------------------------------------------------------------------- - @doc(INFO_DOCSTRING, **frame_sub_kwargs) def info( self, verbose: bool | None = None, @@ -3769,6 +3918,149 @@ def info( memory_usage: bool | str | None = None, show_counts: bool | None = None, ) -> None: + """ + Print a concise summary of a DataFrame. + + This method prints information about a DataFrame including + the index dtype and columns, non-NA values and memory usage. + + Parameters + ---------- + verbose : bool, optional + Whether to print the full summary. 
By default, the setting in + ``pandas.options.display.max_info_columns`` is followed. + buf : writable buffer, defaults to sys.stdout + Where to send the output. By default, the output is printed to + sys.stdout. Pass a writable buffer if you need to further process + the output. + max_cols : int, optional + When to switch from the verbose to the truncated output. If the + DataFrame has more than `max_cols` columns, the truncated output + is used. By default, the setting in + ``pandas.options.display.max_info_columns`` is used. + memory_usage : bool, str, optional + Specifies whether total memory usage of the DataFrame + elements (including the index) should be displayed. By default, + this follows the ``pandas.options.display.memory_usage`` setting. + + True always show memory usage. False never shows memory usage. + A value of 'deep' is equivalent to "True with deep introspection". + Memory usage is shown in human-readable units (base-2 + representation). Without deep introspection a memory estimation is + made based in column dtype and number of rows assuming values + consume the same memory amount for corresponding dtypes. With deep + memory introspection, a real memory usage calculation is performed + at the cost of computational resources. See the + :ref:`Frequently Asked Questions ` for more + details. + show_counts : bool, optional + Whether to show the non-null counts. By default, this is shown + only if the DataFrame is smaller than + ``pandas.options.display.max_info_rows`` and + ``pandas.options.display.max_info_columns``. A value of True always + shows the counts, and False never shows the counts. + + Returns + ------- + None + This method prints a summary of a DataFrame and returns None. + + See Also + -------- + DataFrame.describe: Generate descriptive statistics of DataFrame + columns. + DataFrame.memory_usage: Memory usage of DataFrame columns. 
+ + Examples + -------- + >>> int_values = [1, 2, 3, 4, 5] + >>> text_values = ["alpha", "beta", "gamma", "delta", "epsilon"] + >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] + >>> df = pd.DataFrame( + ... { + ... "int_col": int_values, + ... "text_col": text_values, + ... "float_col": float_values, + ... } + ... ) + >>> df + int_col text_col float_col + 0 1 alpha 0.00 + 1 2 beta 0.25 + 2 3 gamma 0.50 + 3 4 delta 0.75 + 4 5 epsilon 1.00 + + Prints information of all columns: + + >>> df.info(verbose=True) + + RangeIndex: 5 entries, 0 to 4 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 int_col 5 non-null int64 + 1 text_col 5 non-null object + 2 float_col 5 non-null float64 + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Prints a summary of columns count and its dtypes but not per column + information: + + >>> df.info(verbose=False) + + RangeIndex: 5 entries, 0 to 4 + Columns: 3 entries, int_col to float_col + dtypes: float64(1), int64(1), object(1) + memory usage: 248.0+ bytes + + Pipe output of DataFrame.info to buffer instead of sys.stdout, get + buffer content and writes to a text file: + + >>> import io + >>> buffer = io.StringIO() + >>> df.info(buf=buffer) + >>> s = buffer.getvalue() + >>> with open("df_info.txt", "w", encoding="utf-8") as f: # doctest: +SKIP + ... f.write(s) + 260 + + The `memory_usage` parameter allows deep introspection mode, specially + useful for big DataFrames and fine-tune memory optimization: + + >>> random_strings_array = np.random.choice(["a", "b", "c"], 10**6) + >>> df = pd.DataFrame( + ... { + ... "column_1": np.random.choice(["a", "b", "c"], 10**6), + ... "column_2": np.random.choice(["a", "b", "c"], 10**6), + ... "column_3": np.random.choice(["a", "b", "c"], 10**6), + ... } + ... 
) + >>> df.info() + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 22.9+ MB + + >>> df.info(memory_usage="deep") + + RangeIndex: 1000000 entries, 0 to 999999 + Data columns (total 3 columns): + # Column Non-Null Count Dtype + --- ------ -------------- ----- + 0 column_1 1000000 non-null object + 1 column_2 1000000 non-null object + 2 column_3 1000000 non-null object + dtypes: object(3) + memory usage: 165.9 MB + """ info = DataFrameInfo( data=self, memory_usage=memory_usage, @@ -5582,37 +5874,6 @@ def _reindex_multi(self, axes: dict[str, Index], fill_value) -> DataFrame: fill_value=fill_value, ) - @Appender( - """ - Examples - -------- - >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) - - Change the row labels. - - >>> df.set_axis(['a', 'b', 'c'], axis='index') - A B - a 1 4 - b 2 5 - c 3 6 - - Change the column labels. - - >>> df.set_axis(['I', 'II'], axis='columns') - I II - 0 1 4 - 1 2 5 - 2 3 6 - """ - ) - @Substitution( - klass=_shared_doc_kwargs["klass"], - axes_single_arg=_shared_doc_kwargs["axes_single_arg"], - extended_summary_sub=" column or", - axis_description_sub=", and 1 identifies the columns", - see_also_sub=" or columns", - ) - @Appender(NDFrame.set_axis.__doc__) def set_axis( self, labels, @@ -5620,13 +5881,65 @@ def set_axis( axis: Axis = 0, copy: bool | lib.NoDefault = lib.no_default, ) -> DataFrame: + """ + Assign desired index to given axis. + + Indexes for column or row labels can be changed by assigning + a list-like or Index. + + Parameters + ---------- + labels : list-like, Index + The values for the new index. + + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to update. The value 0 identifies the rows. For `Series` + this parameter is unused and defaults to 0. 
+ + copy : bool, default False + This keyword is now ignored; changing its value will have no + impact on the method. + + .. deprecated:: 3.0.0 + + This keyword is ignored and will be removed in pandas 4.0. Since + pandas 3.0, this method always returns a new object using a lazy + copy mechanism that defers copies until necessary + (Copy-on-Write). See the `user guide on Copy-on-Write + `__ + for more details. + + Returns + ------- + DataFrame + An object of type DataFrame. + + See Also + -------- + DataFrame.rename_axis : Alter the name of the index or columns. + + Examples + -------- + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) + + Change the row labels. + + >>> df.set_axis(["a", "b", "c"], axis="index") + A B + a 1 4 + b 2 5 + c 3 6 + + Change the column labels. + + >>> df.set_axis(["I", "II"], axis="columns") + I II + 0 1 4 + 1 2 5 + 2 3 6 + """ return super().set_axis(labels, axis=axis, copy=copy) - @doc( - NDFrame.reindex, - klass=_shared_doc_kwargs["klass"], - optional_reindex=_shared_doc_kwargs["optional_reindex"], - ) def reindex( self, labels=None, @@ -5641,6 +5954,227 @@ def reindex( limit: int | None = None, tolerance=None, ) -> DataFrame: + """ + Conform DataFrame to new index with optional filling logic. + + Places NA/NaN in locations having no value in the previous index. A new object + is produced unless the new index is equivalent to the current one and + ``copy=False``. + + Parameters + ---------- + + labels : array-like, optional + New labels / index to conform the axis specified by 'axis' to. + index : array-like, optional + New labels for the index. Preferably an Index object to avoid + duplicating data. + columns : array-like, optional + New labels for the columns. Preferably an Index object to avoid + duplicating data. + axis : int or str, optional + Axis to target. Can be either the axis name ('index', 'columns') + or number (0, 1). 
+ method : {None, 'backfill'/'bfill', 'pad'/'ffill', 'nearest'} + Method to use for filling holes in reindexed DataFrame. + Please note: this is only applicable to DataFrames/Series with a + monotonically increasing/decreasing index. + + * None (default): don't fill gaps + * pad / ffill: Propagate last valid observation forward to next + valid. + * backfill / bfill: Use next valid observation to fill gap. + * nearest: Use nearest valid observations to fill gap. + + copy : bool, default False + This keyword is now ignored; changing its value will have no + impact on the method. + + .. deprecated:: 3.0.0 + + This keyword is ignored and will be removed in pandas 4.0. Since + pandas 3.0, this method always returns a new object using a lazy + copy mechanism that defers copies until necessary + (Copy-on-Write). See the `user guide on Copy-on-Write + `__ + for more details. + + level : int or name + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : scalar, default np.nan + Value to use for missing values. Defaults to NaN, but can be any + "compatible" value. + limit : int, default None + Maximum number of consecutive elements to forward or backward fill. + tolerance : optional + Maximum distance between original and new labels for inexact + matches. The values of the index at the matching locations most + satisfy the equation ``abs(index[indexer] - target) <= tolerance``. + + Tolerance may be a scalar value, which applies the same tolerance + to all values, or list-like, which applies variable tolerance per + element. List-like includes list, tuple, array, Series, and must be + the same size as the index and its dtype must exactly match the + index's type. + + Returns + ------- + DataFrame + DataFrame with changed index. + + See Also + -------- + DataFrame.set_index : Set row labels. + DataFrame.reset_index : Remove row labels or move them to new columns. + DataFrame.reindex_like : Change to same indices as other DataFrame. 
+ + Examples + -------- + ``DataFrame.reindex`` supports two calling conventions + + * ``(index=index_labels, columns=column_labels, ...)`` + * ``(labels, axis={'index', 'columns'}, ...)`` + + We *highly* recommend using keyword arguments to clarify your + intent. + + Create a DataFrame with some fictional data. + + >>> index = ["Firefox", "Chrome", "Safari", "IE10", "Konqueror"] + >>> columns = ["http_status", "response_time"] + >>> df = pd.DataFrame( + ... [[200, 0.04], [200, 0.02], [404, 0.07], [404, 0.08], [301, 1.0]], + ... columns=columns, + ... index=index, + ... ) + >>> df + http_status response_time + Firefox 200 0.04 + Chrome 200 0.02 + Safari 404 0.07 + IE10 404 0.08 + Konqueror 301 1.00 + + Create a new index and reindex the DataFrame. By default + values in the new index that do not have corresponding + records in the DataFrame are assigned ``NaN``. + + >>> new_index = ["Safari", "Iceweasel", "Comodo Dragon", "IE10", "Chrome"] + >>> df.reindex(new_index) + http_status response_time + Safari 404.0 0.07 + Iceweasel NaN NaN + Comodo Dragon NaN NaN + IE10 404.0 0.08 + Chrome 200.0 0.02 + + We can fill in the missing values by passing a value to + the keyword ``fill_value``. Because the index is not monotonically + increasing or decreasing, we cannot use arguments to the keyword + ``method`` to fill the ``NaN`` values. + + >>> df.reindex(new_index, fill_value=0) + http_status response_time + Safari 404 0.07 + Iceweasel 0 0.00 + Comodo Dragon 0 0.00 + IE10 404 0.08 + Chrome 200 0.02 + + >>> df.reindex(new_index, fill_value="missing") + http_status response_time + Safari 404 0.07 + Iceweasel missing missing + Comodo Dragon missing missing + IE10 404 0.08 + Chrome 200 0.02 + + We can also reindex the columns. 
+ + >>> df.reindex(columns=["http_status", "user_agent"]) + http_status user_agent + Firefox 200 NaN + Chrome 200 NaN + Safari 404 NaN + IE10 404 NaN + Konqueror 301 NaN + + Or we can use "axis-style" keyword arguments + + >>> df.reindex(["http_status", "user_agent"], axis="columns") + http_status user_agent + Firefox 200 NaN + Chrome 200 NaN + Safari 404 NaN + IE10 404 NaN + Konqueror 301 NaN + + To further illustrate the filling functionality in + ``reindex``, we will create a DataFrame with a + monotonically increasing index (for example, a sequence + of dates). + + >>> date_index = pd.date_range("1/1/2010", periods=6, freq="D") + >>> df2 = pd.DataFrame( + ... {"prices": [100, 101, np.nan, 100, 89, 88]}, index=date_index + ... ) + >>> df2 + prices + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + + Suppose we decide to expand the DataFrame to cover a wider + date range. + + >>> date_index2 = pd.date_range("12/29/2009", periods=10, freq="D") + >>> df2.reindex(date_index2) + prices + 2009-12-29 NaN + 2009-12-30 NaN + 2009-12-31 NaN + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + 2010-01-07 NaN + + The index entries that did not have a value in the original data frame + (for example, '2009-12-29') are by default filled with ``NaN``. + If desired, we can fill in the missing values using one of several + options. + + For example, to back-propagate the last valid value to fill the ``NaN`` + values, pass ``bfill`` as an argument to the ``method`` keyword. 
+ + >>> df2.reindex(date_index2, method="bfill") + prices + 2009-12-29 100.0 + 2009-12-30 100.0 + 2009-12-31 100.0 + 2010-01-01 100.0 + 2010-01-02 101.0 + 2010-01-03 NaN + 2010-01-04 100.0 + 2010-01-05 89.0 + 2010-01-06 88.0 + 2010-01-07 NaN + + Please note that the ``NaN`` value present in the original DataFrame + (at index value 2010-01-03) will not be filled by any of the + value propagation schemes. This is because filling while reindexing + does not look at DataFrame values, but only compares the original and + desired indexes. If you do want to fill in the ``NaN`` values present + in the original DataFrame, use the ``fillna()`` method. + + See the :ref:`user guide ` for more. + """ return super().reindex( labels=labels, index=index, @@ -6129,7 +6663,6 @@ def _replace_columnwise( return res if inplace else res.__finalize__(self) - @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) def shift( self, periods: int | Sequence[int] = 1, @@ -6138,6 +6671,120 @@ def shift( fill_value: Hashable = lib.no_default, suffix: str | None = None, ) -> DataFrame: + """ + Shift index by desired number of periods with an optional time `freq`. + + When `freq` is not passed, shift the index without realigning the data. + If `freq` is passed (in this case, the index must be date or datetime, + or it will raise a `NotImplementedError`), the index will be + increased using the periods and the `freq`. `freq` can be inferred + when specified as "infer" as long as either freq or inferred_freq + attribute is set in the index. + + Parameters + ---------- + periods : int or Sequence + Number of periods to shift. Can be positive or negative. + If an iterable of ints, the data will be shifted once by each int. + This is equivalent to shifting by one value at a time and + concatenating all resulting frames. The resulting columns will have + the shift suffixed to their column names. For multiple periods, + axis must not be 1. 
+ freq : DateOffset, tseries.offsets, timedelta, or str, optional + Offset to use from the tseries module or time rule (e.g. 'EOM'). + If `freq` is specified then the index values are shifted but the + data is not realigned. That is, use `freq` if you would like to + extend the index when shifting and preserve the original data. + If `freq` is specified as "infer" then it will be inferred from + the freq or inferred_freq attributes of the index. If neither of + those attributes exist, a ValueError is thrown. + axis : {0 or 'index', 1 or 'columns', None}, default None + Shift direction. For `Series` this parameter is unused and defaults to 0. + fill_value : object, optional + The scalar value to use for newly introduced missing values. + the default depends on the dtype of `self`. + For Boolean and numeric NumPy data types, ``np.nan`` is used. + For datetime, timedelta, or period data, etc. :attr:`NaT` is used. + For extension dtypes, ``self.dtype.na_value`` is used. + suffix : str, optional + If str and periods is an iterable, this is added after the column + name and before the shift value for each shifted column name. + For `Series` this parameter is unused and defaults to `None`. + + Returns + ------- + DataFrame + Copy of input object, shifted. + + See Also + -------- + Index.shift : Shift values of Index. + DatetimeIndex.shift : Shift values of DatetimeIndex. + PeriodIndex.shift : Shift values of PeriodIndex. + + Examples + -------- + >>> df = pd.DataFrame( + ... [[10, 13, 17], [20, 23, 27], [15, 18, 22], [30, 33, 37], [45, 48, 52]], + ... columns=["Col1", "Col2", "Col3"], + ... index=pd.date_range("2020-01-01", "2020-01-05"), + ... 
) + >>> df + Col1 Col2 Col3 + 2020-01-01 10 13 17 + 2020-01-02 20 23 27 + 2020-01-03 15 18 22 + 2020-01-04 30 33 37 + 2020-01-05 45 48 52 + + >>> df.shift(periods=3) + Col1 Col2 Col3 + 2020-01-01 NaN NaN NaN + 2020-01-02 NaN NaN NaN + 2020-01-03 NaN NaN NaN + 2020-01-04 10.0 13.0 17.0 + 2020-01-05 20.0 23.0 27.0 + + >>> df.shift(periods=1, axis="columns") + Col1 Col2 Col3 + 2020-01-01 NaN 10 13 + 2020-01-02 NaN 20 23 + 2020-01-03 NaN 15 18 + 2020-01-04 NaN 30 33 + 2020-01-05 NaN 45 48 + + >>> df.shift(periods=3, fill_value=0) + Col1 Col2 Col3 + 2020-01-01 0 0 0 + 2020-01-02 0 0 0 + 2020-01-03 0 0 0 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + + >>> df.shift(periods=3, freq="D") + Col1 Col2 Col3 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + 2020-01-06 15 18 22 + 2020-01-07 30 33 37 + 2020-01-08 45 48 52 + + >>> df.shift(periods=3, freq="infer") + Col1 Col2 Col3 + 2020-01-04 10 13 17 + 2020-01-05 20 23 27 + 2020-01-06 15 18 22 + 2020-01-07 30 33 37 + 2020-01-08 45 48 52 + + >>> df["Col1"].shift(periods=[0, 1, 2]) + Col1_0 Col1_1 Col1_2 + 2020-01-01 10 NaN NaN + 2020-01-02 20 10.0 NaN + 2020-01-03 15 20.0 10.0 + 2020-01-04 30 15.0 20.0 + 2020-01-05 45 30.0 15.0 + """ if freq is not None and fill_value is not lib.no_default: # GH#53832 raise ValueError( @@ -6790,8 +7437,75 @@ class max type # ---------------------------------------------------------------------- # Reindex-based selection methods - @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) def isna(self) -> DataFrame: + """ + Detect missing values. + + Return a boolean same-sized object indicating if the values are NA. + NA values, such as None or :attr:`numpy.NaN`, gets mapped to True + values. + Everything else gets mapped to False values. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values. + + Returns + ------- + Series/DataFrame + Mask of bool values for each element in Series/DataFrame + that indicates whether an element is an NA value. 
+ + See Also + -------- + Series.isnull : Alias of isna. + DataFrame.isnull : Alias of isna. + Series.notna : Boolean inverse of isna. + DataFrame.notna : Boolean inverse of isna. + Series.dropna : Omit axes labels with missing values. + DataFrame.dropna : Omit axes labels with missing values. + isna : Top-level isna. + + Examples + -------- + Show which entries in a DataFrame are NA. + + >>> df = pd.DataFrame( + ... dict( + ... age=[5, 6, np.nan], + ... born=[ + ... pd.NaT, + ... pd.Timestamp("1939-05-27"), + ... pd.Timestamp("1940-04-25"), + ... ], + ... name=["Alfred", "Batman", ""], + ... toy=[None, "Batmobile", "Joker"], + ... ) + ... ) + >>> df + age born name toy + 0 5.0 NaT Alfred NaN + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.isna() + age born name toy + 0 False True False True + 1 False False False False + 2 True False False False + + Show which entries in a Series are NA. + + >>> ser = pd.Series([5, 6, np.nan]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.isna() + 0 False + 1 False + 2 True + dtype: bool + """ res_mgr = self._mgr.isna(func=isna) result = self._constructor_from_mgr(res_mgr, axes=res_mgr.axes) return result.__finalize__(self, method="isna") @@ -6803,8 +7517,75 @@ def isnull(self) -> DataFrame: """ return self.isna() - @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) def notna(self) -> DataFrame: + """ + Detect existing (non-missing) values. + + Return a boolean same-sized object indicating if the values are not NA. + Non-missing values get mapped to True. Characters such as empty + strings ``''`` or :attr:`numpy.inf` are not considered NA values. + NA values, such as None or :attr:`numpy.NaN`, get mapped to False + values. + + Returns + ------- + Series/DataFrame + Mask of bool values for each element in Series/DataFrame + that indicates whether an element is not an NA value. + + See Also + -------- + Series.notnull : Alias of notna. + DataFrame.notnull : Alias of notna. 
+ Series.isna : Boolean inverse of notna. + DataFrame.isna : Boolean inverse of notna. + Series.dropna : Omit axes labels with missing values. + DataFrame.dropna : Omit axes labels with missing values. + notna : Top-level notna. + + Examples + -------- + Show which entries in a DataFrame are not NA. + + >>> df = pd.DataFrame( + ... dict( + ... age=[5, 6, np.nan], + ... born=[ + ... pd.NaT, + ... pd.Timestamp("1939-05-27"), + ... pd.Timestamp("1940-04-25"), + ... ], + ... name=["Alfred", "Batman", ""], + ... toy=[None, "Batmobile", "Joker"], + ... ) + ... ) + >>> df + age born name toy + 0 5.0 NaT Alfred NaN + 1 6.0 1939-05-27 Batman Batmobile + 2 NaN 1940-04-25 Joker + + >>> df.notna() + age born name toy + 0 True False True False + 1 True True True True + 2 False True True True + + Show which entries in a Series are not NA. + + >>> ser = pd.Series([5, 6, np.nan]) + >>> ser + 0 5.0 + 1 6.0 + 2 NaN + dtype: float64 + + >>> ser.notna() + 0 True + 1 True + 2 False + dtype: bool + """ return ~self.isna() @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) @@ -8743,86 +9524,2347 @@ def _flex_cmp_method(self, other, op, *, axis: Axis = "columns", level=None): new_data = self._dispatch_frame_op(other, op, axis=axis) return self._construct_result(new_data, other=other) - @Appender(ops.make_flex_doc("eq", "dataframe")) def eq(self, other, axis: Axis = "columns", level=None) -> DataFrame: - return self._flex_cmp_method(other, operator.eq, axis=axis, level=level) + """ + Get Not equal to of dataframe and other, element-wise (binary operator `eq`). - @Appender(ops.make_flex_doc("ne", "dataframe")) - def ne(self, other, axis: Axis = "columns", level=None) -> DataFrame: - return self._flex_cmp_method(other, operator.ne, axis=axis, level=level) + Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison + operators. 
- @Appender(ops.make_flex_doc("le", "dataframe")) - def le(self, other, axis: Axis = "columns", level=None) -> DataFrame: - return self._flex_cmp_method(other, operator.le, axis=axis, level=level) + Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis + (rows or columns) and level for comparison. - @Appender(ops.make_flex_doc("lt", "dataframe")) - def lt(self, other, axis: Axis = "columns", level=None) -> DataFrame: + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). + level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + + Returns + ------- + DataFrame of bool + Result of the comparison. + + See Also + -------- + DataFrame.eq : Compare DataFrames for equality elementwise. + DataFrame.ne : Compare DataFrames for inequality elementwise. + DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. + DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. + DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. + DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + + Notes + ----- + Mismatched indices will be unioned together. + `NaN` values are considered different (i.e. `NaN` != `NaN`). + + Examples + -------- + >>> df = pd.DataFrame( + ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, + ... index=["A", "B", "C"], + ... 
) + >>> df + cost revenue + A 250 100 + B 150 250 + C 100 300 + + Comparison with a scalar, using either the operator or method: + + >>> df == 100 + cost revenue + A False True + B False False + C True False + + >>> df.eq(100) + cost revenue + A False True + B False False + C True False + + When `other` is a :class:`Series`, the columns of a DataFrame are aligned + with the index of `other` and broadcast: + + >>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue + A True True + B True False + C False True + + Use the method to control the broadcast axis: + + >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") + cost revenue + A True False + B True True + C True True + D True True + + When comparing to an arbitrary sequence, the number of columns must + match the number elements in `other`: + + >>> df == [250, 100] + cost revenue + A True True + B False False + C False False + + Use the method to control the axis: + + >>> df.eq([250, 250, 100], axis="index") + cost revenue + A True False + B False True + C True False + + Compare to a DataFrame of different shape. + + >>> other = pd.DataFrame( + ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] + ... ) + >>> other + revenue + A 300 + B 250 + C 100 + D 150 + + >>> df.gt(other) + cost revenue + A False False + B False False + C False True + D False False + + Compare to a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "cost": [250, 150, 100, 150, 300, 220], + ... "revenue": [100, 250, 300, 200, 175, 225], + ... }, + ... index=[ + ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], + ... ["A", "B", "C", "A", "B", "C"], + ... ], + ... 
) + >>> df_multindex + cost revenue + Q1 A 250 100 + B 150 250 + C 100 300 + Q2 A 150 200 + B 300 175 + C 220 225 + + >>> df.le(df_multindex, level=1) + cost revenue + Q1 A True True + B True True + C True True + Q2 A False True + B True False + C True False + """ + return self._flex_cmp_method(other, operator.eq, axis=axis, level=level) + + def ne(self, other, axis: Axis = "columns", level=None) -> DataFrame: + """ + Get Not equal to of dataframe and other, element-wise (binary operator `ne`). + + Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison + operators. + + Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis + (rows or columns) and level for comparison. + + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). + level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + + Returns + ------- + DataFrame of bool + Result of the comparison. + + See Also + -------- + DataFrame.eq : Compare DataFrames for equality elementwise. + DataFrame.ne : Compare DataFrames for inequality elementwise. + DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. + DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. + DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. + DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + + Notes + ----- + Mismatched indices will be unioned together. + `NaN` values are considered different (i.e. `NaN` != `NaN`). + + Examples + -------- + >>> df = pd.DataFrame( + ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, + ... index=["A", "B", "C"], + ... 
) + >>> df + cost revenue + A 250 100 + B 150 250 + C 100 300 + + Comparison with a scalar, using either the operator or method: + + >>> df == 100 + cost revenue + A False True + B False False + C True False + + >>> df.eq(100) + cost revenue + A False True + B False False + C True False + + When `other` is a :class:`Series`, the columns of a DataFrame are aligned + with the index of `other` and broadcast: + + >>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue + A True True + B True False + C False True + + Use the method to control the broadcast axis: + + >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") + cost revenue + A True False + B True True + C True True + D True True + + When comparing to an arbitrary sequence, the number of columns must + match the number elements in `other`: + + >>> df == [250, 100] + cost revenue + A True True + B False False + C False False + + Use the method to control the axis: + + >>> df.eq([250, 250, 100], axis="index") + cost revenue + A True False + B False True + C True False + + Compare to a DataFrame of different shape. + + >>> other = pd.DataFrame( + ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] + ... ) + >>> other + revenue + A 300 + B 250 + C 100 + D 150 + + >>> df.gt(other) + cost revenue + A False False + B False False + C False True + D False False + + Compare to a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "cost": [250, 150, 100, 150, 300, 220], + ... "revenue": [100, 250, 300, 200, 175, 225], + ... }, + ... index=[ + ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], + ... ["A", "B", "C", "A", "B", "C"], + ... ], + ... 
) + >>> df_multindex + cost revenue + Q1 A 250 100 + B 150 250 + C 100 300 + Q2 A 150 200 + B 300 175 + C 220 225 + + >>> df.le(df_multindex, level=1) + cost revenue + Q1 A True True + B True True + C True True + Q2 A False True + B True False + C True False + """ + return self._flex_cmp_method(other, operator.ne, axis=axis, level=level) + + def le(self, other, axis: Axis = "columns", level=None) -> DataFrame: + """ + Binary operator `le`. + + Get Greater than or equal to of dataframe and other,element-wise. + + Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison + operators. + + Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis + (rows or columns) and level for comparison. + + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). + level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + + Returns + ------- + DataFrame of bool + Result of the comparison. + + See Also + -------- + DataFrame.eq : Compare DataFrames for equality elementwise. + DataFrame.ne : Compare DataFrames for inequality elementwise. + DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. + DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. + DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. + DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + + Notes + ----- + Mismatched indices will be unioned together. + `NaN` values are considered different (i.e. `NaN` != `NaN`). + + Examples + -------- + >>> df = pd.DataFrame( + ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, + ... index=["A", "B", "C"], + ... 
) + >>> df + cost revenue + A 250 100 + B 150 250 + C 100 300 + + Comparison with a scalar, using either the operator or method: + + >>> df == 100 + cost revenue + A False True + B False False + C True False + + >>> df.eq(100) + cost revenue + A False True + B False False + C True False + + When `other` is a :class:`Series`, the columns of a DataFrame are aligned + with the index of `other` and broadcast: + + >>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue + A True True + B True False + C False True + + Use the method to control the broadcast axis: + + >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") + cost revenue + A True False + B True True + C True True + D True True + + When comparing to an arbitrary sequence, the number of columns must + match the number elements in `other`: + + >>> df == [250, 100] + cost revenue + A True True + B False False + C False False + + Use the method to control the axis: + + >>> df.eq([250, 250, 100], axis="index") + cost revenue + A True False + B False True + C True False + + Compare to a DataFrame of different shape. + + >>> other = pd.DataFrame( + ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] + ... ) + >>> other + revenue + A 300 + B 250 + C 100 + D 150 + + >>> df.gt(other) + cost revenue + A False False + B False False + C False True + D False False + + Compare to a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "cost": [250, 150, 100, 150, 300, 220], + ... "revenue": [100, 250, 300, 200, 175, 225], + ... }, + ... index=[ + ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], + ... ["A", "B", "C", "A", "B", "C"], + ... ], + ... 
) + >>> df_multindex + cost revenue + Q1 A 250 100 + B 150 250 + C 100 300 + Q2 A 150 200 + B 300 175 + C 220 225 + + >>> df.le(df_multindex, level=1) + cost revenue + Q1 A True True + B True True + C True True + Q2 A False True + B True False + C True False + """ + return self._flex_cmp_method(other, operator.le, axis=axis, level=level) + + def lt(self, other, axis: Axis = "columns", level=None) -> DataFrame: + """ + Get Greater than of dataframe and other, element-wise (binary operator `lt`). + + Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison + operators. + + Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis + (rows or columns) and level for comparison. + + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). + level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + + Returns + ------- + DataFrame of bool + Result of the comparison. + + See Also + -------- + DataFrame.eq : Compare DataFrames for equality elementwise. + DataFrame.ne : Compare DataFrames for inequality elementwise. + DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. + DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. + DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. + DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + + Notes + ----- + Mismatched indices will be unioned together. + `NaN` values are considered different (i.e. `NaN` != `NaN`). + + Examples + -------- + >>> df = pd.DataFrame( + ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, + ... index=["A", "B", "C"], + ... 
) + >>> df + cost revenue + A 250 100 + B 150 250 + C 100 300 + + Comparison with a scalar, using either the operator or method: + + >>> df == 100 + cost revenue + A False True + B False False + C True False + + >>> df.eq(100) + cost revenue + A False True + B False False + C True False + + When `other` is a :class:`Series`, the columns of a DataFrame are aligned + with the index of `other` and broadcast: + + >>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue + A True True + B True False + C False True + + Use the method to control the broadcast axis: + + >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") + cost revenue + A True False + B True True + C True True + D True True + + When comparing to an arbitrary sequence, the number of columns must + match the number elements in `other`: + + >>> df == [250, 100] + cost revenue + A True True + B False False + C False False + + Use the method to control the axis: + + >>> df.eq([250, 250, 100], axis="index") + cost revenue + A True False + B False True + C True False + + Compare to a DataFrame of different shape. + + >>> other = pd.DataFrame( + ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] + ... ) + >>> other + revenue + A 300 + B 250 + C 100 + D 150 + + >>> df.gt(other) + cost revenue + A False False + B False False + C False True + D False False + + Compare to a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "cost": [250, 150, 100, 150, 300, 220], + ... "revenue": [100, 250, 300, 200, 175, 225], + ... }, + ... index=[ + ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], + ... ["A", "B", "C", "A", "B", "C"], + ... ], + ... 
) + >>> df_multindex + cost revenue + Q1 A 250 100 + B 150 250 + C 100 300 + Q2 A 150 200 + B 300 175 + C 220 225 + + >>> df.le(df_multindex, level=1) + cost revenue + Q1 A True True + B True True + C True True + Q2 A False True + B True False + C True False + """ return self._flex_cmp_method(other, operator.lt, axis=axis, level=level) - @Appender(ops.make_flex_doc("ge", "dataframe")) def ge(self, other, axis: Axis = "columns", level=None) -> DataFrame: + """ + Binary operator `ge`. + + Get Greater than or equal to of dataframe and other,element-wise. + + Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison + operators. + + Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis + (rows or columns) and level for comparison. + + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). + level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + + Returns + ------- + DataFrame of bool + Result of the comparison. + + See Also + -------- + DataFrame.eq : Compare DataFrames for equality elementwise. + DataFrame.ne : Compare DataFrames for inequality elementwise. + DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. + DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. + DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. + DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + + Notes + ----- + Mismatched indices will be unioned together. + `NaN` values are considered different (i.e. `NaN` != `NaN`). + + Examples + -------- + >>> df = pd.DataFrame( + ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, + ... 
index=["A", "B", "C"], + ... ) + >>> df + cost revenue + A 250 100 + B 150 250 + C 100 300 + + Comparison with a scalar, using either the operator or method: + + >>> df == 100 + cost revenue + A False True + B False False + C True False + + >>> df.eq(100) + cost revenue + A False True + B False False + C True False + + When `other` is a :class:`Series`, the columns of a DataFrame are aligned + with the index of `other` and broadcast: + + >>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue + A True True + B True False + C False True + + Use the method to control the broadcast axis: + + >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") + cost revenue + A True False + B True True + C True True + D True True + + When comparing to an arbitrary sequence, the number of columns must + match the number elements in `other`: + + >>> df == [250, 100] + cost revenue + A True True + B False False + C False False + + Use the method to control the axis: + + >>> df.eq([250, 250, 100], axis="index") + cost revenue + A True False + B False True + C True False + + Compare to a DataFrame of different shape. + + >>> other = pd.DataFrame( + ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] + ... ) + >>> other + revenue + A 300 + B 250 + C 100 + D 150 + + >>> df.gt(other) + cost revenue + A False False + B False False + C False True + D False False + + Compare to a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "cost": [250, 150, 100, 150, 300, 220], + ... "revenue": [100, 250, 300, 200, 175, 225], + ... }, + ... index=[ + ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], + ... ["A", "B", "C", "A", "B", "C"], + ... ], + ... 
) + >>> df_multindex + cost revenue + Q1 A 250 100 + B 150 250 + C 100 300 + Q2 A 150 200 + B 300 175 + C 220 225 + + >>> df.le(df_multindex, level=1) + cost revenue + Q1 A True True + B True True + C True True + Q2 A False True + B True False + C True False + """ return self._flex_cmp_method(other, operator.ge, axis=axis, level=level) - @Appender(ops.make_flex_doc("gt", "dataframe")) def gt(self, other, axis: Axis = "columns", level=None) -> DataFrame: + """ + Get Greater than of dataframe and other, element-wise (binary operator `gt`). + + Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison + operators. + + Equivalent to `==`, `!=`, `<=`, `<`, `>=`, `>` with support to choose axis + (rows or columns) and level for comparison. + + Parameters + ---------- + other : scalar, sequence, Series, or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'}, default 'columns' + Whether to compare by the index (0 or 'index') or columns + (1 or 'columns'). + level : int or label + Broadcast across a level, matching Index values on the passed + MultiIndex level. + + Returns + ------- + DataFrame of bool + Result of the comparison. + + See Also + -------- + DataFrame.eq : Compare DataFrames for equality elementwise. + DataFrame.ne : Compare DataFrames for inequality elementwise. + DataFrame.le : Compare DataFrames for less than inequality + or equality elementwise. + DataFrame.lt : Compare DataFrames for strictly less than + inequality elementwise. + DataFrame.ge : Compare DataFrames for greater than inequality + or equality elementwise. + DataFrame.gt : Compare DataFrames for strictly greater than + inequality elementwise. + + Notes + ----- + Mismatched indices will be unioned together. + `NaN` values are considered different (i.e. `NaN` != `NaN`). + + Examples + -------- + >>> df = pd.DataFrame( + ... {"cost": [250, 150, 100], "revenue": [100, 250, 300]}, + ... index=["A", "B", "C"], + ... 
) + >>> df + cost revenue + A 250 100 + B 150 250 + C 100 300 + + Comparison with a scalar, using either the operator or method: + + >>> df == 100 + cost revenue + A False True + B False False + C True False + + >>> df.eq(100) + cost revenue + A False True + B False False + C True False + + When `other` is a :class:`Series`, the columns of a DataFrame are aligned + with the index of `other` and broadcast: + + >>> df != pd.Series([100, 250], index=["cost", "revenue"]) + cost revenue + A True True + B True False + C False True + + Use the method to control the broadcast axis: + + >>> df.ne(pd.Series([100, 300], index=["A", "D"]), axis="index") + cost revenue + A True False + B True True + C True True + D True True + + When comparing to an arbitrary sequence, the number of columns must + match the number elements in `other`: + + >>> df == [250, 100] + cost revenue + A True True + B False False + C False False + + Use the method to control the axis: + + >>> df.eq([250, 250, 100], axis="index") + cost revenue + A True False + B False True + C True False + + Compare to a DataFrame of different shape. + + >>> other = pd.DataFrame( + ... {"revenue": [300, 250, 100, 150]}, index=["A", "B", "C", "D"] + ... ) + >>> other + revenue + A 300 + B 250 + C 100 + D 150 + + >>> df.gt(other) + cost revenue + A False False + B False False + C False True + D False False + + Compare to a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "cost": [250, 150, 100, 150, 300, 220], + ... "revenue": [100, 250, 300, 200, 175, 225], + ... }, + ... index=[ + ... ["Q1", "Q1", "Q1", "Q2", "Q2", "Q2"], + ... ["A", "B", "C", "A", "B", "C"], + ... ], + ... 
) + >>> df_multindex + cost revenue + Q1 A 250 100 + B 150 250 + C 100 300 + Q2 A 150 200 + B 300 175 + C 220 225 + + >>> df.le(df_multindex, level=1) + cost revenue + Q1 A True True + B True True + C True True + Q2 A False True + B True False + C True False + """ return self._flex_cmp_method(other, operator.gt, axis=axis, level=level) - @Appender(ops.make_flex_doc("add", "dataframe")) def add( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: + """ + Get Addition of dataframe and other, element-wise (binary operator `add`). + + Equivalent to ``dataframe + other``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `radd`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). 
+ DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... 
) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ return self._flex_arith_method( other, operator.add, level=level, fill_value=fill_value, axis=axis ) - @Appender(ops.make_flex_doc("radd", "dataframe")) def radd( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: + """ + Get Addition of dataframe and other, element-wise (binary operator `radd`). + + Equivalent to ``other + dataframe``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `add`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). 
For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. 
+ + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ return self._flex_arith_method( other, roperator.radd, level=level, fill_value=fill_value, axis=axis ) - @Appender(ops.make_flex_doc("sub", "dataframe")) def sub( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: + """ + Get Subtraction of dataframe and other, element-wise (binary operator `sub`). + + Equivalent to ``dataframe - other``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `rsub`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. 
+ DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... 
{"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ return self._flex_arith_method( other, operator.sub, level=level, fill_value=fill_value, axis=axis ) subtract = sub - @Appender(ops.make_flex_doc("rsub", "dataframe")) def rsub( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: + """ + Binary operator `rsub`. + + Get Subtraction of dataframe and other,element-wise. + + Equivalent to ``other - dataframe``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `sub`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. 
+ axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. 
+ + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ return self._flex_arith_method( other, roperator.rsub, level=level, fill_value=fill_value, axis=axis ) - @Appender(ops.make_flex_doc("mul", "dataframe")) - def mul( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: - return self._flex_arith_method( - other, operator.mul, level=level, fill_value=fill_value, axis=axis - ) + def mul( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Binary operator `mul`. + + Get Multiplication of dataframe and other,element-wise. + + Equivalent to ``dataframe * other``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `rmul`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. 
+ If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. 
+ + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ + return self._flex_arith_method( + other, operator.mul, level=level, fill_value=fill_value, axis=axis + ) + + multiply = mul + + def rmul( + self, other, axis: Axis = "columns", level=None, fill_value=None + ) -> DataFrame: + """ + Binary operator `rmul`. + + Get Multiplication of dataframe and other,element-wise. + + Equivalent to ``other * dataframe``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `mul`. 
+ + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. 
+ + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. - multiply = mul + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 - @Appender(ops.make_flex_doc("rmul", "dataframe")) - def rmul( - self, other, axis: Axis = "columns", level=None, fill_value=None - ) -> DataFrame: + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ return self._flex_arith_method( other, roperator.rmul, level=level, fill_value=fill_value, axis=axis ) - @Appender(ops.make_flex_doc("truediv", "dataframe")) def truediv( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: + """ + Binary operator `truediv`. + + Get Floating division of dataframe and other,element-wise. + + Equivalent to ``dataframe / other``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `rtruediv`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. 
+ DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... 
{"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ return self._flex_arith_method( other, operator.truediv, level=level, fill_value=fill_value, axis=axis ) @@ -8830,60 +11872,1411 @@ def truediv( div = truediv divide = truediv - @Appender(ops.make_flex_doc("rtruediv", "dataframe")) def rtruediv( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: + """ + Binary operator `rtruediv`. + + Get Floating division of dataframe and other,element-wise. + + Equivalent to ``other / dataframe``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `truediv`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. 
+ + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. 
+ + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ return self._flex_arith_method( other, roperator.rtruediv, level=level, fill_value=fill_value, axis=axis ) rdiv = rtruediv - @Appender(ops.make_flex_doc("floordiv", "dataframe")) def floordiv( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: + """ + Binary operator `floordiv`. + + Get Integer division of dataframe and other,element-wise. + + Equivalent to ``dataframe // other``, + but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `rfloordiv`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. 
+ DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... 
{"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"]
+ ... )
+ >>> other
+ angles
+ circle 0
+ triangle 3
+ rectangle 4
+
+ >>> df * other
+ angles degrees
+ circle 0 NaN
+ triangle 9 NaN
+ rectangle 16 NaN
+
+ >>> df.mul(other, fill_value=0)
+ angles degrees
+ circle 0 0.0
+ triangle 9 0.0
+ rectangle 16 0.0
+
+ Divide by a MultiIndex by level.
+
+ >>> df_multindex = pd.DataFrame(
+ ... {
+ ... "angles": [0, 3, 4, 4, 5, 6],
+ ... "degrees": [360, 180, 360, 360, 540, 720],
+ ... },
+ ... index=[
+ ... ["A", "A", "A", "B", "B", "B"],
+ ... [
+ ... "circle",
+ ... "triangle",
+ ... "rectangle",
+ ... "square",
+ ... "pentagon",
+ ... "hexagon",
+ ... ],
+ ... ],
+ ... )
+ >>> df_multindex
+ angles degrees
+ A circle 0 360
+ triangle 3 180
+ rectangle 4 360
+ B square 4 360
+ pentagon 5 540
+ hexagon 6 720
+
+ >>> df.div(df_multindex, level=1, fill_value=0)
+ angles degrees
+ A circle NaN 1.0
+ triangle 1.0 1.0
+ rectangle 1.0 1.0
+ B square 0.0 0.0
+ pentagon 0.0 0.0
+ hexagon 0.0 0.0
+
+ >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]})
+ >>> df_pow.pow(2)
+ A B
+ 0 4 36
+ 1 9 49
+ 2 16 64
+ 3 25 81
+ """
 return self._flex_arith_method(
 other, operator.floordiv, level=level, fill_value=fill_value, axis=axis
 )
- @Appender(ops.make_flex_doc("rfloordiv", "dataframe"))
 def rfloordiv(
 self, other, axis: Axis = "columns", level=None, fill_value=None
 ) -> DataFrame:
+ """
+ Binary operator `rfloordiv`.
+
+ Get Integer division of dataframe and other, element-wise.
+
+ Equivalent to ``other // dataframe``,
+ but with support to substitute a fill_value
+ for missing data in one of the inputs. With reverse version, `floordiv`.
+
+ Among flexible wrappers (`add`, `sub`, `mul`, `div`,
+ `floordiv`, `mod`, `pow`) to arithmetic operators:
+ `+`, `-`, `*`, `/`, `//`, `%`, `**`.
+
+ Parameters
+ ----------
+ other : scalar, sequence, Series, dict or DataFrame
+ Any single or multiple element data structure, or list-like object. 
+ axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. 
+ + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ return self._flex_arith_method( other, roperator.rfloordiv, level=level, fill_value=fill_value, axis=axis ) - @Appender(ops.make_flex_doc("mod", "dataframe")) def mod( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: + """ + Get Modulo of dataframe and other, element-wise (binary operator `mod`). + + Equivalent to ``dataframe % other``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `rmod`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. + axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. 
+ DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... 
{"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... ) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ return self._flex_arith_method( other, operator.mod, level=level, fill_value=fill_value, axis=axis ) - @Appender(ops.make_flex_doc("rmod", "dataframe")) def rmod( self, other, axis: Axis = "columns", level=None, fill_value=None ) -> DataFrame: + """ + Get Modulo of dataframe and other, element-wise (binary operator `rmod`). + + Equivalent to ``other % dataframe``, but with support to substitute a fill_value + for missing data in one of the inputs. With reverse version, `mod`. + + Among flexible wrappers (`add`, `sub`, `mul`, `div`, + `floordiv`, `mod`, `pow`) to arithmetic operators: + `+`, `-`, `*`, `/`, `//`, `%`, `**`. + + Parameters + ---------- + other : scalar, sequence, Series, dict or DataFrame + Any single or multiple element data structure, or list-like object. 
+ axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. 
+ + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) 
+ >>> df_multindex
+ angles degrees
+ A circle 0 360
+ triangle 3 180
+ rectangle 4 360
+ B square 4 360
+ pentagon 5 540
+ hexagon 6 720
+
+ >>> df.div(df_multindex, level=1, fill_value=0)
+ angles degrees
+ A circle NaN 1.0
+ triangle 1.0 1.0
+ rectangle 1.0 1.0
+ B square 0.0 0.0
+ pentagon 0.0 0.0
+ hexagon 0.0 0.0
+
+ >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]})
+ >>> df_pow.pow(2)
+ A B
+ 0 4 36
+ 1 9 49
+ 2 16 64
+ 3 25 81
+ """
 return self._flex_arith_method(
 other, roperator.rmod, level=level, fill_value=fill_value, axis=axis
 )
- @Appender(ops.make_flex_doc("pow", "dataframe"))
 def pow(
 self, other, axis: Axis = "columns", level=None, fill_value=None
 ) -> DataFrame:
+ """
+ Binary operator `pow`.
+
+ Get Exponential power of dataframe and other, element-wise.
+
+ Equivalent to ``dataframe ** other``,
+ but with support to substitute a fill_value
+ for missing data in one of the inputs. With reverse version, `rpow`.
+
+ Among flexible wrappers (`add`, `sub`, `mul`, `div`,
+ `floordiv`, `mod`, `pow`) to arithmetic operators:
+ `+`, `-`, `*`, `/`, `//`, `%`, `**`.
+
+ Parameters
+ ----------
+ other : scalar, sequence, Series, dict or DataFrame
+ Any single or multiple element data structure, or list-like object.
+ axis : {0 or 'index', 1 or 'columns'}
+ Whether to compare by the index (0 or 'index') or columns
+ (1 or 'columns'). For Series input, axis to match Series index on.
+ level : int or label
+ Broadcast across a level, matching Index values on the
+ passed MultiIndex level.
+ fill_value : float or None, default None
+ Fill existing missing (NaN) values, and any new element needed for
+ successful DataFrame alignment, with this value before computation.
+ If data in both corresponding DataFrame locations is missing
+ the result will be missing.
+
+ Returns
+ -------
+ DataFrame
+ Result of the arithmetic operation.
+
+ See Also
+ --------
+ DataFrame.add : Add DataFrames.
+ DataFrame.sub : Subtract DataFrames. 
+ DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. + + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... 
{"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"]
+ ... )
+ >>> other
+ angles
+ circle 0
+ triangle 3
+ rectangle 4
+
+ >>> df * other
+ angles degrees
+ circle 0 NaN
+ triangle 9 NaN
+ rectangle 16 NaN
+
+ >>> df.mul(other, fill_value=0)
+ angles degrees
+ circle 0 0.0
+ triangle 9 0.0
+ rectangle 16 0.0
+
+ Divide by a MultiIndex by level.
+
+ >>> df_multindex = pd.DataFrame(
+ ... {
+ ... "angles": [0, 3, 4, 4, 5, 6],
+ ... "degrees": [360, 180, 360, 360, 540, 720],
+ ... },
+ ... index=[
+ ... ["A", "A", "A", "B", "B", "B"],
+ ... [
+ ... "circle",
+ ... "triangle",
+ ... "rectangle",
+ ... "square",
+ ... "pentagon",
+ ... "hexagon",
+ ... ],
+ ... ],
+ ... )
+ >>> df_multindex
+ angles degrees
+ A circle 0 360
+ triangle 3 180
+ rectangle 4 360
+ B square 4 360
+ pentagon 5 540
+ hexagon 6 720
+
+ >>> df.div(df_multindex, level=1, fill_value=0)
+ angles degrees
+ A circle NaN 1.0
+ triangle 1.0 1.0
+ rectangle 1.0 1.0
+ B square 0.0 0.0
+ pentagon 0.0 0.0
+ hexagon 0.0 0.0
+
+ >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]})
+ >>> df_pow.pow(2)
+ A B
+ 0 4 36
+ 1 9 49
+ 2 16 64
+ 3 25 81
+ """
 return self._flex_arith_method(
 other, operator.pow, level=level, fill_value=fill_value, axis=axis
 )
- @Appender(ops.make_flex_doc("rpow", "dataframe"))
 def rpow(
 self, other, axis: Axis = "columns", level=None, fill_value=None
 ) -> DataFrame:
+ """
+ Binary operator `rpow`.
+
+ Get Exponential power of dataframe and other, element-wise.
+
+ Equivalent to ``other ** dataframe``,
+ but with support to substitute a fill_value
+ for missing data in one of the inputs. With reverse version, `pow`.
+
+ Among flexible wrappers (`add`, `sub`, `mul`, `div`,
+ `floordiv`, `mod`, `pow`) to arithmetic operators:
+ `+`, `-`, `*`, `/`, `//`, `%`, `**`.
+
+ Parameters
+ ----------
+ other : scalar, sequence, Series, dict or DataFrame
+ Any single or multiple element data structure, or list-like object. 
+ axis : {0 or 'index', 1 or 'columns'} + Whether to compare by the index (0 or 'index') or columns. + (1 or 'columns'). For Series input, axis to match Series index on. + level : int or label + Broadcast across a level, matching Index values on the + passed MultiIndex level. + fill_value : float or None, default None + Fill existing missing (NaN) values, and any new element needed for + successful DataFrame alignment, with this value before computation. + If data in both corresponding DataFrame locations is missing + the result will be missing. + + Returns + ------- + DataFrame + Result of the arithmetic operation. + + See Also + -------- + DataFrame.add : Add DataFrames. + DataFrame.sub : Subtract DataFrames. + DataFrame.mul : Multiply DataFrames. + DataFrame.div : Divide DataFrames (float division). + DataFrame.truediv : Divide DataFrames (float division). + DataFrame.floordiv : Divide DataFrames (integer division). + DataFrame.mod : Calculate modulo (remainder after division). + DataFrame.pow : Calculate exponential power. + + Notes + ----- + Mismatched indices will be unioned together. + + Examples + -------- + >>> df = pd.DataFrame( + ... {"angles": [0, 3, 4], "degrees": [360, 180, 360]}, + ... index=["circle", "triangle", "rectangle"], + ... ) + >>> df + angles degrees + circle 0 360 + triangle 3 180 + rectangle 4 360 + + Add a scalar with operator version which return the same + results. + + >>> df + 1 + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + >>> df.add(1) + angles degrees + circle 1 361 + triangle 4 181 + rectangle 5 361 + + Divide by constant with reverse version. + + >>> df.div(10) + angles degrees + circle 0.0 36.0 + triangle 0.3 18.0 + rectangle 0.4 36.0 + + >>> df.rdiv(10) + angles degrees + circle inf 0.027778 + triangle 3.333333 0.055556 + rectangle 2.500000 0.027778 + + Subtract a list and Series by axis with operator version. 
+ + >>> df - [1, 2] + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub([1, 2], axis="columns") + angles degrees + circle -1 358 + triangle 2 178 + rectangle 3 358 + + >>> df.sub( + ... pd.Series([1, 1, 1], index=["circle", "triangle", "rectangle"]), + ... axis="index", + ... ) + angles degrees + circle -1 359 + triangle 2 179 + rectangle 3 359 + + Multiply a dictionary by axis. + + >>> df.mul({"angles": 0, "degrees": 2}) + angles degrees + circle 0 720 + triangle 0 360 + rectangle 0 720 + + >>> df.mul({"circle": 0, "triangle": 2, "rectangle": 3}, axis="index") + angles degrees + circle 0 0 + triangle 6 360 + rectangle 12 1080 + + Multiply a DataFrame of different shape with operator version. + + >>> other = pd.DataFrame( + ... {"angles": [0, 3, 4]}, index=["circle", "triangle", "rectangle"] + ... ) + >>> other + angles + circle 0 + triangle 3 + rectangle 4 + + >>> df * other + angles degrees + circle 0 NaN + triangle 9 NaN + rectangle 16 NaN + + >>> df.mul(other, fill_value=0) + angles degrees + circle 0 0.0 + triangle 9 0.0 + rectangle 16 0.0 + + Divide by a MultiIndex by level. + + >>> df_multindex = pd.DataFrame( + ... { + ... "angles": [0, 3, 4, 4, 5, 6], + ... "degrees": [360, 180, 360, 360, 540, 720], + ... }, + ... index=[ + ... ["A", "A", "A", "B", "B", "B"], + ... [ + ... "circle", + ... "triangle", + ... "rectangle", + ... "square", + ... "pentagon", + ... "hexagon", + ... ], + ... ], + ... 
) + >>> df_multindex + angles degrees + A circle 0 360 + triangle 3 180 + rectangle 4 360 + B square 4 360 + pentagon 5 540 + hexagon 6 720 + + >>> df.div(df_multindex, level=1, fill_value=0) + angles degrees + A circle NaN 1.0 + triangle 1.0 1.0 + rectangle 1.0 1.0 + B square 0.0 0.0 + pentagon 0.0 0.0 + hexagon 0.0 0.0 + + >>> df_pow = pd.DataFrame({"A": [2, 3, 4, 5], "B": [6, 7, 8, 9]}) + >>> df_pow.pow(2) + A B + 0 4 36 + 1 9 49 + 2 16 64 + 3 25 81 + """ return self._flex_arith_method( other, roperator.rpow, level=level, fill_value=fill_value, axis=axis ) @@ -8891,10 +13284,41 @@ def rpow( # ---------------------------------------------------------------------- # Combination-Related - @doc( - _shared_docs["compare"], - dedent( - """ + def compare( + self, + other: DataFrame, + align_axis: Axis = 1, + keep_shape: bool = False, + keep_equal: bool = False, + result_names: Suffixes = ("self", "other"), + ) -> DataFrame: + """ + Compare to another DataFrame and show the differences. + + Parameters + ---------- + other : DataFrame + Object to compare with. + + align_axis : {0 or 'index', 1 or 'columns'}, default 1 + Determine which axis to align the comparison on. + + * 0, or 'index' : Resulting differences are stacked vertically + with rows drawn alternately from self and other. + * 1, or 'columns' : Resulting differences are aligned horizontally + with columns drawn alternately from self and other. + + keep_shape : bool, default False + If true, all rows and columns are kept. + Otherwise, only the ones with different values are kept. + + keep_equal : bool, default False + If true, the result keeps values that are equal. + Otherwise, equal values are shown as NaNs. + + result_names : tuple, default ('self', 'other') + Set the dataframes names in the comparison. + Returns ------- DataFrame @@ -8923,11 +13347,11 @@ def rpow( Examples -------- >>> df = pd.DataFrame( - ... {{ + ... { ... "col1": ["a", "a", "b", "b", "a"], ... "col2": [1.0, 2.0, 3.0, np.nan, 5.0], - ... 
"col3": [1.0, 2.0, 3.0, 4.0, 5.0] - ... }}, + ... "col3": [1.0, 2.0, 3.0, 4.0, 5.0], + ... }, ... columns=["col1", "col2", "col3"], ... ) >>> df @@ -8939,8 +13363,8 @@ def rpow( 4 a 5.0 5.0 >>> df2 = df.copy() - >>> df2.loc[0, 'col1'] = 'c' - >>> df2.loc[2, 'col3'] = 4.0 + >>> df2.loc[0, "col1"] = "c" + >>> df2.loc[2, "col3"] = 4.0 >>> df2 col1 col2 col3 0 c 1.0 1.0 @@ -9004,17 +13428,6 @@ def rpow( 3 b b NaN NaN 4.0 4.0 4 a a 5.0 5.0 5.0 5.0 """ - ), - klass=_shared_doc_kwargs["klass"], - ) - def compare( - self, - other: DataFrame, - align_axis: Axis = 1, - keep_shape: bool = False, - keep_equal: bool = False, - result_names: Suffixes = ("self", "other"), - ) -> DataFrame: return super().compare( other=other, align_axis=align_axis, @@ -9653,47 +14066,185 @@ def groupby( b 12.3 123.0 NaN 12.3 33.0 - When using ``.apply()``, use ``group_keys`` to include or exclude the - group keys. The ``group_keys`` argument defaults to ``True`` (include). + When using ``.apply()``, use ``group_keys`` to include or exclude the + group keys. The ``group_keys`` argument defaults to ``True`` (include). + + >>> df = pd.DataFrame( + ... { + ... "Animal": ["Falcon", "Falcon", "Parrot", "Parrot"], + ... "Max Speed": [380.0, 370.0, 24.0, 26.0], + ... } + ... ) + >>> df.groupby("Animal", group_keys=True)[["Max Speed"]].apply(lambda x: x) + Max Speed + Animal + Falcon 0 380.0 + 1 370.0 + Parrot 2 24.0 + 3 26.0 + + >>> df.groupby("Animal", group_keys=False)[["Max Speed"]].apply(lambda x: x) + Max Speed + 0 380.0 + 1 370.0 + 2 24.0 + 3 26.0 + """ + from pandas.core.groupby.generic import DataFrameGroupBy + + if level is None and by is None: + raise TypeError("You have to supply one of 'by' and 'level'") + + return DataFrameGroupBy( + obj=self, + keys=by, + level=level, + as_index=as_index, + sort=sort, + group_keys=group_keys, + observed=observed, + dropna=dropna, + ) + + _shared_docs["pivot"] = """ + Return reshaped DataFrame organized by given index / column values. 
def pivot(
    self, *, columns, index=lib.no_default, values=lib.no_default
) -> DataFrame:
    """
    Return reshaped DataFrame organized by given index / column values.

    Reshape data (produce a "pivot" table) based on column values. Uses
    unique values from the specified `index` / `columns` to form the axes
    of the resulting DataFrame. This method does not support data
    aggregation; multiple ``values`` columns will result in a MultiIndex
    in the columns.

    Parameters
    ----------
    columns : Hashable or a sequence of the previous
        Column to use to make the new frame's columns.
    index : Hashable or a sequence of the previous, optional
        Column to use to make the new frame's index. If not given, uses
        the existing index.
    values : Hashable or a sequence of the previous, optional
        Column(s) to use for populating the new frame's values. If not
        specified, all remaining columns will be used and the result will
        have hierarchically indexed columns.

    Returns
    -------
    DataFrame
        Returns reshaped DataFrame.

    Raises
    ------
    ValueError
        When there are any `index`, `columns` combinations with multiple
        values. Use `DataFrame.pivot_table` when you need to aggregate.

    See Also
    --------
    DataFrame.pivot_table : Generalization of pivot that can handle
        duplicate values for one index/column pair.
    DataFrame.unstack : Pivot based on the index values instead of a
        column.
    wide_to_long : Wide panel to long format. Less flexible but more
        user-friendly than melt.

    Notes
    -----
    For finer-tuned control, see hierarchical indexing documentation along
    with the related stack/unstack methods.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {
    ...         "foo": ["one", "one", "one", "two", "two", "two"],
    ...         "bar": ["A", "B", "C", "A", "B", "C"],
    ...         "baz": [1, 2, 3, 4, 5, 6],
    ...     }
    ... )
    >>> df.pivot(index="foo", columns="bar", values="baz")
    bar  A  B  C
    foo
    one  1  2  3
    two  4  5  6
    """
    # Delegate to the reshape implementation; the import is kept local to
    # avoid a circular import between frame.py and the reshape machinery.
    from pandas.core.reshape.pivot import pivot as pivot_func

    return pivot_func(self, index=index, columns=columns, values=values)
def pivot_table(
    self,
    values=None,
    index=None,
    columns=None,
    aggfunc: AggFuncType = "mean",
    fill_value=None,
    margins: bool = False,
    dropna: bool = True,
    margins_name: Level = "All",
    observed: bool = True,
    sort: bool = True,
    **kwargs,
) -> DataFrame:
    """
    Create a spreadsheet-style pivot table as a DataFrame.

    The levels in the pivot table will be stored in MultiIndex objects
    (hierarchical indexes) on the index and columns of the result
    DataFrame.

    Parameters
    ----------
    values : list-like or scalar, optional
        Column or columns to aggregate.
    index : column, Grouper, array, or sequence of the previous
        Keys to group by on the pivot table index. If a list is passed,
        it can contain any of the other types (except list). If an array
        is passed, it must be the same length as the data and will be
        used in the same manner as column values.
    columns : column, Grouper, array, or sequence of the previous
        Keys to group by on the pivot table column. Same semantics as
        ``index``.
    aggfunc : function, list of functions, dict, default "mean"
        If a list of functions is passed, the resulting pivot table will
        have hierarchical columns whose top level are the function names
        (inferred from the function objects themselves). If a dict is
        passed, the key is column to aggregate and the value is function
        or list of functions. If ``margins=True``, aggfunc will be used
        to calculate the partial aggregates.
    fill_value : scalar, default None
        Value to replace missing values with (in the resulting pivot
        table, after aggregation).
    margins : bool, default False
        If ``margins=True``, special ``All`` columns and rows will be
        added with partial group aggregates across the categories on the
        rows and columns.
    dropna : bool, default True
        Do not include columns whose entries are all NaN.
    margins_name : str, default 'All'
        Name of the row / column that will contain the totals when
        margins is True.
    observed : bool, default True
        Only applies if any of the groupers are Categoricals. If True,
        only show observed values for categorical groupers; if False,
        show all values.
    sort : bool, default True
        Specifies if the result should be sorted.
    **kwargs : dict
        Optional keyword arguments to pass to ``aggfunc``.

    Returns
    -------
    DataFrame
        An Excel style pivot table.

    See Also
    --------
    DataFrame.pivot : Pivot without aggregation that can handle
        non-numeric data.
    DataFrame.melt : Unpivot a DataFrame from wide to long format,
        optionally leaving identifiers set.
    wide_to_long : Wide panel to long format. Less flexible but more
        user-friendly than melt.

    Examples
    --------
    >>> df = pd.DataFrame(
    ...     {"A": ["foo", "foo", "bar"], "D": [1, 2, 4]}
    ... )
    >>> df.pivot_table(values="D", index="A", aggfunc="sum")
         D
    A
    bar  4
    foo  3
    """
    # Thin forwarding wrapper; the import is local to avoid a circular
    # dependency with the reshape machinery.
    from pandas.core.reshape.pivot import pivot_table as pivot_table_func

    return pivot_table_func(
        self,
        values=values,
        index=index,
        columns=columns,
        aggfunc=aggfunc,
        fill_value=fill_value,
        margins=margins,
        dropna=dropna,
        margins_name=margins_name,
        observed=observed,
        sort=sort,
        **kwargs,
    )
+ + Parameters + ---------- + periods : int, default 1 + Periods to shift for calculating difference, accepts negative + values. + axis : {0 or 'index', 1 or 'columns'}, default 0 + Take difference over rows (0) or columns (1). + + Returns + ------- + DataFrame + First differences of the Series. + + See Also + -------- + DataFrame.pct_change: Percent change over given number of periods. + DataFrame.shift: Shift index by desired number of periods with an + optional time freq. + Series.diff: First discrete difference of object. + + Notes + ----- + For boolean dtypes, this uses :meth:`operator.xor` rather than + :meth:`operator.sub`. + The result is calculated according to current dtype in DataFrame, + however dtype of the result is always float64. + + Examples + -------- + Difference with previous row - >>> df = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6], - ... 'b': [1, 1, 2, 3, 5, 8], - ... 'c': [1, 4, 9, 16, 25, 36]}) + >>> df = pd.DataFrame( + ... { + ... "a": [1, 2, 3, 4, 5, 6], + ... "b": [1, 1, 2, 3, 5, 8], + ... "c": [1, 4, 9, 16, 25, 36], + ... } + ... ) >>> df a b c 0 1 1 1 @@ -10641,14 +15423,12 @@ def melt( Overflow in input dtype - >>> df = pd.DataFrame({'a': [1, 0]}, dtype=np.uint8) + >>> df = pd.DataFrame({"a": [1, 0]}, dtype=np.uint8) >>> df.diff() a 0 NaN - 1 255.0""" - ), - ) - def diff(self, periods: int = 1, axis: Axis = 0) -> DataFrame: + 1 255.0 + """ if not lib.is_integer(periods): if not (is_float(periods) and periods.is_integer()): raise ValueError("periods must be an integer") @@ -10756,14 +15536,110 @@ def _gotitem( """ ) - @doc( - _shared_docs["aggregate"], - klass=_shared_doc_kwargs["klass"], - axis=_shared_doc_kwargs["axis"], - see_also=_agg_see_also_doc, - examples=_agg_examples_doc, - ) def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): + """ + Aggregate using one or more operations over the specified axis. + + Parameters + ---------- + func : function, str, list or dict + Function to use for aggregating the data. 
If a function, must either + work when passed a DataFrame or when passed to DataFrame.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. + axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index': apply function to each column. + If 1 or 'columns': apply function to each row. + *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + scalar, Series or DataFrame + + The return can be: + + * scalar : when Series.agg is called with single function + * Series : when DataFrame.agg is called with a single function + * DataFrame : when DataFrame.agg is called with several functions + + See Also + -------- + DataFrame.apply : Perform any type of operations. + DataFrame.transform : Perform transformation type operations. + DataFrame.groupby : Perform operations over groups. + DataFrame.resample : Perform operations over resampled bins. + DataFrame.rolling : Perform operations over rolling window. + DataFrame.expanding : Perform operations over expanding window. + core.window.ewm.ExponentialMovingWindow : Perform operation over exponential + weighted window. + + Notes + ----- + The aggregation operations are always performed over an axis, either the + index (default) or the column axis. This behavior is different from + `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, + `var`), where the default is to compute the aggregation of the flattened + array, e.g., ``numpy.mean(arr_2d)`` as opposed to + ``numpy.mean(arr_2d, axis=0)``. + + `agg` is an alias for `aggregate`. Use the alias. + + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + A passed user-defined-function will be passed a Series for evaluation. 
+ + If ``func`` defines an index relabeling, ``axis`` must be ``0`` or ``index``. + + Examples + -------- + >>> df = pd.DataFrame( + ... [[1, 2, 3], [4, 5, 6], [7, 8, 9], [np.nan, np.nan, np.nan]], + ... columns=["A", "B", "C"], + ... ) + + Aggregate these functions over the rows. + + >>> df.agg(["sum", "min"]) + A B C + sum 12.0 15.0 18.0 + min 1.0 2.0 3.0 + + Different aggregations per column. + + >>> df.agg({"A": ["sum", "min"], "B": ["min", "max"]}) + A B + sum 12.0 NaN + min 1.0 2.0 + max NaN 8.0 + + Aggregate different functions over the columns + and rename the index of the resulting DataFrame. + + >>> df.agg(x=("A", "max"), y=("B", "min"), z=("C", "mean")) + A B C + x 7.0 NaN NaN + y NaN 2.0 NaN + z NaN NaN 6.0 + + Aggregate over the columns. + + >>> df.agg("mean", axis="columns") + 0 2.0 + 1 5.0 + 2 8.0 + 3 NaN + dtype: float64 + """ from pandas.core.apply import frame_apply axis = self._get_axis_number(axis) @@ -10775,14 +15651,147 @@ def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): agg = aggregate - @doc( - _shared_docs["transform"], - klass=_shared_doc_kwargs["klass"], - axis=_shared_doc_kwargs["axis"], - ) def transform( self, func: AggFuncType, axis: Axis = 0, *args, **kwargs ) -> DataFrame: + """ + Call ``func`` on self producing a DataFrame with the same axis shape as self. + + Parameters + ---------- + func : function, str, list-like or dict-like + Function to use for transforming the data. If a function, must either + work when passed a DataFrame or when passed to DataFrame.apply. If func + is both list-like and dict-like, dict-like behavior takes precedence. + + Accepted combinations are: + + - function + - string function name + - list-like of functions and/or function names, e.g. ``[np.exp, 'sqrt']`` + - dict-like of axis labels -> functions, + function names or list-like of such. + axis : {0 or 'index', 1 or 'columns'}, default 0 + If 0 or 'index': apply function to each column. + If 1 or 'columns': apply function to each row. 
+ *args + Positional arguments to pass to `func`. + **kwargs + Keyword arguments to pass to `func`. + + Returns + ------- + DataFrame + A DataFrame that must have the same length as self. + + Raises + ------ + ValueError : If the returned DataFrame has a different length than self. + + See Also + -------- + DataFrame.agg : Only perform aggregating type operations. + DataFrame.apply : Invoke function on a DataFrame. + + Notes + ----- + Functions that mutate the passed object can produce unexpected + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` + for more details. + + Examples + -------- + >>> df = pd.DataFrame({"A": range(3), "B": range(1, 4)}) + >>> df + A B + 0 0 1 + 1 1 2 + 2 2 3 + >>> df.transform(lambda x: x + 1) + A B + 0 1 2 + 1 2 3 + 2 3 4 + + Even though the resulting DataFrame must have the same length as the + input DataFrame, it is possible to provide several input functions: + + >>> s = pd.Series(range(3)) + >>> s + 0 0 + 1 1 + 2 2 + dtype: int64 + >>> s.transform([np.sqrt, np.exp]) + sqrt exp + 0 0.000000 1.000000 + 1 1.000000 2.718282 + 2 1.414214 7.389056 + + You can call transform on a GroupBy object: + + >>> df = pd.DataFrame( + ... { + ... "Date": [ + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... "2015-05-08", + ... "2015-05-07", + ... "2015-05-06", + ... "2015-05-05", + ... ], + ... "Data": [5, 8, 6, 1, 50, 100, 60, 120], + ... } + ... ) + >>> df + Date Data + 0 2015-05-08 5 + 1 2015-05-07 8 + 2 2015-05-06 6 + 3 2015-05-05 1 + 4 2015-05-08 50 + 5 2015-05-07 100 + 6 2015-05-06 60 + 7 2015-05-05 120 + >>> df.groupby("Date")["Data"].transform("sum") + 0 55 + 1 108 + 2 66 + 3 121 + 4 55 + 5 108 + 6 66 + 7 121 + Name: Data, dtype: int64 + + >>> df = pd.DataFrame( + ... { + ... "c": [1, 1, 1, 2, 2, 2, 2], + ... "type": ["m", "n", "o", "m", "m", "n", "n"], + ... } + ... 
) + >>> df + c type + 0 1 m + 1 1 n + 2 1 o + 3 2 m + 4 2 m + 5 2 n + 6 2 n + >>> df["size"] = df.groupby("c")["type"].transform(len) + >>> df + c type size + 0 1 m 3 + 1 1 n 3 + 2 1 o 3 + 3 2 m 4 + 4 2 m 4 + 5 2 n 4 + 6 2 n 4 + """ from pandas.core.apply import frame_apply op = frame_apply(self, func=func, axis=axis, args=args, kwargs=kwargs) @@ -11462,25 +16471,235 @@ def join( validate=validate, ) - return joined + return joined + + def merge( + self, + right: DataFrame | Series, + how: MergeHow = "inner", + on: IndexLabel | AnyArrayLike | None = None, + left_on: IndexLabel | AnyArrayLike | None = None, + right_on: IndexLabel | AnyArrayLike | None = None, + left_index: bool = False, + right_index: bool = False, + sort: bool = False, + suffixes: Suffixes = ("_x", "_y"), + copy: bool | lib.NoDefault = lib.no_default, + indicator: str | bool = False, + validate: MergeValidate | None = None, + ) -> DataFrame: + """ + Merge DataFrame or named Series objects with a database-style join. + + A named Series object is treated as a DataFrame with a single named column. + + The join is done on columns or indexes. If joining columns on + columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes + on indexes or indexes on a column or columns, the index will be passed on. + When performing a cross merge, no column specifications to merge on are + allowed. + + .. warning:: + + If both key columns contain rows where the key is a null value, those + rows will be matched against each other. This is different from usual SQL + join behaviour and can lead to unexpected results. + + Parameters + ---------- + right : DataFrame or named Series + Object to merge with. + how : {'left', 'right', 'outer', 'inner', 'cross', 'left_anti', 'right_anti'}, + default 'inner' + Type of merge to be performed. + + * left: use only keys from left frame, similar to a SQL left outer join; + preserve key order. 
+ * right: use only keys from right frame, similar to a SQL right outer join; + preserve key order. + * outer: use union of keys from both frames, similar to a SQL full outer + join; sort keys lexicographically. + * inner: use intersection of keys from both frames, similar to a SQL inner + join; preserve the order of the left keys. + * cross: creates the cartesian product from both frames, preserves the order + of the left keys. + * left_anti: use only keys from left frame that + are not in right frame, similar + to SQL left anti join; preserve key order. + + .. versionadded:: 3.0 + * right_anti: use only keys from right frame + that are not in left frame, similar + to SQL right anti join; preserve key order. + + .. versionadded:: 3.0 + on : Hashable or a sequence of the previous + Column or index level names to join on. These must be found in both + DataFrames. If `on` is None and not merging on indexes then this defaults + to the intersection of the columns in both DataFrames. + left_on : Hashable or a sequence of the previous, or array-like + Column or index level names to join on in the left DataFrame. Can also + be an array or list of arrays of the length of the left DataFrame. + These arrays are treated as if they are columns. + right_on : Hashable or a sequence of the previous, or array-like + Column or index level names to join on in the right DataFrame. Can also + be an array or list of arrays of the length of the right DataFrame. + These arrays are treated as if they are columns. + left_index : bool, default False + Use the index from the left DataFrame as the join key(s). If it is a + MultiIndex, the number of keys in the other DataFrame (either the index + or a number of columns) must match the number of levels. + right_index : bool, default False + Use the index from the right DataFrame as the join key. Same caveats as + left_index. + sort : bool, default False + Sort the join keys lexicographically in the result DataFrame. 
If False, + the order of the join keys depends on the join type (how keyword). + suffixes : list-like, default is ("_x", "_y") + A length-2 sequence where each element is optionally a string + indicating the suffix to add to overlapping column names in + `left` and `right` respectively. Pass a value of `None` instead + of a string to indicate that the column name from `left` or + `right` should be left as-is, with no suffix. At least one of the + values must not be None. + copy : bool, default False + This keyword is now ignored; changing its value will have no + impact on the method. + + .. deprecated:: 3.0.0 + + This keyword is ignored and will be removed in pandas 4.0. Since + pandas 3.0, this method always returns a new object using a lazy + copy mechanism that defers copies until necessary + (Copy-on-Write). See the `user guide on Copy-on-Write + `__ + for more details. + + indicator : bool or str, default False + If True, adds a column to the output DataFrame called "_merge" with + information on the source of each row. The column can be given a different + name by providing a string argument. The column will have a Categorical + type with the value of "left_only" for observations whose merge key only + appears in the left DataFrame, "right_only" for observations + whose merge key only appears in the right DataFrame, and "both" + if the observation's merge key is found in both DataFrames. + + validate : str, optional + If specified, checks if merge is of specified type. + + * "one_to_one" or "1:1": check if merge keys are unique in both + left and right datasets. + * "one_to_many" or "1:m": check if merge keys are unique in left + dataset. + * "many_to_one" or "m:1": check if merge keys are unique in right + dataset. + * "many_to_many" or "m:m": allowed, but does not result in checks. + + Returns + ------- + DataFrame + A DataFrame of the two merged objects. + + See Also + -------- + merge_ordered : Merge with optional filling/interpolation. 
+ merge_asof : Merge on nearest keys. + DataFrame.join : Similar method using indices. + + Examples + -------- + >>> df1 = pd.DataFrame( + ... {"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]} + ... ) + >>> df2 = pd.DataFrame( + ... {"rkey": ["foo", "bar", "baz", "foo"], "value": [5, 6, 7, 8]} + ... ) + >>> df1 + lkey value + 0 foo 1 + 1 bar 2 + 2 baz 3 + 3 foo 5 + >>> df2 + rkey value + 0 foo 5 + 1 bar 6 + 2 baz 7 + 3 foo 8 + + Merge df1 and df2 on the lkey and rkey columns. The value columns have + the default suffixes, _x and _y, appended. + + >>> df1.merge(df2, left_on="lkey", right_on="rkey") + lkey value_x rkey value_y + 0 foo 1 foo 5 + 1 foo 1 foo 8 + 2 bar 2 bar 6 + 3 baz 3 baz 7 + 4 foo 5 foo 5 + 5 foo 5 foo 8 + + Merge DataFrames df1 and df2 with specified left and right suffixes + appended to any overlapping columns. + + >>> df1.merge( + ... df2, left_on="lkey", right_on="rkey", suffixes=("_left", "_right") + ... ) + lkey value_left rkey value_right + 0 foo 1 foo 5 + 1 foo 1 foo 8 + 2 bar 2 bar 6 + 3 baz 3 baz 7 + 4 foo 5 foo 5 + 5 foo 5 foo 8 + + Merge DataFrames df1 and df2, but raise an exception if the DataFrames have + any overlapping columns. + + >>> df1.merge(df2, left_on="lkey", right_on="rkey", suffixes=(False, False)) + Traceback (most recent call last): + ... 
+ ValueError: columns overlap but no suffix specified: + Index(['value'], dtype='object') + + >>> df1 = pd.DataFrame({"a": ["foo", "bar"], "b": [1, 2]}) + >>> df2 = pd.DataFrame({"a": ["foo", "baz"], "c": [3, 4]}) + >>> df1 + a b + 0 foo 1 + 1 bar 2 + >>> df2 + a c + 0 foo 3 + 1 baz 4 - @Substitution("") - @Appender(_merge_doc, indents=2) - def merge( - self, - right: DataFrame | Series, - how: MergeHow = "inner", - on: IndexLabel | AnyArrayLike | None = None, - left_on: IndexLabel | AnyArrayLike | None = None, - right_on: IndexLabel | AnyArrayLike | None = None, - left_index: bool = False, - right_index: bool = False, - sort: bool = False, - suffixes: Suffixes = ("_x", "_y"), - copy: bool | lib.NoDefault = lib.no_default, - indicator: str | bool = False, - validate: MergeValidate | None = None, - ) -> DataFrame: + >>> df1.merge(df2, how="inner", on="a") + a b c + 0 foo 1 3 + + >>> df1.merge(df2, how="left", on="a") + a b c + 0 foo 1 3.0 + 1 bar 2 NaN + + >>> df1 = pd.DataFrame({"left": ["foo", "bar"]}) + >>> df2 = pd.DataFrame({"right": [7, 8]}) + >>> df1 + left + 0 foo + 1 bar + >>> df2 + right + 0 7 + 1 8 + + >>> df1.merge(df2, how="cross") + left right + 0 foo 7 + 1 foo 8 + 2 bar 7 + 3 bar 8 + """ self._check_copy_deprecation(copy) from pandas.core.reshape.merge import merge @@ -12291,7 +17510,6 @@ def any( **kwargs, ) -> Series | bool: ... - @doc(make_doc("any", ndim=1)) def any( self, *, @@ -12300,6 +17518,118 @@ def any( skipna: bool = True, **kwargs, ) -> Series | bool: + """ + Return whether any element is True, potentially over an axis. + + Returns False unless there is at least one element within a series or + along a Dataframe axis that is True or equivalent (e.g. non-zero or + non-empty). + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns', None}, default 0 + Indicate which axis or axes should be reduced. For `Series` this parameter + is unused and defaults to 0. 
+ + * 0 / 'index' : reduce the index, return a Series whose index is the + original column labels. + * 1 / 'columns' : reduce the columns, return a Series whose index is the + original index. + * None : reduce all axes, return a scalar. + + bool_only : bool, default False + Include only boolean columns. Not implemented for Series. + skipna : bool, default True + Exclude NA/null values. If the entire row/column is NA and skipna is + True, then the result will be False, as for an empty row/column. + If skipna is False, then NA are treated as True, because these are not + equal to zero. + **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + Series or scalar + If axis=None, then a scalar boolean is returned. + Otherwise a Series is returned with index matching the index argument. + + See Also + -------- + numpy.any : Numpy version of this method. + Series.any : Return whether any element is True. + Series.all : Return whether all elements are True. + DataFrame.any : Return whether any element is True over requested axis. + DataFrame.all : Return whether all elements are True over requested axis. + + Examples + -------- + **Series** + + For Series input, the output is a scalar indicating whether any element + is True. + + >>> pd.Series([False, False]).any() + False + >>> pd.Series([True, False]).any() + True + >>> pd.Series([], dtype="float64").any() + False + >>> pd.Series([np.nan]).any() + False + >>> pd.Series([np.nan]).any(skipna=False) + True + + **DataFrame** + + Whether each column contains at least one True element (the default). + + >>> df = pd.DataFrame({"A": [1, 2], "B": [0, 2], "C": [0, 0]}) + >>> df + A B C + 0 1 0 0 + 1 2 2 0 + + >>> df.any() + A True + B True + C False + dtype: bool + + Aggregating over the columns. 
+ + >>> df = pd.DataFrame({"A": [True, False], "B": [1, 2]}) + >>> df + A B + 0 True 1 + 1 False 2 + + >>> df.any(axis="columns") + 0 True + 1 True + dtype: bool + + >>> df = pd.DataFrame({"A": [True, False], "B": [1, 0]}) + >>> df + A B + 0 True 1 + 1 False 0 + + >>> df.any(axis="columns") + 0 True + 1 False + dtype: bool + + Aggregating over the entire DataFrame with ``axis=None``. + + >>> df.any(axis=None) + True + + `any` for an empty DataFrame is an empty Series. + + >>> pd.DataFrame([]).any() + Series([], dtype: bool) + """ result = self._logical_func( "any", nanops.nanany, axis, bool_only, skipna, **kwargs ) @@ -12338,7 +17668,6 @@ def all( ) -> Series | bool: ... @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="all") - @doc(make_doc("all", ndim=1)) def all( self, axis: Axis | None = 0, @@ -12346,6 +17675,91 @@ def all( skipna: bool = True, **kwargs, ) -> Series | bool: + """ + Return whether all elements are True, potentially over an axis. + + Returns True unless there at least one element within a series or + along a Dataframe axis that is False or equivalent (e.g. zero or + empty). + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns', None}, default 0 + Indicate which axis or axes should be reduced. For `Series` this parameter + is unused and defaults to 0. + + * 0 / 'index' : reduce the index, return a Series whose index is the + original column labels. + * 1 / 'columns' : reduce the columns, return a Series whose index is the + original index. + * None : reduce all axes, return a scalar. + + bool_only : bool, default False + Include only boolean columns. Not implemented for Series. + skipna : bool, default True + Exclude NA/null values. If the entire row/column is NA and skipna is + True, then the result will be True, as for an empty row/column. + If skipna is False, then NA are treated as True, because these are not + equal to zero. 
+ **kwargs : any, default None + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + Series or scalar + If axis=None, then a scalar boolean is returned. + Otherwise a Series is returned with index matching the index argument. + + See Also + -------- + Series.all : Return True if all elements are True. + DataFrame.any : Return True if one (or more) elements are True. + + Examples + -------- + **Series** + + >>> pd.Series([True, True]).all() + True + >>> pd.Series([True, False]).all() + False + >>> pd.Series([], dtype="float64").all() + True + >>> pd.Series([np.nan]).all() + True + >>> pd.Series([np.nan]).all(skipna=False) + True + + **DataFrames** + + Create a DataFrame from a dictionary. + + >>> df = pd.DataFrame({"col1": [True, True], "col2": [True, False]}) + >>> df + col1 col2 + 0 True True + 1 True False + + Default behaviour checks if values in each column all return True. + + >>> df.all() + col1 True + col2 False + dtype: bool + + Specify ``axis='columns'`` to check if values in each row all return True. + + >>> df.all(axis="columns") + 0 True + 1 False + dtype: bool + + Or ``axis=None`` for whether every value is True. + + >>> df.all(axis=None) + False + """ result = self._logical_func( "all", nanops.nanall, axis, bool_only, skipna, **kwargs ) @@ -12385,7 +17799,6 @@ def min( ) -> Series | Any: ... @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="min") - @doc(make_doc("min", ndim=2)) def min( self, axis: Axis | None = 0, @@ -12393,6 +17806,67 @@ def min( numeric_only: bool = False, **kwargs, ) -> Series | Any: + """ + Return the minimum of the values over the requested axis. + + If you want the *index* of the minimum, use ``idxmin``. This is + the equivalent of the ``numpy.ndarray`` method ``argmin``. + + Parameters + ---------- + axis : {index (0), columns (1)} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. 
+ + For DataFrames, specifying ``axis=None`` will apply the aggregation + across both axes. + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. + + **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + Series or scalar + Value containing the calculation referenced in the description. + + See Also + -------- + Series.sum : Return the sum. + Series.min : Return the minimum. + Series.max : Return the maximum. + Series.idxmin : Return the index of the minimum. + Series.idxmax : Return the index of the maximum. + DataFrame.sum : Return the sum over the requested axis. + DataFrame.min : Return the minimum over the requested axis. + DataFrame.max : Return the maximum over the requested axis. + DataFrame.idxmin : Return the index of the minimum over the requested axis. + DataFrame.idxmax : Return the index of the maximum over the requested axis. + + Examples + -------- + >>> idx = pd.MultiIndex.from_arrays( + ... [["warm", "warm", "cold", "cold"], ["dog", "falcon", "fish", "spider"]], + ... names=["blooded", "animal"], + ... ) + >>> s = pd.Series([4, 2, 0, 8], name="legs", index=idx) + >>> s + blooded animal + warm dog 4 + falcon 2 + cold fish 0 + spider 8 + Name: legs, dtype: int64 + + >>> s.min() + 0 + """ result = super().min( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -12432,7 +17906,6 @@ def max( ) -> Series | Any: ... @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="max") - @doc(make_doc("max", ndim=2)) def max( self, axis: Axis | None = 0, @@ -12440,6 +17913,67 @@ def max( numeric_only: bool = False, **kwargs, ) -> Series | Any: + """ + Return the maximum of the values over the requested axis. + + If you want the *index* of the maximum, use ``idxmax``. This is + the equivalent of the ``numpy.ndarray`` method ``argmax``. 
+ + Parameters + ---------- + axis : {index (0), columns (1)} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. + + For DataFrames, specifying ``axis=None`` will apply the aggregation + across both axes. + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. + + **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + Series or scalar + Value containing the calculation referenced in the description. + + See Also + -------- + Series.sum : Return the sum. + Series.min : Return the minimum. + Series.max : Return the maximum. + Series.idxmin : Return the index of the minimum. + Series.idxmax : Return the index of the maximum. + DataFrame.sum : Return the sum over the requested axis. + DataFrame.min : Return the minimum over the requested axis. + DataFrame.max : Return the maximum over the requested axis. + DataFrame.idxmin : Return the index of the minimum over the requested axis. + DataFrame.idxmax : Return the index of the maximum over the requested axis. + + Examples + -------- + >>> idx = pd.MultiIndex.from_arrays( + ... [["warm", "warm", "cold", "cold"], ["dog", "falcon", "fish", "spider"]], + ... names=["blooded", "animal"], + ... ) + >>> s = pd.Series([4, 2, 0, 8], name="legs", index=idx) + >>> s + blooded animal + warm dog 4 + falcon 2 + cold fish 0 + spider 8 + Name: legs, dtype: int64 + + >>> s.max() + 8 + """ result = super().max( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -12656,25 +18190,101 @@ def mean( **kwargs, ) -> Any: ... - @overload - def mean( - self, - *, - axis: Axis | None, - skipna: bool = ..., - numeric_only: bool = ..., - **kwargs, - ) -> Series | Any: ... 
+ @overload + def mean( + self, + *, + axis: Axis | None, + skipna: bool = ..., + numeric_only: bool = ..., + **kwargs, + ) -> Series | Any: ... + + @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="mean") + def mean( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, + ) -> Series | Any: + """ + Return the mean of the values over the requested axis. + + Parameters + ---------- + axis : {index (0), columns (1)} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. + + For DataFrames, specifying ``axis=None`` will apply the aggregation + across both axes. + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. + + **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + Series or scalar + Value containing the calculation referenced in the description. - @deprecate_nonkeyword_arguments(Pandas4Warning, allowed_args=["self"], name="mean") - @doc(make_doc("mean", ndim=2)) - def mean( - self, - axis: Axis | None = 0, - skipna: bool = True, - numeric_only: bool = False, - **kwargs, - ) -> Series | Any: + See Also + -------- + Series.sum : Return the sum. + Series.min : Return the minimum. + Series.max : Return the maximum. + Series.idxmin : Return the index of the minimum. + Series.idxmax : Return the index of the maximum. + DataFrame.sum : Return the sum over the requested axis. + DataFrame.min : Return the minimum over the requested axis. + DataFrame.max : Return the maximum over the requested axis. + DataFrame.idxmin : Return the index of the minimum over the requested axis. + DataFrame.idxmax : Return the index of the maximum over the requested axis. + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.mean() + 2.0 + + With a DataFrame + + >>> df = pd.DataFrame( + ... 
{"a": [1, 2], "b": [2, 3]}, index=["tiger", "zebra"] + ... ) + >>> df + a b + tiger 1 2 + zebra 2 3 + >>> df.mean() + a 1.5 + b 2.5 + dtype: float64 + + Using axis=1 + + >>> df.mean(axis=1) + tiger 1.5 + zebra 2.5 + dtype: float64 + + In this case, `numeric_only` should be set to `True` to avoid + getting an error. + + >>> df = pd.DataFrame( + ... {"a": [1, 2], "b": ["T", "Z"]}, index=["tiger", "zebra"] + ... ) + >>> df.mean(numeric_only=True) + a 1.5 + dtype: float64 + """ result = super().mean( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -12716,7 +18326,6 @@ def median( @deprecate_nonkeyword_arguments( Pandas4Warning, allowed_args=["self"], name="median" ) - @doc(make_doc("median", ndim=2)) def median( self, axis: Axis | None = 0, @@ -12724,6 +18333,83 @@ def median( numeric_only: bool = False, **kwargs, ) -> Series | Any: + """ + Return the median of the values over the requested axis. + + Parameters + ---------- + axis : {index (0), columns (1)} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. + + For DataFrames, specifying ``axis=None`` will apply the aggregation + across both axes. + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. + + **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + Series or scalar + Value containing the calculation referenced in the description. + + See Also + -------- + Series.sum : Return the sum. + Series.min : Return the minimum. + Series.max : Return the maximum. + Series.idxmin : Return the index of the minimum. + Series.idxmax : Return the index of the maximum. + DataFrame.sum : Return the sum over the requested axis. + DataFrame.min : Return the minimum over the requested axis. + DataFrame.max : Return the maximum over the requested axis. 
+ DataFrame.idxmin : Return the index of the minimum over the requested axis. + DataFrame.idxmax : Return the index of the maximum over the requested axis. + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.median() + 2.0 + + With a DataFrame + + >>> df = pd.DataFrame( + ... {"a": [1, 2], "b": [2, 3]}, index=["tiger", "zebra"] + ... ) + >>> df + a b + tiger 1 2 + zebra 2 3 + >>> df.median() + a 1.5 + b 2.5 + dtype: float64 + + Using axis=1 + + >>> df.median(axis=1) + tiger 1.5 + zebra 2.5 + dtype: float64 + + In this case, `numeric_only` should be set to `True` + to avoid getting an error. + + >>> df = pd.DataFrame( + ... {"a": [1, 2], "b": ["T", "Z"]}, index=["tiger", "zebra"] + ... ) + >>> df.median(numeric_only=True) + a 1.5 + dtype: float64 + """ result = super().median( axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) @@ -13345,7 +19031,6 @@ def kurt( kurtosis = kurt # type: ignore[assignment] product = prod - @doc(make_doc("cummin", ndim=2)) def cummin( self, axis: Axis = 0, @@ -13354,10 +19039,107 @@ def cummin( *args, **kwargs, ) -> Self: + """ + Return cumulative minimum over a DataFrame or Series axis. + + Returns a DataFrame or Series of the same size containing the cumulative + minimum. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. + For `Series` this parameter is unused and defaults to 0. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + numeric_only : bool, default False + Include only float, int, boolean columns. + *args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + Series or DataFrame + Return cumulative minimum of Series or DataFrame. + + See Also + -------- + core.window.expanding.Expanding.min : Similar functionality + but ignores ``NaN`` values. 
+ DataFrame.min : Return the minimum over + DataFrame axis. + DataFrame.cummax : Return cumulative maximum over DataFrame axis. + DataFrame.cummin : Return cumulative minimum over DataFrame axis. + DataFrame.cumsum : Return cumulative sum over DataFrame axis. + DataFrame.cumprod : Return cumulative product over DataFrame axis. + + Examples + -------- + **Series** + + >>> s = pd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 NaN + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: float64 + + By default, NA values are ignored. + + >>> s.cummin() + 0 2.0 + 1 NaN + 2 2.0 + 3 -1.0 + 4 -1.0 + dtype: float64 + + To include NA values in the operation, use ``skipna=False`` + + >>> s.cummin(skipna=False) + 0 2.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + **DataFrame** + + >>> df = pd.DataFrame( + ... [[2.0, 1.0], [3.0, np.nan], [1.0, 0.0]], columns=list("AB") + ... ) + >>> df + A B + 0 2.0 1.0 + 1 3.0 NaN + 2 1.0 0.0 + + By default, iterates over rows and finds the minimum + in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + + >>> df.cummin() + A B + 0 2.0 1.0 + 1 2.0 NaN + 2 1.0 0.0 + + To iterate over columns and find the minimum in each row, + use ``axis=1`` + + >>> df.cummin(axis=1) + A B + 0 2.0 1.0 + 1 3.0 NaN + 2 1.0 0.0 + """ data = self._get_numeric_data() if numeric_only else self return NDFrame.cummin(data, axis, skipna, *args, **kwargs) - @doc(make_doc("cummax", ndim=2)) def cummax( self, axis: Axis = 0, @@ -13366,10 +19148,107 @@ def cummax( *args, **kwargs, ) -> Self: + """ + Return cumulative maximum over a DataFrame or Series axis. + + Returns a DataFrame or Series of the same size containing the cumulative + maximum. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. + For `Series` this parameter is unused and defaults to 0. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. 
+ numeric_only : bool, default False + Include only float, int, boolean columns. + *args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + Series or DataFrame + Return cumulative maximum of Series or DataFrame. + + See Also + -------- + core.window.expanding.Expanding.max : Similar functionality + but ignores ``NaN`` values. + DataFrame.max : Return the maximum over + DataFrame axis. + DataFrame.cummax : Return cumulative maximum over DataFrame axis. + DataFrame.cummin : Return cumulative minimum over DataFrame axis. + DataFrame.cumsum : Return cumulative sum over DataFrame axis. + DataFrame.cumprod : Return cumulative product over DataFrame axis. + + Examples + -------- + **Series** + + >>> s = pd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 NaN + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: float64 + + By default, NA values are ignored. + + >>> s.cummax() + 0 2.0 + 1 NaN + 2 5.0 + 3 5.0 + 4 5.0 + dtype: float64 + + To include NA values in the operation, use ``skipna=False`` + + >>> s.cummax(skipna=False) + 0 2.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + **DataFrame** + + >>> df = pd.DataFrame( + ... [[2.0, 1.0], [3.0, np.nan], [1.0, 0.0]], columns=list("AB") + ... ) + >>> df + A B + 0 2.0 1.0 + 1 3.0 NaN + 2 1.0 0.0 + + By default, iterates over rows and finds the maximum + in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + + >>> df.cummax() + A B + 0 2.0 1.0 + 1 3.0 NaN + 2 3.0 1.0 + + To iterate over columns and find the maximum in each row, + use ``axis=1`` + + >>> df.cummax(axis=1) + A B + 0 2.0 2.0 + 1 3.0 NaN + 2 1.0 1.0 + """ data = self._get_numeric_data() if numeric_only else self return NDFrame.cummax(data, axis, skipna, *args, **kwargs) - @doc(make_doc("cumsum", ndim=2)) def cumsum( self, axis: Axis = 0, @@ -13378,10 +19257,107 @@ def cumsum( *args, **kwargs, ) -> Self: + """ + Return cumulative sum over a DataFrame or Series axis. 
+ + Returns a DataFrame or Series of the same size containing the cumulative + sum. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. + For `Series` this parameter is unused and defaults to 0. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + numeric_only : bool, default False + Include only float, int, boolean columns. + *args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + Series or DataFrame + Return cumulative sum of Series or DataFrame. + + See Also + -------- + core.window.expanding.Expanding.sum : Similar functionality + but ignores ``NaN`` values. + DataFrame.sum : Return the sum over + DataFrame axis. + DataFrame.cummax : Return cumulative maximum over DataFrame axis. + DataFrame.cummin : Return cumulative minimum over DataFrame axis. + DataFrame.cumsum : Return cumulative sum over DataFrame axis. + DataFrame.cumprod : Return cumulative product over DataFrame axis. + + Examples + -------- + **Series** + + >>> s = pd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 NaN + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: float64 + + By default, NA values are ignored. + + >>> s.cumsum() + 0 2.0 + 1 NaN + 2 7.0 + 3 6.0 + 4 6.0 + dtype: float64 + + To include NA values in the operation, use ``skipna=False`` + + >>> s.cumsum(skipna=False) + 0 2.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + **DataFrame** + + >>> df = pd.DataFrame( + ... [[2.0, 1.0], [3.0, np.nan], [1.0, 0.0]], columns=list("AB") + ... ) + >>> df + A B + 0 2.0 1.0 + 1 3.0 NaN + 2 1.0 0.0 + + By default, iterates over rows and finds the sum + in each column. This is equivalent to ``axis=None`` or ``axis='index'``. 
+ + >>> df.cumsum() + A B + 0 2.0 1.0 + 1 5.0 NaN + 2 6.0 1.0 + + To iterate over columns and find the sum in each row, + use ``axis=1`` + + >>> df.cumsum(axis=1) + A B + 0 2.0 3.0 + 1 3.0 NaN + 2 1.0 1.0 + """ data = self._get_numeric_data() if numeric_only else self return NDFrame.cumsum(data, axis, skipna, *args, **kwargs) - @doc(make_doc("cumprod", 2)) def cumprod( self, axis: Axis = 0, @@ -13390,6 +19366,104 @@ def cumprod( *args, **kwargs, ) -> Self: + """ + Return cumulative product over a DataFrame or Series axis. + + Returns a DataFrame or Series of the same size containing the cumulative + product. + + Parameters + ---------- + axis : {0 or 'index', 1 or 'columns'}, default 0 + The index or the name of the axis. 0 is equivalent to None or 'index'. + For `Series` this parameter is unused and defaults to 0. + skipna : bool, default True + Exclude NA/null values. If an entire row/column is NA, the result + will be NA. + numeric_only : bool, default False + Include only float, int, boolean columns. + *args, **kwargs + Additional keywords have no effect but might be accepted for + compatibility with NumPy. + + Returns + ------- + Series or DataFrame + Return cumulative product of Series or DataFrame. + + See Also + -------- + core.window.expanding.Expanding.prod : Similar functionality + but ignores ``NaN`` values. + DataFrame.prod : Return the product over + DataFrame axis. + DataFrame.cummax : Return cumulative maximum over DataFrame axis. + DataFrame.cummin : Return cumulative minimum over DataFrame axis. + DataFrame.cumsum : Return cumulative sum over DataFrame axis. + DataFrame.cumprod : Return cumulative product over DataFrame axis. + + Examples + -------- + **Series** + + >>> s = pd.Series([2, np.nan, 5, -1, 0]) + >>> s + 0 2.0 + 1 NaN + 2 5.0 + 3 -1.0 + 4 0.0 + dtype: float64 + + By default, NA values are ignored. 
+ + >>> s.cumprod() + 0 2.0 + 1 NaN + 2 10.0 + 3 -10.0 + 4 -0.0 + dtype: float64 + + To include NA values in the operation, use ``skipna=False`` + + >>> s.cumprod(skipna=False) + 0 2.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + dtype: float64 + + **DataFrame** + + >>> df = pd.DataFrame( + ... [[2.0, 1.0], [3.0, np.nan], [1.0, 0.0]], columns=list("AB") + ... ) + >>> df + A B + 0 2.0 1.0 + 1 3.0 NaN + 2 1.0 0.0 + + By default, iterates over rows and finds the product + in each column. This is equivalent to ``axis=None`` or ``axis='index'``. + + >>> df.cumprod() + A B + 0 2.0 1.0 + 1 6.0 NaN + 2 6.0 0.0 + + To iterate over columns and find the product in each row, + use ``axis=1`` + + >>> df.cumprod(axis=1) + A B + 0 2.0 2.0 + 1 3.0 NaN + 2 1.0 0.0 + """ data = self._get_numeric_data() if numeric_only else self return NDFrame.cumprod(data, axis, skipna, *args, **kwargs)