{ "cells": [ { "cell_type": "markdown", "id": "faba2d3c", "metadata": {}, "source": [ "(stats_tutorial)=\n", "# Intro to the stats module" ] }, { "cell_type": "code", "execution_count": 1, "id": "648bb9c8", "metadata": {}, "outputs": [], "source": [ "from scipy import stats\n", "import numpy as np\n", "from xarray_einstats.stats import XrContinuousRV, rankdata, hmean, skew, median_abs_deviation\n", "from xarray_einstats.tutorial import generate_mcmc_like_dataset" ] }, { "cell_type": "code", "execution_count": 2, "id": "113cbf73", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset> Size: 6kB\n",
       "Dimensions:  (plot_dim: 20, chain: 4, draw: 10, team: 6, match: 12)\n",
       "Coordinates:\n",
       "  * team     (team) <U1 24B 'a' 'b' 'c' 'd' 'e' 'f'\n",
       "  * chain    (chain) int64 32B 0 1 2 3\n",
       "  * draw     (draw) int64 80B 0 1 2 3 4 5 6 7 8 9\n",
       "Dimensions without coordinates: plot_dim, match\n",
       "Data variables:\n",
       "    x_plot   (plot_dim) float64 160B 0.0 0.5263 1.053 1.579 ... 8.947 9.474 10.0\n",
       "    mu       (chain, draw, team) float64 2kB 0.2296 0.5383 ... 0.4452 2.004\n",
       "    sigma    (chain, draw) float64 320B 0.3703 0.00899 0.1398 ... 0.2246 0.2875\n",
       "    score    (chain, draw, match) int64 4kB 1 0 0 1 4 1 1 0 ... 0 0 2 1 2 1 0 2
" ], "text/plain": [ " Size: 6kB\n", "Dimensions: (plot_dim: 20, chain: 4, draw: 10, team: 6, match: 12)\n", "Coordinates:\n", " * team (team) \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray (rv_dim0: 100, chain: 4, draw: 10, team: 6)> Size: 192kB\n",
       "-0.1369 0.312 1.122 0.296 -0.3041 2.919 ... 0.2673 0.07624 0.02325 0.3478 2.179\n",
       "Coordinates:\n",
       "  * team     (team) <U1 24B 'a' 'b' 'c' 'd' 'e' 'f'\n",
       "  * chain    (chain) int64 32B 0 1 2 3\n",
       "  * draw     (draw) int64 80B 0 1 2 3 4 5 6 7 8 9\n",
       "Dimensions without coordinates: rv_dim0
" ], "text/plain": [ " Size: 192kB\n", "-0.1369 0.312 1.122 0.296 -0.3041 2.919 ... 0.2673 0.07624 0.02325 0.3478 2.179\n", "Coordinates:\n", " * team (team) \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray (subject: 5, batch: 3, chain: 4, draw: 10, team: 6)> Size: 29kB\n",
       "0.1375 0.729 1.113 0.5688 0.1979 3.794 ... -0.1246 0.2988 0.3684 0.1294 1.877\n",
       "Coordinates:\n",
       "  * team     (team) <U1 24B 'a' 'b' 'c' 'd' 'e' 'f'\n",
       "  * chain    (chain) int64 32B 0 1 2 3\n",
       "  * draw     (draw) int64 80B 0 1 2 3 4 5 6 7 8 9\n",
       "Dimensions without coordinates: subject, batch
" ], "text/plain": [ " Size: 29kB\n", "0.1375 0.729 1.113 0.5688 0.1979 3.794 ... -0.1246 0.2988 0.3684 0.1294 1.877\n", "Coordinates:\n", " * team (team) \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray (subject: 5, batch: 3, chain: 4, draw: 10, team: 6)> Size: 29kB\n",
       "-0.1794 0.5826 1.101 -0.07705 0.1534 3.711 ... 0.7631 0.2766 0.4849 0.5801 2.014\n",
       "Coordinates:\n",
       "  * subject  (subject) <U23 460B 'Monstera deliciosa' ... 'Monstera pinnatipa...\n",
       "  * batch    (batch) <U7 84B 'March' 'June' 'October'\n",
       "  * team     (team) <U1 24B 'a' 'b' 'c' 'd' 'e' 'f'\n",
       "  * chain    (chain) int64 32B 0 1 2 3\n",
       "  * draw     (draw) int64 80B 0 1 2 3 4 5 6 7 8 9
" ], "text/plain": [ " Size: 29kB\n", "-0.1794 0.5826 1.101 -0.07705 0.1534 3.711 ... 0.7631 0.2766 0.4849 0.5801 2.014\n", "Coordinates:\n", " * subject (subject) \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray 'x_plot' (plot_dim: 20, chain: 4, draw: 10, team: 6)> Size: 38kB\n",
       "-0.1177 -0.9821 -4.519 0.06682 0.02491 ... -594.3 -600.6 -561.9 -551.8 -386.4\n",
       "Coordinates:\n",
       "  * chain    (chain) int64 32B 0 1 2 3\n",
       "  * draw     (draw) int64 80B 0 1 2 3 4 5 6 7 8 9\n",
       "  * team     (team) <U1 24B 'a' 'b' 'c' 'd' 'e' 'f'\n",
       "Dimensions without coordinates: plot_dim
" ], "text/plain": [ " Size: 38kB\n", "-0.1177 -0.9821 -4.519 0.06682 0.02491 ... -594.3 -600.6 -561.9 -551.8 -386.4\n", "Coordinates:\n", " * chain (chain) int64 32B 0 1 2 3\n", " * draw (draw) int64 80B 0 1 2 3 4 5 6 7 8 9\n", " * team (team) \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray (quantile: 3, chain: 4, draw: 10, team: 6)> Size: 6kB\n",
       "-0.02018 0.2885 0.8726 -0.204 -0.1332 ... 0.2786 0.2264 0.5523 0.6391 2.198\n",
       "Coordinates:\n",
       "  * quantile  (quantile) float64 24B 0.25 0.5 0.75\n",
       "  * chain     (chain) int64 32B 0 1 2 3\n",
       "  * draw      (draw) int64 80B 0 1 2 3 4 5 6 7 8 9\n",
       "  * team      (team) <U1 24B 'a' 'b' 'c' 'd' 'e' 'f'
" ], "text/plain": [ " Size: 6kB\n", "-0.02018 0.2885 0.8726 -0.204 -0.1332 ... 0.2786 0.2264 0.5523 0.6391 2.198\n", "Coordinates:\n", " * quantile (quantile) float64 24B 0.25 0.5 0.75\n", " * chain (chain) int64 32B 0 1 2 3\n", " * draw (draw) int64 80B 0 1 2 3 4 5 6 7 8 9\n", " * team (team) \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray (point: 50, chain: 4, draw: 10, team: 6)> Size: 96kB\n",
       "5.321e-44 2.898e-49 4.753e-60 5.206e-41 ... 3.563e-57 4.449e-55 3.664e-24\n",
       "Coordinates:\n",
       "  * point    (point) float64 400B -5.0 -4.796 -4.592 -4.388 ... 4.592 4.796 5.0\n",
       "  * chain    (chain) int64 32B 0 1 2 3\n",
       "  * draw     (draw) int64 80B 0 1 2 3 4 5 6 7 8 9\n",
       "  * team     (team) <U1 24B 'a' 'b' 'c' 'd' 'e' 'f'
" ], "text/plain": [ " Size: 96kB\n", "5.321e-44 2.898e-49 4.753e-60 5.206e-41 ... 3.563e-57 4.449e-55 3.664e-24\n", "Coordinates:\n", " * point (point) float64 400B -5.0 -4.796 -4.592 -4.388 ... 4.592 4.796 5.0\n", " * chain (chain) int64 32B 0 1 2 3\n", " * draw (draw) int64 80B 0 1 2 3 4 5 6 7 8 9\n", " * team (team) " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "plt.rcParams[\"figure.facecolor\"] = \"white\"\n", "\n", "fig, ax = plt.subplots()\n", "ax.plot(pdf.point, pdf.sel(team=\"d\", chain=2), color=\"C0\", alpha=.5)\n", "ax.set(xlabel=\"x\", ylabel=\"pdf of normal distribution\", );" ] }, { "cell_type": "markdown", "id": "b04ac95c", "metadata": {}, "source": [ "(stats_tutorial/other)=\n", "## Other functions\n", "The rest of the functions in the module have a very similar API to their scipy counterparts. The only differences are:\n", "\n", "* They take `dims` instead of `axis`. Moreover, `dims` can be a `str` or a sequence of `str`, whereas `axis` only supports a single integer.\n", "* Arguments that take {term}`numpy:array_like` as values take `DataArray` inputs instead. For example, the `scale` argument in {func}`~xarray_einstats.stats.median_abs_deviation`.\n", "* They accept extra arbitrary kwargs, which are passed to {func}`xarray.apply_ufunc`.\n", "\n", "Here are some examples of using functions in the `stats` module of `xarray_einstats` with the `dims` argument instead of `axis`." ] }, { "cell_type": "code", "execution_count": 11, "id": "ba457be7", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray 'mu' (chain: 4, draw: 10)> Size: 320B\n",
       "0.1588 0.2123 0.5543 0.7826 0.1913 0.6035 ... 0.1269 0.712 0.3044 0.1936 0.1223\n",
       "Coordinates:\n",
       "  * chain    (chain) int64 32B 0 1 2 3\n",
       "  * draw     (draw) int64 80B 0 1 2 3 4 5 6 7 8 9
" ], "text/plain": [ " Size: 320B\n", "0.1588 0.2123 0.5543 0.7826 0.1913 0.6035 ... 0.1269 0.712 0.3044 0.1936 0.1223\n", "Coordinates:\n", " * chain (chain) int64 32B 0 1 2 3\n", " * draw (draw) int64 80B 0 1 2 3 4 5 6 7 8 9" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hmean(ds[\"mu\"], dims=\"team\")" ] }, { "cell_type": "code", "execution_count": 12, "id": "bfe438ce", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray 'score' (match: 12, chain: 4, draw: 10)> Size: 4kB\n",
       "14 14 14 14 14 31 14 1 31 14 31 1 14 1 ... 15 15 15 15 15 1 34 15 15 1 34 34 34\n",
       "Dimensions without coordinates: match, chain, draw
" ], "text/plain": [ " Size: 4kB\n", "14 14 14 14 14 31 14 1 31 14 31 1 14 1 ... 15 15 15 15 15 1 34 15 15 1 34 34 34\n", "Dimensions without coordinates: match, chain, draw" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rankdata(ds[\"score\"], dims=(\"chain\", \"draw\"), method=\"min\")" ] }, { "cell_type": "markdown", "id": "3f247801", "metadata": {}, "source": [ ":::{important}\n", "The statistical summaries and other statistical functions can take both {class}`~xarray.DataArray` and {class}`~xarray.Dataset`. The methods of the probability distribution classes and the functions in the linear algebra module\n", "are tested only on `DataArray`s.\n", "\n", "When using `Dataset` inputs, you must make sure that all the dimensions in `dims` are\n", "present in _all_ the `DataArray`s within the `Dataset`.\n", ":::" ] }, { "cell_type": "code", "execution_count": 13, "id": "ea516a32", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset> Size: 176B\n",
       "Dimensions:  (match: 12, team: 6)\n",
       "Coordinates:\n",
       "  * team     (team) <U1 24B 'a' 'b' 'c' 'd' 'e' 'f'\n",
       "Dimensions without coordinates: match\n",
       "Data variables:\n",
       "    score    (match) float64 96B 1.466 0.2149 0.6788 1.361 ... 1.099 1.156 1.265\n",
       "    mu       (team) float64 48B 0.8152 1.84 2.102 1.806 1.091 0.9678\n",
       "    sigma    float64 8B 1.314
" ], "text/plain": [ " Size: 176B\n", "Dimensions: (match: 12, team: 6)\n", "Coordinates:\n", " * team (team) \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset> Size: 32B\n",
       "Dimensions:  ()\n",
       "Data variables:\n",
       "    x_plot   float64 8B 2.632\n",
       "    mu       float64 8B 0.4878\n",
       "    sigma    float64 8B 0.39\n",
       "    score    float64 8B 1.0
" ], "text/plain": [ " Size: 32B\n", "Dimensions: ()\n", "Data variables:\n", " x_plot float64 8B 2.632\n", " mu float64 8B 0.4878\n", " sigma float64 8B 0.39\n", " score float64 8B 1.0" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "median_abs_deviation(ds)" ] }, { "cell_type": "code", "execution_count": 15, "id": "b4fce79a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Last updated: Thu May 22 2025\n", "\n", "Python implementation: CPython\n", "Python version : 3.12.7\n", "IPython version : 8.29.0\n", "\n", "xarray_einstats: 0.9.0\n", "xarray : 2025.4.0\n", "\n", "matplotlib : 3.10.1\n", "numpy : 2.2.6\n", "xarray_einstats: 0.9.0\n", "scipy : 1.15.2\n", "\n", "Watermark: 2.5.0\n", "\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark -n -u -v -iv -w -p xarray_einstats,xarray" ] }, { "cell_type": "code", "execution_count": null, "id": "7a08ad50", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 5 }