From 05ff5b4d74198b39e1729e5e1f95207685cb428f Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Thu, 16 May 2019 00:17:06 -0700
Subject: [PATCH 1/8] Let test cover data sizes both above and below n

---
 Lib/test/test_statistics.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 1922de5df4b0c5..3790ed44783123 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2161,17 +2161,18 @@ def test_specific_cases(self):
             # Quantiles should be idempotent
             if len(expected) >= 2:
                 self.assertEqual(quantiles(expected, n=n), expected)
-            # Cross-check against other methods
-            if len(data) >= n:
-                # After end caps are added, method='inclusive' should
-                # give the same result as method='exclusive' whenever
-                # there are more data points than desired cut points.
-                padded_data = [min(data) - 1000] + data + [max(data) + 1000]
-                self.assertEqual(
-                    quantiles(data, n=n),
-                    quantiles(padded_data, n=n, method='inclusive'),
-                    (n, data),
-                )
+            # Cross-check against method='inclusive' which should give
+            # the same result after adding in minimum and maximum values
+            # extrapolated from the two lowest and two highest points.
+            sdata = sorted(data)
+            lo = 2 * sdata[0] - sdata[1]
+            hi = 2 * sdata[-1] - sdata[-2]
+            padded_data = data + [lo, hi]
+            self.assertEqual(
+                quantiles(data, n=n),
+                quantiles(padded_data, n=n, method='inclusive'),
+                (n, data),
+            )
             # Invariant under tranlation and scaling
             def f(x):
                 return 3.5 * x - 1234.675

From e0f12a4c8407ad2414351cc65cea000dcde4c3be Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Thu, 16 May 2019 00:48:21 -0700
Subject: [PATCH 2/8] Verify that Q2 agrees with median() for various data sizes

---
 Lib/test/test_statistics.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index 3790ed44783123..bf5f1067e7b912 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2189,6 +2189,11 @@ def f(x):
             actual = quantiles(statistics.NormalDist(), n=n)
             self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
                             for e, a in zip(expected, actual)))
+        # Q2 agrees with median()
+        for k in range(2, 60):
+            data = random.choices(range(100), k=k)
+            q1, q2, q3 = quantiles(data)
+            self.assertEqual(q2, statistics.median(data))
 
     def test_specific_cases_inclusive(self):
         # Match results computed by hand and cross-checked
@@ -2243,6 +2248,11 @@ def f(x):
         data.remove(max(data))
         expected = quantiles(data, n=32)
         self.assertEqual(expected, actual)
+        # Q2 agrees with median()
+        for k in range(2, 60):
+            data = random.choices(range(100), k=k)
+            q1, q2, q3 = quantiles(data, method='inclusive')
+            self.assertEqual(q2, statistics.median(data))
 
     def test_equal_inputs(self):
         quantiles = statistics.quantiles

From df091f319fbc9c06a54af8c2e4e2d9f42d8e4106 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Thu, 16 May 2019 07:05:24 -0700
Subject: [PATCH 3/8] Test deciles

---
 Lib/test/test_statistics.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py
index bf5f1067e7b912..946c7428c61311 100644
--- a/Lib/test/test_statistics.py
+++ b/Lib/test/test_statistics.py
@@ -2239,6 +2239,11 @@ def f(x):
             actual = quantiles(statistics.NormalDist(), n=n, method="inclusive")
             self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
                             for e, a in zip(expected, actual)))
+        # Natural deciles
+        self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
+                         [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
+        self.assertEqual(quantiles(range(0, 101), n=10, method='inclusive'),
+                         [10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
         # Whenever n is smaller than the number of data points, running
         # method='inclusive' should give the same result as method='exclusive'
         # after the two included extreme points are removed.

From 50a92ab1c44c8faac4ce2c74b70cf5642a0c2b19 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sat, 18 May 2019 00:54:17 -0700
Subject: [PATCH 4/8] Elaborate on the *method* parameter

---
 Doc/library/statistics.rst | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index fb7df4e7188a07..dd80a1a46f8dce 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -522,11 +522,26 @@ However, for reading convenience, most of the examples show sorted sequences.
    value or compute ``106`` as the midpoint).  This might matter if
    there are too few samples for a given number of cut points.
 
-   If *method* is set to *inclusive*, *dist* is treated as population data.
-   The minimum value is treated as the 0th percentile and the maximum
-   value is treated as the 100th percentile.  If *dist* is an instance of
-   a class that defines an :meth:`~inv_cdf` method, setting *method*
-   has no effect.
+   The choice of *method* depends on whether dataset includes or
+   excludes the lowest and highest possible values from the
+   population.
+
+   The *method* defaults to *exclusive*.  This is used for data
+   sampled from a population with more extreme values than found in
+   the samples.  The quantiles are computed with the assumption
+   that each data point, including the smallest and largest,
+   separates two continuous intervals with equal probability.
+
+   When the *method* is set to *inclusive*, the minimum value in
+   *dist* is treated as the 0th percentile and the maximum value is
+   treated as the 100th percentile.  This is useful for describing
+   population data.  It also applies to sample data that is known to
+   include the most extreme possible values (such as a sample of test
+   scores including 0 and 100, the lowest and highest possible
+   scores).
+
+   If *dist* is an instance of a class that defines an
+   :meth:`~inv_cdf` method, setting *method* has no effect.
 
    .. doctest::
 

From 1fee30bedfd1ff9d928d3ace6be42416008039fc Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sat, 18 May 2019 02:54:33 -0700
Subject: [PATCH 5/8] Make the inclusive/exclusive explanation more precise and
 concise

---
 Doc/library/statistics.rst | 39 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index dd80a1a46f8dce..2410a9ee309af5 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -517,28 +517,23 @@ However, for reading convenience, most of the examples show sorted sequences.
    For sample data, the cut points are linearly interpolated from the
    two nearest data points.  For example, if a cut point falls one-third
    of the distance between two sample values, ``100`` and ``112``, the
-   cut-point will evaluate to ``104``.  Other selection methods may be
-   offered in the future (for example choose ``100`` as the nearest
-   value or compute ``106`` as the midpoint).  This might matter if
-   there are too few samples for a given number of cut points.
-
-   The choice of *method* depends on whether dataset includes or
-   excludes the lowest and highest possible values from the
-   population.
-
-   The *method* defaults to *exclusive*.  This is used for data
-   sampled from a population with more extreme values than found in
-   the samples.  The quantiles are computed with the assumption
-   that each data point, including the smallest and largest,
-   separates two continuous intervals with equal probability.
-
-   When the *method* is set to *inclusive*, the minimum value in
-   *dist* is treated as the 0th percentile and the maximum value is
-   treated as the 100th percentile.  This is useful for describing
-   population data.  It also applies to sample data that is known to
-   include the most extreme possible values (such as a sample of test
-   scores including 0 and 100, the lowest and highest possible
-   scores).
+   cut-point will evaluate to ``104``.
+
+   Quantiles can be computed differently depending on whether the data
+   in *dist* includes or excludes the lowest and highest possible values
+   from the population.
+
+   The default *method* is *exclusive* and is used for data sampled from
+   a population that can have more extreme values than found in the
+   samples.  The portion of the population falling below the *i-th* of
+   *m* data points is computed as ``i // (m + 1)``.
+
+   Setting the *method* to *inclusive* is used for describing population
+   data or for samples that include the extreme points.  The minimum
+   value in *dist* is treated as the 0th percentile and the maximum
+   value is treated as the 100th percentile.  The portion of the
+   population falling below the *i-th* of *m* data points is computed as
+   ``(i - 1) // (m - 1)``.
 
    If *dist* is an instance of a class that defines an
    :meth:`~inv_cdf` method, setting *method* has no effect.

From 3a17ee7e9a63d9632c6143ee6f1b4e5ff51882d5 Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sat, 18 May 2019 03:04:14 -0700
Subject: [PATCH 6/8] Use true division in the doc formulas

---
 Doc/library/statistics.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 2410a9ee309af5..5f6d308b07cc16 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -523,17 +523,17 @@ However, for reading convenience, most of the examples show sorted sequences.
    in *dist* includes or excludes the lowest and highest possible values
    from the population.
 
-   The default *method* is *exclusive* and is used for data sampled from
+   The default *method* is "exclusive" and is used for data sampled from
    a population that can have more extreme values than found in the
    samples.  The portion of the population falling below the *i-th* of
-   *m* data points is computed as ``i // (m + 1)``.
+   *m* data points is computed as ``i / (m + 1)``.
 
-   Setting the *method* to *inclusive* is used for describing population
+   The "inclusive" *method* is used for describing population
    data or for samples that include the extreme points.  The minimum
    value in *dist* is treated as the 0th percentile and the maximum
    value is treated as the 100th percentile.  The portion of the
    population falling below the *i-th* of *m* data points is computed as
-   ``(i - 1) // (m - 1)``.
+   ``(i - 1) / (m - 1)``.
 
    If *dist* is an instance of a class that defines an
    :meth:`~inv_cdf` method, setting *method* has no effect.

From ab662fb923292ea34a26b8da12fe4f3bfd0138bc Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sat, 18 May 2019 03:26:10 -0700
Subject: [PATCH 7/8] Note that there should be more data points than quantiles

---
 Doc/library/statistics.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 5f6d308b07cc16..344af2d7d1e0b3 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -511,7 +511,8 @@ However, for reading convenience, most of the examples show sorted sequences.
    is not least 1.
 
    The *dist* can be any iterable containing sample data or it can be an
-   instance of a class that defines an :meth:`~inv_cdf` method.
+   instance of a class that defines an :meth:`~inv_cdf` method.  For meaningful
+   results, the number of data points in *dist* should be larger than *n*.
    Raises :exc:`StatisticsError` if there are not at least two data points.
 
    For sample data, the cut points are linearly interpolated from the

From d55271689407d0e19a8434c146802701208c05cd Mon Sep 17 00:00:00 2001
From: Raymond Hettinger <raymond.hettinger@gmail.com>
Date: Sat, 18 May 2019 09:49:43 -0700
Subject: [PATCH 8/8] Mention *method* specifically.

---
 Doc/library/statistics.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst
index 344af2d7d1e0b3..bc841fda72f887 100644
--- a/Doc/library/statistics.rst
+++ b/Doc/library/statistics.rst
@@ -520,9 +520,9 @@ However, for reading convenience, most of the examples show sorted sequences.
    of the distance between two sample values, ``100`` and ``112``, the
    cut-point will evaluate to ``104``.
 
-   Quantiles can be computed differently depending on whether the data
-   in *dist* includes or excludes the lowest and highest possible values
-   from the population.
+   The *method* for computing quantiles can be varied depending on
+   whether the data in *dist* includes or excludes the lowest and
+   highest possible values from the population.
 
    The default *method* is "exclusive" and is used for data sampled from
    a population that can have more extreme values than found in the