{
  "__type": "IngestedDoc",
  "__tag": 4010,
  "_content": {},
  "_ordered_sections": [],
  "item_file": null,
  "item_line": null,
  "item_type": null,
  "aliases": [],
  "example_section_data": {
    "__type": "Section",
    "__tag": 4015,
    "children": [],
    "title": [],
    "level": 0,
    "target": null
  },
  "see_also": [],
  "signature": null,
  "references": null,
  "qa": "tutorial:stats:continuous_kstwo",
  "arbitrary": [
    {
      "__type": "Section",
      "__tag": 4015,
      "children": [
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "This is the distribution of the maximum absolute differences between an empirical distribution function, computed from "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "n"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " samples or observations, and a comparison (or target) cumulative distribution function, which is assumed to be continuous. (The \"two\" in the name is because this is the two-sided difference. "
            },
            {
              "__type": "InlineCode",
              "__tag": 4051,
              "value": "ksone"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " is the distribution of the positive differences, "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "D_n^+"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": ", hence it concerns one-sided differences. "
            },
            {
              "__type": "InlineCode",
              "__tag": 4051,
              "value": "kstwobign"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " is the limiting distribution of the "
            },
            {
              "__type": "Emphasis",
              "__tag": 4047,
              "children": [
                {
                  "__type": "Text",
                  "__tag": 4046,
                  "value": "normalized"
                }
              ]
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " maximum absolute differences "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "\\sqrt{n} D_n"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": ".)"
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "Writing "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "D_n = \\sup_t \\left|F_{empirical,n}(t)-F_{target}(t)\\right|"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": ", "
            },
            {
              "__type": "InlineCode",
              "__tag": 4051,
              "value": "kstwo"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " is the distribution of the "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "D_n"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " values."
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "InlineCode",
              "__tag": 4051,
              "value": "kstwo"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " can also be used with the differences between two empirical distribution functions, for sets of observations with "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "m"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " and "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "n"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " samples respectively. Writing "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "D_{m,n} = \\sup_t \\left|F_{1,m}(t)-F_{2,n}(t)\\right|"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": ", where "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "F_{1,m}"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " and "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "F_{2,n}"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " are the two empirical distribution functions, then "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "Pr(D_{m,n} \\le x) \\approx Pr(D_N \\le x)"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " under appropriate conditions, where "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "N = \\frac{mn}{m+n}"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "."
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "There is one shape parameter "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "n"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": ", a positive integer, and the support is "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "x\\in\\left[0,1\\right]"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "."
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "The implementation follows Simard & L'Ecuyer, which combines exact algorithms of Durbin and Pomeranz with asymptotic estimates of Li-Chien, Pelz and Good to compute the CDF with 5-15 accurate digits."
            }
          ]
        }
      ],
      "title": [
        {
          "__type": "Text",
          "__tag": 4046,
          "value": "KStwo Distribution"
        }
      ],
      "level": 0,
      "target": "continuous-kstwo"
    },
    {
      "__type": "Section",
      "__tag": 4015,
      "children": [
        {
          "__type": "Code",
          "__tag": 4050,
          "value": ">>> import numpy as np\n>>> from scipy.stats import kstwo",
          "execution_status": null
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "Show the probability of a gap at least as big as 0, 0.5 and 1.0 for a sample of size 5."
            }
          ]
        },
        {
          "__type": "Code",
          "__tag": 4050,
          "value": ">>> kstwo.sf([0, 0.5, 1.0], 5)\narray([1.   , 0.112, 0.   ])",
          "execution_status": null
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "Compare a sample of size 5 drawn from a source N(0.5, 1) distribution against a target N(0, 1) CDF."
            }
          ]
        },
        {
          "__type": "Code",
          "__tag": 4050,
          "value": ">>> from scipy.stats import norm\n>>> n = 5\n>>> gendist = norm(0.5, 1)       # Normal distribution, mean 0.5, stddev 1\n>>> x = np.sort(gendist.rvs(size=n, random_state=np.random.default_rng()))\n>>> x\narray([-1.59113056, -0.66335147,  0.54791569,  0.78009321,  1.27641365])  # may vary\n>>> target = norm(0, 1)\n>>> cdfs = target.cdf(x)\n>>> cdfs\narray([0.0557901 , 0.25355274, 0.7081251 , 0.78233199, 0.89909533])   # may vary\n>>> # Construct the Empirical CDF and the K-S statistics (Dn+, Dn-, Dn)\n>>> ecdfs = np.arange(n+1, dtype=float)/n\n>>> cols = np.column_stack([x, ecdfs[1:], cdfs, cdfs - ecdfs[:n], ecdfs[1:] - cdfs])\n>>> np.set_printoptions(precision=3)\n>>> cols\narray([[-1.591,  0.2  ,  0.056,  0.056,  0.144],     # may vary\n       [-0.663,  0.4  ,  0.254,  0.054,  0.146],\n       [ 0.548,  0.6  ,  0.708,  0.308, -0.108],\n       [ 0.78 ,  0.8  ,  0.782,  0.182,  0.018],\n       [ 1.276,  1.   ,  0.899,  0.099,  0.101]])\n>>> gaps = cols[:, -2:]\n>>> Dnpm = np.max(gaps, axis=0)\n>>> Dn = np.max(Dnpm)\n>>> iminus, iplus = np.argmax(gaps, axis=0)\n>>> print('Dn- = %f (at x=%.2f)' % (Dnpm[0], x[iminus]))\nDn- = 0.246201 (at x=-0.14)\n>>> print('Dn+ = %f (at x=%.2f)' % (Dnpm[1], x[iplus]))\nDn+ = 0.224726 (at x=0.19)\n>>> print('Dn  = %f' % (Dn))\nDn  = 0.246201",
          "execution_status": null
        },
        {
          "__type": "Code",
          "__tag": 4050,
          "value": ">>> probs = kstwo.sf(Dn, n)\n>>> print(chr(10).join(['For a sample of size %d drawn from a N(0, 1) distribution:' % n,\n...      ' Kolmogorov-Smirnov 2-sided n=%d: Prob(Dn >= %f) = %.4f' % (n, Dn, probs)]))\nFor a sample of size 5 drawn from a N(0, 1) distribution:\n Kolmogorov-Smirnov 2-sided n=5: Prob(Dn >= 0.246201) = 0.8562",
          "execution_status": null
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "Plot the Empirical CDF against the target N(0, 1) CDF."
            }
          ]
        },
        {
          "__type": "Code",
          "__tag": 4050,
          "value": ">>> import matplotlib.pyplot as plt\n>>> plt.step(np.concatenate([[-3], x]), ecdfs, where='post', label='Empirical CDF')\n>>> x3 = np.linspace(-3, 3, 100)\n>>> plt.plot(x3, target.cdf(x3), label='CDF for N(0, 1)')\n>>> plt.ylim([0, 1]); plt.grid(True); plt.legend();\n>>> plt.vlines([x[iminus]], ecdfs[iminus], cdfs[iminus], color='r', linestyle='solid', lw=4)\n>>> plt.vlines([x[iplus]], cdfs[iplus], ecdfs[iplus+1], color='m', linestyle='solid', lw=4)\n>>> plt.annotate('Dn-', xy=(x[iminus], (ecdfs[iminus]+ cdfs[iminus])/2),\n...              xytext=(x[iminus]+1, (ecdfs[iminus]+ cdfs[iminus])/2 - 0.02),\n...              arrowprops=dict(facecolor='white', edgecolor='r', shrink=0.05), size=15, color='r');\n>>> plt.annotate('Dn+', xy=(x[iplus], (ecdfs[iplus+1]+ cdfs[iplus])/2),\n...             xytext=(x[iplus]-2, (ecdfs[iplus+1]+ cdfs[iplus])/2 - 0.02),\n...             arrowprops=dict(facecolor='white', edgecolor='m', shrink=0.05), size=15, color='m');\n>>> plt.show()",
          "execution_status": null
        }
      ],
      "title": [
        {
          "__type": "Text",
          "__tag": 4046,
          "value": "Examples"
        }
      ],
      "level": 1,
      "target": null
    },
    {
      "__type": "Section",
      "__tag": 4015,
      "children": [
        {
          "__type": "BulletList",
          "__tag": 4053,
          "ordered": false,
          "start": 1,
          "children": [
            {
              "__type": "ListItem",
              "__tag": 4054,
              "children": [
                {
                  "__type": "Paragraph",
                  "__tag": 4045,
                  "children": [
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": "\"Kolmogorov-Smirnov test\", Wikipedia, https://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test"
                    }
                  ]
                }
              ]
            },
            {
              "__type": "ListItem",
              "__tag": 4054,
              "children": [
                {
                  "__type": "Paragraph",
                  "__tag": 4045,
                  "children": [
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": "Durbin J. \"The Probability that the Sample Distribution Function Lies Between Two Parallel Straight Lines.\" "
                    },
                    {
                      "__type": "Emphasis",
                      "__tag": 4047,
                      "children": [
                        {
                          "__type": "Text",
                          "__tag": 4046,
                          "value": "Ann. Math. Statist"
                        }
                      ]
                    },
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": "., 39 (1968), 398-411."
                    }
                  ]
                }
              ]
            },
            {
              "__type": "ListItem",
              "__tag": 4054,
              "children": [
                {
                  "__type": "Paragraph",
                  "__tag": 4045,
                  "children": [
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": "Pomeranz J. \"Exact Cumulative Distribution of the Kolmogorov-Smirnov Statistic for Small Samples (Algorithm 487).\" "
                    },
                    {
                      "__type": "Emphasis",
                      "__tag": 4047,
                      "children": [
                        {
                          "__type": "Text",
                          "__tag": 4046,
                          "value": "Communications of the ACM"
                        }
                      ]
                    },
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": ", 17(12), (1974) 703-704."
                    }
                  ]
                }
              ]
            },
            {
              "__type": "ListItem",
              "__tag": 4054,
              "children": [
                {
                  "__type": "Paragraph",
                  "__tag": 4045,
                  "children": [
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": "Li-Chien, C. \"On the exact distribution of the statistics of A. N. Kolmogorov and their asymptotic expansion.\" "
                    },
                    {
                      "__type": "Emphasis",
                      "__tag": 4047,
                      "children": [
                        {
                          "__type": "Text",
                          "__tag": 4046,
                          "value": "Acta Mathematica Sinica"
                        }
                      ]
                    },
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": ", 6, (1956) 55-81."
                    }
                  ]
                }
              ]
            },
            {
              "__type": "ListItem",
              "__tag": 4054,
              "children": [
                {
                  "__type": "Paragraph",
                  "__tag": 4045,
                  "children": [
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": "Pelz W, Good IJ. \"Approximating the Lower Tail-areas of the Kolmogorov-Smirnov One-sample Statistic.\" "
                    },
                    {
                      "__type": "Emphasis",
                      "__tag": 4047,
                      "children": [
                        {
                          "__type": "Text",
                          "__tag": 4046,
                          "value": "Journal of the Royal Statistical Society"
                        }
                      ]
                    },
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": ", Series B, (1976) 38(2), 152-156."
                    }
                  ]
                }
              ]
            },
            {
              "__type": "ListItem",
              "__tag": 4054,
              "children": [
                {
                  "__type": "Paragraph",
                  "__tag": 4045,
                  "children": [
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": "Simard, R., L'Ecuyer, P. \"Computing the Two-Sided Kolmogorov-Smirnov Distribution\", "
                    },
                    {
                      "__type": "Emphasis",
                      "__tag": 4047,
                      "children": [
                        {
                          "__type": "Text",
                          "__tag": 4046,
                          "value": "Journal of Statistical Software"
                        }
                      ]
                    },
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": ", Vol 39, (2011) 11."
                    }
                  ]
                }
              ]
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "Implementation: "
            },
            {
              "__type": "InlineRole",
              "__tag": 4003,
              "value": "scipy.stats.kstwo",
              "domain": null,
              "role": null,
              "inventory": null
            }
          ]
        }
      ],
      "title": [
        {
          "__type": "Text",
          "__tag": 4046,
          "value": "References"
        }
      ],
      "level": 1,
      "target": null
    }
  ],
  "local_refs": []
}