{
  "__type": "IngestedDoc",
  "__tag": 4010,
  "_content": {},
  "_ordered_sections": [],
  "item_file": null,
  "item_line": null,
  "item_type": null,
  "aliases": [],
  "example_section_data": {
    "__type": "Section",
    "__tag": 4015,
    "children": [],
    "title": [],
    "level": 0,
    "target": null
  },
  "see_also": [],
  "signature": null,
  "references": null,
  "qa": "tutorial:stats:multiscale_graphcorr",
  "arbitrary": [
    {
      "__type": "Section",
      "__tag": 4015,
      "children": [
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "With "
            },
            {
              "__type": "CrossRef",
              "__tag": 4002,
              "value": "scipy.stats.multiscale_graphcorr",
              "reference": {
                "__type": "RefInfo",
                "__tag": 4000,
                "module": "scipy",
                "version": "*",
                "kind": "api",
                "path": "scipy.stats._mgc:multiscale_graphcorr"
              },
              "kind": "module"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": ", we can test for independence on high-dimensional and nonlinear data. Before we start, let's import some useful packages:"
            }
          ]
        },
        {
          "__type": "Blockquote",
          "__tag": 4059,
          "children": [
            {
              "__type": "Code",
              "__tag": 4050,
              "value": ">>> import numpy as np\n>>> import matplotlib.pyplot as plt; plt.style.use('classic')\n>>> from scipy.stats import multiscale_graphcorr",
              "execution_status": null
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "Let's use a custom plotting function to plot the data relationship:"
            }
          ]
        },
        {
          "__type": "Blockquote",
          "__tag": 4059,
          "children": [
            {
              "__type": "Code",
              "__tag": 4050,
              "value": ">>> def mgc_plot(x, y, sim_name, mgc_dict=None, only_viz=False,\n...              only_mgc=False):\n...     \"\"\"Plot sim and MGC-plot\"\"\"\n...     if not only_mgc:\n...         # simulation\n...         plt.figure(figsize=(8, 8))\n...         ax = plt.gca()\n...         ax.set_title(sim_name + \" Simulation\", fontsize=20)\n...         ax.scatter(x, y)\n...         ax.set_xlabel('X', fontsize=15)\n...         ax.set_ylabel('Y', fontsize=15)\n...         ax.axis('equal')\n...         ax.tick_params(axis=\"x\", labelsize=15)\n...         ax.tick_params(axis=\"y\", labelsize=15)\n...         plt.show()\n...     if not only_viz:\n...         # local correlation map\n...         plt.figure(figsize=(8,8))\n...         ax = plt.gca()\n...         mgc_map = mgc_dict[\"mgc_map\"]\n...         # draw heatmap\n...         ax.set_title(\"Local Correlation Map\", fontsize=20)\n...         im = ax.imshow(mgc_map, cmap='YlGnBu')\n...         # colorbar\n...         cbar = ax.figure.colorbar(im, ax=ax)\n...         cbar.ax.set_ylabel(\"\", rotation=-90, va=\"bottom\")\n...         ax.invert_yaxis()\n...         # Turn spines off and create white grid.\n...         for edge, spine in ax.spines.items():\n...             spine.set_visible(False)\n...         # optimal scale\n...         opt_scale = mgc_dict[\"opt_scale\"]\n...         ax.scatter(opt_scale[0], opt_scale[1],\n...                    marker='X', s=200, color='red')\n...         # other formatting\n...         ax.tick_params(bottom=\"off\", left=\"off\")\n...         ax.set_xlabel('#Neighbors for X', fontsize=15)\n...         ax.set_ylabel('#Neighbors for Y', fontsize=15)\n...         ax.tick_params(axis=\"x\", labelsize=15)\n...         ax.tick_params(axis=\"y\", labelsize=15)\n...         ax.set_xlim(0, 100)\n...         ax.set_ylim(0, 100)\n...         plt.show()",
              "execution_status": null
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "Let's look at some linear data first:"
            }
          ]
        },
        {
          "__type": "Blockquote",
          "__tag": 4059,
          "children": [
            {
              "__type": "Code",
              "__tag": 4050,
              "value": ">>> rng = np.random.default_rng()\n>>> x = np.linspace(-1, 1, num=100)\n>>> y = x + 0.3 * rng.random(x.size)",
              "execution_status": null
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "The simulation relationship can be plotted below:"
            }
          ]
        },
        {
          "__type": "Blockquote",
          "__tag": 4059,
          "children": [
            {
              "__type": "Code",
              "__tag": 4050,
              "value": ">>> mgc_plot(x, y, \"Linear\", only_viz=True)",
              "execution_status": null
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "Now, we can see the test statistic, p-value, and MGC map visualized below. The optimal scale is shown on the map as a red \"x\":"
            }
          ]
        },
        {
          "__type": "Blockquote",
          "__tag": 4059,
          "children": [
            {
              "__type": "Code",
              "__tag": 4050,
              "value": ">>> stat, pvalue, mgc_dict = multiscale_graphcorr(x, y)\n>>> print(\"MGC test statistic: \", round(stat, 1))\nMGC test statistic:  1.0\n>>> print(\"P-value: \", round(pvalue, 1))\nP-value:  0.0\n>>> mgc_plot(x, y, \"Linear\", mgc_dict, only_mgc=True)",
              "execution_status": null
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "It is clear from here that MGC is able to determine a relationship between the input data matrices because the p-value is very low and the MGC test statistic is relatively high. The MGC-map indicates a "
            },
            {
              "__type": "Strong",
              "__tag": 4048,
              "children": [
                {
                  "__type": "Text",
                  "__tag": 4046,
                  "value": "strongly linear relationship"
                }
              ]
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": ". Intuitively, this is because having more neighbors will help in identifying a linear relationship between "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "x"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " and "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "y"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": ". The optimal scale in this case is "
            },
            {
              "__type": "Strong",
              "__tag": 4048,
              "children": [
                {
                  "__type": "Text",
                  "__tag": 4046,
                  "value": "equivalent to the global scale"
                }
              ]
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": ", marked by a red spot on the map."
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "The same can be done for nonlinear data sets. The following "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "x"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " and "
            },
            {
              "__type": "InlineMath",
              "__tag": 4057,
              "value": "y"
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": " arrays are derived from a nonlinear simulation:"
            }
          ]
        },
        {
          "__type": "Blockquote",
          "__tag": 4059,
          "children": [
            {
              "__type": "Code",
              "__tag": 4050,
              "value": ">>> unif = np.array(rng.uniform(0, 5, size=100))\n>>> x = unif * np.cos(np.pi * unif)\n>>> y = unif * np.sin(np.pi * unif) + 0.4 * rng.random(x.size)",
              "execution_status": null
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "The simulation relationship can be plotted below:"
            }
          ]
        },
        {
          "__type": "Blockquote",
          "__tag": 4059,
          "children": [
            {
              "__type": "Code",
              "__tag": 4050,
              "value": ">>> mgc_plot(x, y, \"Spiral\", only_viz=True)",
              "execution_status": null
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "Now, we can see the test statistic, p-value, and MGC map visualized below. The optimal scale is shown on the map as a red \"x\":"
            }
          ]
        },
        {
          "__type": "Blockquote",
          "__tag": 4059,
          "children": [
            {
              "__type": "Code",
              "__tag": 4050,
              "value": ">>> stat, pvalue, mgc_dict = multiscale_graphcorr(x, y)\n>>> print(\"MGC test statistic: \", round(stat, 1))\nMGC test statistic:  0.2  # random\n>>> print(\"P-value: \", round(pvalue, 1))\nP-value:  0.0\n>>> mgc_plot(x, y, \"Spiral\", mgc_dict, only_mgc=True)",
              "execution_status": null
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "It is clear from here that MGC is able to determine a relationship again because the p-value is very low and the MGC test statistic is relatively high. The MGC-map indicates a "
            },
            {
              "__type": "Strong",
              "__tag": 4048,
              "children": [
                {
                  "__type": "Text",
                  "__tag": 4046,
                  "value": "strongly nonlinear relationship"
                }
              ]
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": ". The optimal scale in this case is "
            },
            {
              "__type": "Strong",
              "__tag": 4048,
              "children": [
                {
                  "__type": "Text",
                  "__tag": 4046,
                  "value": "equivalent to the local scale"
                }
              ]
            },
            {
              "__type": "Text",
              "__tag": 4046,
              "value": ", marked by a red spot on the map."
            }
          ]
        }
      ],
      "title": [
        {
          "__type": "Text",
          "__tag": 4046,
          "value": "Multiscale Graph Correlation (MGC)"
        }
      ],
      "level": 0,
      "target": null
    }
  ],
  "local_refs": []
}