{
  "__type": "IngestedDoc",
  "__tag": 4010,
  "_content": {},
  "_ordered_sections": [],
  "item_file": null,
  "item_line": null,
  "item_type": null,
  "aliases": [],
  "example_section_data": {
    "__type": "Section",
    "__tag": 4015,
    "children": [],
    "title": [],
    "level": 0,
    "target": null
  },
  "see_also": [],
  "signature": null,
  "references": null,
  "qa": "reference:simd:index",
  "arbitrary": [
    {
      "__type": "Section",
      "__tag": 4015,
      "children": [
        {
          "__type": "Target",
          "__tag": 4061,
          "label": "numpysimd"
        }
      ],
      "title": [],
      "level": 0,
      "target": null
    },
    {
      "__type": "Section",
      "__tag": 4015,
      "children": [
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "NumPy comes with a flexible working mechanism that allows it to harness the SIMD features that CPUs own, in order to provide faster and more stable performance on all popular platforms. Currently, NumPy supports the X86, IBM/Power, ARM7 and ARM8 architectures."
            }
          ]
        },
        {
          "__type": "Paragraph",
          "__tag": 4045,
          "children": [
            {
              "__type": "Text",
              "__tag": 4046,
              "value": "The optimization process in NumPy is carried out in three layers:"
            }
          ]
        },
        {
          "__type": "BulletList",
          "__tag": 4053,
          "ordered": false,
          "start": 1,
          "children": [
            {
              "__type": "ListItem",
              "__tag": 4054,
              "children": [
                {
                  "__type": "Paragraph",
                  "__tag": 4045,
                  "children": [
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": "Code is "
                    },
                    {
                      "__type": "Emphasis",
                      "__tag": 4047,
                      "children": [
                        {
                          "__type": "Text",
                          "__tag": 4046,
                          "value": "written"
                        }
                      ]
                    },
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": " using the universal intrinsics which is a set of types, macros and   functions that are mapped to each supported instruction-sets by using guards that   will enable use of the them only when the compiler recognizes them.   This allow us to generate multiple kernels for the same functionality,   in which each generated kernel represents a set of instructions that related one   or multiple certain CPU features. The first kernel represents the minimum (baseline)   CPU features, and the other kernels represent the additional (dispatched) CPU features."
                    }
                  ]
                }
              ]
            },
            {
              "__type": "ListItem",
              "__tag": 4054,
              "children": [
                {
                  "__type": "Paragraph",
                  "__tag": 4045,
                  "children": [
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": "At "
                    },
                    {
                      "__type": "Emphasis",
                      "__tag": 4047,
                      "children": [
                        {
                          "__type": "Text",
                          "__tag": 4046,
                          "value": "compile"
                        }
                      ]
                    },
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": " time, CPU build options are used to define the minimum and   additional features to support, based on user choice and compiler support. The   appropriate intrinsics are overlaid with the platform / architecture intrinsics,   and multiple kernels are compiled."
                    }
                  ]
                }
              ]
            },
            {
              "__type": "ListItem",
              "__tag": 4054,
              "children": [
                {
                  "__type": "Paragraph",
                  "__tag": 4045,
                  "children": [
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": "At "
                    },
                    {
                      "__type": "Emphasis",
                      "__tag": 4047,
                      "children": [
                        {
                          "__type": "Text",
                          "__tag": 4046,
                          "value": "runtime import"
                        }
                      ]
                    },
                    {
                      "__type": "Text",
                      "__tag": 4046,
                      "value": ", the CPU is probed for the set of supported CPU   features. A mechanism is used to grab the pointer to the most appropriate   kernel, and this will be the one called for the function."
                    }
                  ]
                }
              ]
            }
          ]
        },
        {
          "__type": "Admonition",
          "__tag": 4056,
          "kind": "note",
          "base_type": "note",
          "children": [
            {
              "__type": "AdmonitionTitle",
              "__tag": 4055,
              "children": [
                {
                  "__type": "Text",
                  "__tag": 4046,
                  "value": "note "
                }
              ]
            },
            {
              "__type": "Paragraph",
              "__tag": 4045,
              "children": [
                {
                  "__type": "Text",
                  "__tag": 4046,
                  "value": "NumPy community had a deep discussion before implementing this work, please check "
                },
                {
                  "__type": "CrossRef",
                  "__tag": 4002,
                  "value": "nep-0038-SIMD-optimizations",
                  "reference": {
                    "__type": "LocalRef",
                    "__tag": 4022,
                    "kind": "docs",
                    "path": "nep-0038-SIMD-optimizations"
                  },
                  "kind": "docs"
                },
                {
                  "__type": "Text",
                  "__tag": 4046,
                  "value": " for more clarification."
                }
              ]
            }
          ]
        },
        {
          "__type": "BulletList",
          "__tag": 4053,
          "ordered": false,
          "start": 1,
          "children": [
            {
              "__type": "ListItem",
              "__tag": 4054,
              "children": [
                {
                  "__type": "Paragraph",
                  "__tag": 4045,
                  "children": [
                    {
                      "__type": "CrossRef",
                      "__tag": 4002,
                      "value": "CPU Build Options",
                      "reference": {
                        "__type": "LocalRef",
                        "__tag": 4022,
                        "kind": "docs",
                        "path": "reference:simd:build-options"
                      },
                      "kind": "exists"
                    }
                  ]
                }
              ]
            },
            {
              "__type": "ListItem",
              "__tag": 4054,
              "children": [
                {
                  "__type": "Paragraph",
                  "__tag": 4045,
                  "children": [
                    {
                      "__type": "CrossRef",
                      "__tag": 4002,
                      "value": "How does the CPU dispatcher work?",
                      "reference": {
                        "__type": "LocalRef",
                        "__tag": 4022,
                        "kind": "docs",
                        "path": "reference:simd:how-it-works"
                      },
                      "kind": "exists"
                    }
                  ]
                }
              ]
            }
          ]
        }
      ],
      "title": [
        {
          "__type": "Text",
          "__tag": 4046,
          "value": "CPU/SIMD optimizations"
        }
      ],
      "level": 0,
      "target": null
    }
  ],
  "local_refs": []
}