[Doc] Documentation of Python APIs and tutorials on page layout (#90)
Showing 17 changed files with 1,279 additions and 188 deletions.
.. _apicascade:

flashinfer.cascade
==================

.. currentmodule:: flashinfer.cascade

.. _api-merge-states:

Merge Attention States
----------------------

.. autosummary::
   :toctree: ../../generated

   merge_state
   merge_state_in_place
   merge_states
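The merge primitives combine partial attention outputs with their per-head log-sum-exp (LSE) values, so attention computed over disjoint KV chunks can be fused into attention over their concatenation. A pure-NumPy sketch of the semantics (``merge_state`` here is an illustrative reference, not the CUDA kernel):

```python
import numpy as np

def merge_state(v_a, s_a, v_b, s_b):
    """Merge two attention states: partial outputs plus per-head log-sum-exp.

    v_a, v_b: partial attention outputs, shape [num_heads, head_dim]
    s_a, s_b: log-sum-exp of the attention scores, shape [num_heads]
    """
    s_max = np.maximum(s_a, s_b)          # subtract the max for numerical stability
    w_a = np.exp(s_a - s_max)             # un-normalized weight of state a
    w_b = np.exp(s_b - s_max)             # un-normalized weight of state b
    v = (v_a * w_a[:, None] + v_b * w_b[:, None]) / (w_a + w_b)[:, None]
    s = s_max + np.log(w_a + w_b)         # merged log-sum-exp
    return v, s
```

Merging the states of two disjoint KV chunks reproduces softmax attention over the concatenated chunks, which is what cascade inference exploits to compute attention over a shared prefix once and merge it with each request's unique suffix.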
.. _api-cascade-attention:

Cascade Attention
-----------------

.. autosummary::
   :toctree: ../../generated

   batch_decode_with_shared_prefix_padded_kv_cache

Cascade Attention Wrapper Classes
---------------------------------

.. autoclass:: BatchDecodeWithSharedPrefixPagedKVCacheWrapper
   :members:

.. autoclass:: BatchPrefillWithSharedPrefixPagedKVCacheWrapper
   :members:
.. _apidecode:

flashinfer.decode
=================

.. currentmodule:: flashinfer.decode

Single Request Decoding
-----------------------

.. autosummary::
   :toctree: ../../generated

   single_decode_with_kv_cache
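For orientation, here is a pure-NumPy reference of the computation decode attention performs for a single query token (a sketch of the semantics, not the kernel; it assumes equal numbers of query and KV heads and the NHD layout):

```python
import numpy as np

def single_decode_reference(q, k, v):
    """Reference for single-request decode attention (one query token).

    q: [num_heads, head_dim], k/v: [kv_len, num_heads, head_dim]
    Returns the attention output, shape [num_heads, head_dim].
    """
    scale = 1.0 / np.sqrt(q.shape[-1])
    scores = np.einsum("hd,nhd->hn", q, k) * scale   # [num_heads, kv_len]
    scores -= scores.max(axis=-1, keepdims=True)     # numerical stability
    p = np.exp(scores)
    p /= p.sum(axis=-1, keepdims=True)               # softmax over kv_len
    return np.einsum("hn,nhd->hd", p, v)
```

The decode kernels compute exactly this softmax-weighted sum, but without materializing the full score matrix in GPU memory.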
Batch Decoding
--------------

.. autosummary::
   :toctree: ../../generated

   batch_decode_with_padded_kv_cache
   batch_decode_with_padded_kv_cache_return_lse

.. autoclass:: BatchDecodeWithPagedKVCacheWrapper
   :members:
.. _apipage:

flashinfer.page
===============

Kernels to manipulate paged kv-cache.

.. currentmodule:: flashinfer.page

Append new K/V tensors to Paged KV-Cache
----------------------------------------

.. autosummary::
   :toctree: ../../generated

   append_paged_kv_cache
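To illustrate what appending to a paged kv-cache involves, here is a minimal NumPy sketch of the bookkeeping (names and layout are illustrative, not the ``append_paged_kv_cache`` signature): the cache is a global pool of fixed-size pages, and each request owns an ordered list of page indices.

```python
import numpy as np

PAGE_SIZE, NUM_PAGES, NUM_HEADS, HEAD_DIM = 4, 8, 2, 3

# Global page pool: one slot per (page, position-in-page, head).
k_pool = np.zeros((NUM_PAGES, PAGE_SIZE, NUM_HEADS, HEAD_DIM))
v_pool = np.zeros_like(k_pool)
free_pages = list(range(NUM_PAGES))

def append_kv(page_table, seq_len, k_new, v_new):
    """Append k_new/v_new ([n, NUM_HEADS, HEAD_DIM]) to one request's cache.

    page_table: ordered list of page indices owned by the request.
    seq_len: number of tokens already cached. Returns the new seq_len.
    """
    for i in range(k_new.shape[0]):
        pos = seq_len + i
        if pos % PAGE_SIZE == 0:              # current page is full: grab a new one
            page_table.append(free_pages.pop(0))
        page = page_table[pos // PAGE_SIZE]
        slot = pos % PAGE_SIZE
        k_pool[page, slot] = k_new[i]
        v_pool[page, slot] = v_new[i]
    return seq_len + k_new.shape[0]

def gather_k(page_table, seq_len):
    """Read a request's cached keys back in token order."""
    flat = k_pool[page_table].reshape(-1, NUM_HEADS, HEAD_DIM)
    return flat[:seq_len]
```

Paging avoids reserving a contiguous max-length buffer per request: sequences grow one page at a time, and pages can be shared or recycled.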
.. _apiprefill:

flashinfer.prefill
==================

Attention kernels for prefill & append attention, in both single-request and batch serving settings.

.. currentmodule:: flashinfer.prefill

Single Request Prefill/Append Attention
---------------------------------------

.. autosummary::
   :toctree: ../../generated

   single_prefill_with_kv_cache
   single_prefill_with_kv_cache_return_lse
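In the append setting, ``qo_len`` new query tokens attend to a cache of ``kv_len >= qo_len`` keys under a causal mask. A pure-NumPy sketch of the semantics (an illustrative reference, not the kernel):

```python
import numpy as np

def append_attention_reference(q, k, v):
    """Causal attention for qo_len new tokens appended at the end of kv_len keys.

    q: [qo_len, num_heads, head_dim], k/v: [kv_len, num_heads, head_dim]
    Query i (the token at position kv_len - qo_len + i) may attend to
    kv positions j <= kv_len - qo_len + i.
    """
    qo_len, kv_len = q.shape[0], k.shape[0]
    scale = 1.0 / np.sqrt(q.shape[-1])
    scores = np.einsum("ihd,jhd->hij", q, k) * scale     # [heads, qo, kv]
    i = np.arange(qo_len)[:, None]
    j = np.arange(kv_len)[None, :]
    mask = j <= (kv_len - qo_len) + i                    # causal/append mask
    scores = np.where(mask[None], scores, -np.inf)
    scores -= scores.max(axis=-1, keepdims=True)         # numerical stability
    p = np.exp(scores)
    p /= p.sum(axis=-1, keepdims=True)                   # masked softmax
    return np.einsum("hij,jhd->ihd", p, v)               # [qo, heads, dim]
```

With ``qo_len == kv_len`` this is ordinary causal prefill; with ``qo_len == 1`` it degenerates to decode attention over the whole cache.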
Batch Prefill/Append Attention
------------------------------

.. autoclass:: BatchPrefillWithPagedKVCacheWrapper
   :members:

.. autoclass:: BatchPrefillWithRaggedKVCacheWrapper
   :members:
.. _installation:

Installation
============

Python Package
--------------

FlashInfer is available as a Python package, built on top of `PyTorch <https://pytorch.org/>`_, to
easily integrate with your Python applications.

Prerequisites
^^^^^^^^^^^^^

- OS: Linux only
- Python: 3.10, 3.11
- PyTorch CUDA 11.8/12.1

  - Use ``python -c "import torch; print(torch.version.cuda)"`` to check your PyTorch CUDA version.

- Supported GPU architectures: sm_80, sm_86, sm_89, sm_90 (sm_75 support is work in progress).

Quick Start
^^^^^^^^^^^

.. tabs::

    .. tab:: PyTorch CUDA 11.8

        .. code-block:: bash

            pip install flashinfer -i https://flashinfer.ai/whl/cu118/

    .. tab:: PyTorch CUDA 12.1

        .. code-block:: bash

            pip install flashinfer -i https://flashinfer.ai/whl/cu121/

C++ API
-------

FlashInfer is a header-only library whose only dependencies are CUDA and the C++ standard library;
it can be directly integrated into your C++ project without installation.

You can check our `unittest and benchmarks <https://github.com/flashinfer-ai/flashinfer/tree/main/src>`_ for examples of how to use our C++ APIs at the moment.

.. note::
    The ``nvbench`` and ``googletest`` dependencies in the ``3rdparty`` directory are only
    used to compile unittests and benchmarks, and are not required for the library itself.
Updated requirements for building the documentation: Sphinx is upgraded from 5.2.3 to 7.2.6, and ``sphinx-rtd-theme``/``tlcpack-sphinx-addon`` are replaced by the ``furo`` theme::

    sphinx-tabs == 3.4.1
    sphinx == 7.2.6
    sphinx-toolbox == 3.4.0
    sphinxcontrib_httpdomain == 1.8.1
    sphinxcontrib-napoleon == 0.7
    sphinx-reredirects == 0.1.2
    furo == 2024.01.29