<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE pkgmetadata SYSTEM "https://www.gentoo.org/dtd/metadata.dtd">
<pkgmetadata>
  <maintainer type="person">
    <email>iohann.s.titov@gmail.com</email>
    <name>Ivan S. Titov</name>
  </maintainer>
  <longdescription lang="en">
    vLLM is a fast and easy-to-use library for LLM inference and
    serving. It provides a Python API and an OpenAI-compatible HTTP
    server.

    The cpu, cuda and rocm USE flags are mutually exclusive; each one
    selects the single VLLM_TARGET_DEVICE used for the build. If none
    of the three is enabled, the package is built with
    VLLM_TARGET_DEVICE=empty: the Python entrypoints import cleanly,
    but backend kernels fail at first model load. This is useful if
    you only want the API surface for development.
  </longdescription>
  <use>
    <flag name="cpu">Build for CPU inference (VLLM_TARGET_DEVICE=cpu); pulls in torchaudio and numba</flag>
    <flag name="rocm">Build for AMD ROCm inference (VLLM_TARGET_DEVICE=rocm); pulls in the HIP libraries, torchaudio and torchvision</flag>
  </use>
  <upstream>
    <remote-id type="pypi">vllm</remote-id>
    <remote-id type="github">vllm-project/vllm</remote-id>
    <bugs-to>https://github.com/vllm-project/vllm/issues</bugs-to>
  </upstream>
</pkgmetadata>
