Skip to content

Commit cd765cc

Browse files
s3woz, bohnstingl, DarkLight1337, and tlrmchlsmth
authored and committed
[Model] Add GraniteMoeHybrid 4.0 model (vllm-project#17497)
Signed-off-by: Thomas Ortner <[email protected]> Signed-off-by: Stanislaw Wozniak <[email protected]> Co-authored-by: Thomas Ortner <[email protected]> Co-authored-by: Cyrus Leung <[email protected]> Co-authored-by: Tyler Michael Smith <[email protected]>
1 parent 7f5cbda commit cd765cc

File tree

6 files changed

+637
-0
lines changed

6 files changed

+637
-0
lines changed

docs/source/models/supported_models.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,11 @@ See [this page](#generative-models) for more information on how to use generativ
385385
* `ibm-granite/granite-3.0-1b-a400m-base`, `ibm-granite/granite-3.0-3b-a800m-instruct`, `ibm/PowerMoE-3b`, etc.
386386
* ✅︎
387387
* ✅︎
388+
- * `GraniteMoeHybridForCausalLM`
389+
* Granite 4.0 MoE Hybrid
390+
* `ibm-granite/granite-4.0-tiny-preview`, etc.
391+
* ✅︎
392+
* ✅︎
388393
- * `GraniteMoeSharedForCausalLM`
389394
* Granite MoE Shared
390395
* `ibm-research/moe-7b-1b-active-shared-experts` (test model)
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# SPDX-License-Identifier: Apache-2.0
"""Equivalence test for the GraniteMoeHybrid model family.

Generates greedily with both the vLLM and the HuggingFace implementations
of the same checkpoint and checks that their logprobs agree.
"""

import pytest

from ...utils import check_logprobs_close

# Path of the checkpoints (HF Hub model IDs) under test.
MODELS = [
    "ibm-granite/granite-4.0-tiny-preview",
]


@pytest.mark.skip(
    reason="Granite 4.0 is not yet available in huggingface transformers")
@pytest.mark.parametrize("model", MODELS)
@pytest.mark.parametrize("dtype", ["float16", "bfloat16"])
@pytest.mark.parametrize("max_tokens", [64])
@pytest.mark.parametrize("num_logprobs", [5])
def test_model_equivalence_to_hf_greedy(
    hf_runner,
    vllm_runner,
    example_prompts,
    model: str,
    dtype: str,
    max_tokens: int,
    num_logprobs: int,
):
    """Compare vLLM greedy outputs against the HF reference implementation.

    Currently skipped (see the ``skip`` mark) until the architecture is
    available in a released ``transformers`` version.

    Args:
        hf_runner / vllm_runner: project fixtures that build and tear down
            the respective model wrappers as context managers.
        example_prompts: shared fixture with the prompts to decode.
        model: HF Hub ID of the checkpoint (parametrized over MODELS).
        dtype: model weight/compute dtype for both backends.
        max_tokens: number of tokens to generate per prompt.
        num_logprobs: top-k logprobs to collect for the comparison.
    """
    # The vLLM model is created and torn down first, before the HF model is
    # loaded — presumably to avoid holding both models in memory at once.
    with vllm_runner(model, dtype=dtype) as vllm_model:
        vllm_outputs = vllm_model.generate_greedy_logprobs(
            example_prompts, max_tokens, num_logprobs)

    with hf_runner(model, dtype=dtype) as hf_model:
        hf_outputs = hf_model.generate_greedy_logprobs_limit(
            example_prompts, max_tokens, num_logprobs)

    # Compare the two output sets; on mismatch the helper reports which
    # side is which via name_0/name_1.
    check_logprobs_close(
        outputs_0_lst=hf_outputs,
        outputs_1_lst=vllm_outputs,
        name_0="hf",
        name_1="vllm",
    )

tests/models/language/generation/test_hybrid.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323

2424
HYBRID_MODELS = [
2525
"ai21labs/Jamba-tiny-dev",
26+
# NOTE: ibm-granite/granite-4.0-tiny-preview is skipped currently as
27+
# it is not yet available in huggingface transformers
28+
# "ibm-granite/granite-4.0-tiny-preview",
2629
# NOTE: Running Plamo2 in transformers implementation requires to install
2730
# causal-conv1d package, which is not listed as a test dependency as it's
2831
# not compatible with pip-compile.

tests/models/registry.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ def check_available_online(
166166
{"1b": "EleutherAI/pythia-1.4b"}),
167167
"GraniteForCausalLM": _HfExamplesInfo("ibm/PowerLM-3b"),
168168
"GraniteMoeForCausalLM": _HfExamplesInfo("ibm/PowerMoE-3b"),
169+
"GraniteMoeHybridForCausalLM": _HfExamplesInfo("ibm-granite/granite-4.0-tiny-preview", # noqa: E501
170+
min_transformers_version="4.52.0"), # noqa: E501
169171
"GraniteMoeSharedForCausalLM": _HfExamplesInfo("ibm-research/moe-7b-1b-active-shared-experts"), # noqa: E501
170172
"Grok1ModelForCausalLM": _HfExamplesInfo("hpcai-tech/grok-1",
171173
trust_remote_code=True),

0 commit comments

Comments
 (0)