
Commit b771e47

[CI] post-GptOss fixes for green CI (#39929)
1 parent eb6e26a commit b771e47

File tree

10 files changed: +21 -16 lines changed

docs/source/en/_toctree.yml

Lines changed: 2 additions & 2 deletions
@@ -511,6 +511,8 @@
         title: GPT2
       - local: model_doc/gpt_bigcode
         title: GPTBigCode
+      - local: model_doc/gpt_oss
+        title: GptOss
       - local: model_doc/gptsan-japanese
         title: GPTSAN Japanese
       - local: model_doc/gpt-sw3
@@ -617,8 +619,6 @@
         title: OLMoE
       - local: model_doc/open-llama
         title: Open-Llama
-      - local: model_doc/openai_moe
-        title: OpenAIMoe
       - local: model_doc/opt
         title: OPT
       - local: model_doc/pegasus

docs/source/en/main_classes/quantization.md

Lines changed: 4 additions & 0 deletions
@@ -65,6 +65,10 @@ Learn how to quantize models in the [Quantization](../quantization) guide.
 
 [[autodoc]] HqqConfig
 
+## Mxfp4Config
+
+[[autodoc]] Mxfp4Config
+
 ## FbgemmFp8Config
 
 [[autodoc]] FbgemmFp8Config
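For orientation, a minimal sketch of how the newly documented `Mxfp4Config` would typically be passed to `from_pretrained`, in the same way as the other quantization configs on this page. The checkpoint name and the `dequantize` flag below are assumptions for illustration, not part of this diff:

```python
from transformers import AutoModelForCausalLM, Mxfp4Config

# Assumed checkpoint name, used only for illustration.
checkpoint = "openai/gpt-oss-20b"

# Assumption: dequantize=True loads the MXFP4-packed weights back in higher
# precision; omit it to keep the quantized format.
quantization_config = Mxfp4Config(dequantize=True)

model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    quantization_config=quantization_config,
    device_map="auto",
)
```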

docs/source/en/model_doc/openai_moe.md renamed to docs/source/en/model_doc/gpt_oss.md

Lines changed: 8 additions & 8 deletions
@@ -24,11 +24,11 @@ rendered properly in your Markdown viewer.
 </div>
 </div>
 
-# OpenAIMoE
+# GptOss
 
 ## Overview
 
-The OpenAIMoE model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>.
+The GptOss model was proposed in [<INSERT PAPER NAME HERE>](<INSERT PAPER LINK HERE>) by <INSERT AUTHORS HERE>.
 <INSERT SHORT SUMMARY HERE>
 
 The abstract from the paper is the following:
@@ -43,16 +43,16 @@ This model was contributed by [INSERT YOUR HF USERNAME HERE](https://huggingface
 The original code can be found [here](<INSERT LINK TO GITHUB REPO HERE>).
 
 
-## OpenAIMoeConfig
+## GptOssConfig
 
-[[autodoc]] OpenAIMoeConfig
+[[autodoc]] GptOssConfig
 
-## OpenAIMoeModel
+## GptOssModel
 
-[[autodoc]] OpenAIMoeModel
+[[autodoc]] GptOssModel
 - forward
 
-## OpenAIMoeForCausalLM
+## GptOssForCausalLM
 
-[[autodoc]] OpenAIMoeForCausalLM
+[[autodoc]] GptOssForCausalLM
 - forward
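As a quick usage sketch, the renamed classes load through the Auto API like any other causal-LM architecture; the checkpoint name below is an assumption for illustration, substitute an actual GptOss checkpoint:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumed checkpoint name, for illustration only.
checkpoint = "openai/gpt-oss-20b"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# Resolves to GptOssForCausalLM via the model's config.
model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")

inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```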

src/transformers/models/granitemoe/modeling_granitemoe.py

Lines changed: 1 addition & 1 deletion
@@ -40,7 +40,7 @@
 logger = logging.get_logger(__name__)
 
 
-# Copied from transformers.models.jetmoe.modeling_jetmoe.load_balancing_loss_func
+# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
 def load_balancing_loss_func(
     gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
     num_experts: Optional[int] = None,

src/transformers/models/jamba/modeling_jamba.py

Lines changed: 1 addition & 1 deletion
@@ -67,7 +67,7 @@
 logger = logging.get_logger(__name__)
 
 
-# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func with gate->router
+# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func with gate->router
 def load_balancing_loss_func(
     router_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
     num_experts: Optional[int] = None,

src/transformers/models/jetmoe/modeling_jetmoe.py

Lines changed: 1 addition & 1 deletion
@@ -50,7 +50,7 @@
 logger = logging.get_logger(__name__)
 
 
-# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
+# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
 def load_balancing_loss_func(
     gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
     num_experts: Optional[int] = None,

src/transformers/models/olmoe/modeling_olmoe.py

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@
 logger = logging.get_logger(__name__)
 
 
-# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
+# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
 def load_balancing_loss_func(
     gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
     num_experts: Optional[int] = None,

src/transformers/models/phimoe/modeling_phimoe.py

Lines changed: 1 addition & 1 deletion
@@ -55,7 +55,7 @@
 logger = logging.get_logger(__name__)
 
 
-# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
+# Copied from transformers.models.qwen2_moe.modeling_qwen2_moe.load_balancing_loss_func
 def load_balancing_loss_func(
     gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
     num_experts: Optional[int] = None,

src/transformers/models/qwen2_moe/modeling_qwen2_moe.py

Lines changed: 0 additions & 1 deletion
@@ -59,7 +59,6 @@
 logger = logging.get_logger(__name__)
 
 
-# Copied from transformers.models.mixtral.modeling_mixtral.load_balancing_loss_func
 def load_balancing_loss_func(
     gate_logits: Union[torch.Tensor, tuple[torch.Tensor], None],
     num_experts: Optional[int] = None,
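With the `# Copied from` marker removed here, `qwen2_moe` becomes the canonical definition that the redirected comments in the files above now point to. For reference, a simplified, self-contained sketch of what a Switch-Transformers-style load-balancing loss computes — not the library implementation, which additionally handles per-layer logit tuples and attention masks:

```python
import torch
import torch.nn.functional as F


def load_balancing_loss_sketch(gate_logits: torch.Tensor, num_experts: int, top_k: int = 2) -> torch.Tensor:
    """Auxiliary MoE load-balancing loss for one layer's router logits.

    gate_logits: [num_tokens, num_experts]
    """
    routing_weights = F.softmax(gate_logits, dim=-1)                  # [tokens, experts]
    _, selected_experts = torch.topk(routing_weights, top_k, dim=-1)  # [tokens, top_k]
    expert_mask = F.one_hot(selected_experts, num_experts).float()    # [tokens, top_k, experts]

    # Fraction of tokens routed to each expert vs. mean router probability per expert;
    # the product is smallest when both are uniform across experts.
    tokens_per_expert = expert_mask.mean(dim=(0, 1))                  # [experts]
    router_prob_per_expert = routing_weights.mean(dim=0)              # [experts]

    return num_experts * torch.sum(tokens_per_expert * router_prob_per_expert)


# Example: random logits for 8 tokens routed over 4 experts.
loss = load_balancing_loss_sketch(torch.randn(8, 4), num_experts=4)
```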

utils/check_config_attributes.py

Lines changed: 2 additions & 0 deletions
@@ -345,6 +345,8 @@
         "IdeficsConfig": True,
         "IdeficsVisionConfig": True,
         "IdeficsPerceiverConfig": True,
+        # TODO: @Arthur/Joao (`hidden_act` unused)
+        "GptOssConfig": True,
     }
 )
