MCPcopy
hub / github.com/huggingface/transformers / generate_without_cb

Function generate_without_cb

examples/pytorch/continuous_batching.py:33–51  ·  view source on GitHub ↗
(
    model_id: str, sliding_window: int, attn_impl: str, batched_inputs: list[int], generation_config: GenerationConfig
)

Source from the content-addressed store, hash-verified

31
32
def generate_without_cb(
    model_id: str,
    sliding_window: int,
    attn_impl: str,
    batched_inputs: list[list[int]],
    generation_config: GenerationConfig,
) -> dict[str, str]:
    """Generate a completion for every prompt one at a time, without continuous batching.

    The model is loaded in bfloat16, moved to CUDA and put in eval mode. Each
    prompt is then passed to ``model.generate`` on its own (batch size 1) with
    an all-ones attention mask.

    Args:
        model_id: Identifier passed to ``from_pretrained`` for both the model
            and the tokenizer.
        sliding_window: When strictly positive, and only if the loaded model
            config already has a non-``None`` ``sliding_window`` attribute,
            this value replaces it. Otherwise the config is left untouched.
        attn_impl: Forwarded as ``attn_implementation`` when loading the model.
        batched_inputs: One sequence of token ids per prompt.
        generation_config: Forwarded unchanged to ``model.generate``.

    Returns:
        A dict mapping each prompt, encoded as its token ids joined by single
        spaces, to the decoded text of the newly generated tokens (the prompt
        is stripped, special tokens are kept). Identical prompts share a key,
        so a later occurrence overwrites an earlier one.
    """
    # Setup model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, attn_implementation=attn_impl)
    model = model.cuda().eval()  # type: ignore
    # Only override the window for models that already define one.
    if sliding_window > 0 and getattr(model.config, "sliding_window", None) is not None:
        model.config.sliding_window = sliding_window
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Generate one by one
    decoded_outputs: dict[str, str] = {}
    for prompt_ids in tqdm(batched_inputs, desc="Generating outputs without CB"):
        # This will be used to identify the output after batched generation
        key = " ".join(map(str, prompt_ids))
        # Wrap in an outer list to add the batch dimension: shape (1, prompt_len).
        input_ids = torch.tensor([prompt_ids]).to("cuda")
        attention_mask = torch.ones_like(input_ids)
        outputs = model.generate(input_ids, attention_mask=attention_mask, generation_config=generation_config)
        # The returned sequence starts with the prompt; keep only what comes after it.
        generated_tokens = outputs[0][input_ids.shape[1] :]
        decoded_outputs[key] = tokenizer.decode(generated_tokens, skip_special_tokens=False)  # type: ignore
    return decoded_outputs
52
53
54def batch_generate(

Callers 1

Calls 7

eval · Method · 0.80
join · Method · 0.80
from_pretrained · Method · 0.45
cuda · Method · 0.45
to · Method · 0.45
generate · Method · 0.45
decode · Method · 0.45

Tested by

no test coverage detected