MCPcopy Index your code
hub / github.com/openai/tiktoken / encode_batch

Method encode_batch

tiktoken/core.py:178–206  ·  view source on GitHub ↗

Encodes a list of strings into tokens, in parallel.

See `encode` for more details on `allowed_special` and `disallowed_special`.

```
>>> enc.encode_batch(["hello world", "goodbye world"])
[[31373, 995], [11274, 16390, 995]]
```

(
        self,
        text: list[str],
        *,
        num_threads: int = 8,
        allowed_special: Literal["all"] | AbstractSet[str] = set(),  # noqa: B006
        disallowed_special: Literal["all"] | Collection[str] = "all",
    )

Source from the content-addressed store, hash-verified

176 return list(e.map(encoder, text))
177
178 def encode_batch(
179 self,
180 text: list[str],
181 *,
182 num_threads: int = 8,
183 allowed_special: Literal["all"] | AbstractSet[str] = set(), # noqa: B006
184 disallowed_special: Literal["all"] | Collection[str] = "all",
185 ) -> list[list[int]]:
186 """Encodes a list of strings into tokens, in parallel.
187
188 See `encode` for more details on `allowed_special` and `disallowed_special`.
189
190 ```
191 >>> enc.encode_batch(["hello world", "goodbye world"])
192 [[31373, 995], [11274, 16390, 995]]
193 ```
194 """
195 if allowed_special == "all":
196 allowed_special = self.special_tokens_set
197 if disallowed_special == "all":
198 disallowed_special = self.special_tokens_set - allowed_special
199 if not isinstance(disallowed_special, frozenset):
200 disallowed_special = frozenset(disallowed_special)
201
202 encoder = functools.partial(
203 self.encode, allowed_special=allowed_special, disallowed_special=disallowed_special
204 )
205 with ThreadPoolExecutor(num_threads) as e:
206 return list(e.map(encoder, text))
207
208 def encode_with_unstable(
209 self,

Callers 2

test_batch_encode · Function · 0.80
test_hyp_batch_roundtrip · Function · 0.80

Calls

no outgoing calls

Tested by 2

test_batch_encode · Function · 0.64
test_hyp_batch_roundtrip · Function · 0.64