Encodes a list of strings into tokens, in parallel.

See `encode` for more details on `allowed_special` and `disallowed_special`.

```
>>> enc.encode_batch(["hello world", "goodbye world"])
[[31373, 995], [11274, 16390, 995]]
```
(
self,
text: list[str],
*,
num_threads: int = 8,
allowed_special: Literal["all"] | AbstractSet[str] = set(), # noqa: B006
disallowed_special: Literal["all"] | Collection[str] = "all",
)
| 176 | return list(e.map(encoder, text)) |
| 177 | |
| 178 | def encode_batch( |
| 179 | self, |
| 180 | text: list[str], |
| 181 | *, |
| 182 | num_threads: int = 8, |
| 183 | allowed_special: Literal["all"] | AbstractSet[str] = set(), # noqa: B006 |
| 184 | disallowed_special: Literal["all"] | Collection[str] = "all", |
| 185 | ) -> list[list[int]]: |
| 186 | """Encodes a list of strings into tokens, in parallel. |
| 187 | |
| 188 | See `encode` for more details on `allowed_special` and `disallowed_special`. |
| 189 | |
| 190 | ``` |
| 191 | >>> enc.encode_batch(["hello world", "goodbye world"]) |
| 192 | [[31373, 995], [11274, 16390, 995]] |
| 193 | ``` |
| 194 | """ |
| 195 | if allowed_special == "all": |
| 196 | allowed_special = self.special_tokens_set |
| 197 | if disallowed_special == "all": |
| 198 | disallowed_special = self.special_tokens_set - allowed_special |
| 199 | if not isinstance(disallowed_special, frozenset): |
| 200 | disallowed_special = frozenset(disallowed_special) |
| 201 | |
| 202 | encoder = functools.partial( |
| 203 | self.encode, allowed_special=allowed_special, disallowed_special=disallowed_special |
| 204 | ) |
| 205 | with ThreadPoolExecutor(num_threads) as e: |
| 206 | return list(e.map(encoder, text)) |
| 207 | |
| 208 | def encode_with_unstable( |
| 209 | self, |
no outgoing calls