Create a processor for `processor_class`. If a processor is not able to be built with the original arguments, this method tries to change the arguments and call itself recursively, by inferring a new `config_class` or a new `processor_class` from another one, in order to find a checkpoint containing the necessary files to build a processor.
(config_class, processor_class, allow_no_checkpoint=False)
| 354 | |
| 355 | |
| 356 | def build_processor(config_class, processor_class, allow_no_checkpoint=False): |
| 357 | """Create a processor for `processor_class`. |
| 358 | |
| 359 | If a processor is not able to be built with the original arguments, this method tries to change the arguments and |
| 360 | call itself recursively, by inferring a new `config_class` or a new `processor_class` from another one, in order to |
| 361 | find a checkpoint containing the necessary files to build a processor. |
| 362 | |
| 363 | The processor is not saved here. Instead, it will be saved in `convert_processors` after further changes in |
| 364 | `convert_processors`. For each model architecture, a copy will be created and saved along with the built model. |
| 365 | """ |
| 366 | # Currently, this solely uses the docstring in the source file of `config_class` to find a checkpoint. |
| 367 | checkpoint = get_checkpoint_from_config_class(config_class) |
| 368 | |
| 369 | # New method that is more robust to get checkpoints! |
| 370 | |
| 371 | if checkpoint is None and not processor_class.__name__.startswith("Auto"): |
| 372 | # try to get the checkpoint from the config class for `processor_class`. |
| 373 | # This helps cases like `XCLIPConfig` and `VideoMAEFeatureExtractor` to find a checkpoint from `VideoMAEConfig`. |
| 374 | config_class_from_processor_class = get_config_class_from_processor_class(processor_class) |
| 375 | checkpoint = get_checkpoint_from_config_class(config_class_from_processor_class) |
| 376 | |
| 377 | processor = None |
| 378 | try: |
| 379 | revision = CHECKPOINT_REVISIONS.get(config_class.__name__) |
| 380 | sub_folder = CHECKPOINT_SUBFOLDERS.get(config_class.__name__, "") |
| 381 | processor = processor_class.from_pretrained(checkpoint, revision=revision, subfolder=sub_folder) |
| 382 | except Exception as e: |
| 383 | logger.error(f"{e.__class__.__name__}: {e}") |
| 384 | |
| 385 | # Try to get a new processor class from checkpoint. This is helpful for a checkpoint without necessary file to load |
| 386 | # processor while `processor_class` is an Auto class. For example, `sew` has `Wav2Vec2Processor` in |
| 387 | # `PROCESSOR_MAPPING_NAMES`, its `tokenizer_class` is `AutoTokenizer`, and the checkpoint |
| 388 | # `https://huggingface.co/asapp/sew-tiny-100k` has no tokenizer file, but we can get |
| 389 | # `tokenizer_class: Wav2Vec2CTCTokenizer` from the config file. (The new processor class won't be able to load from |
| 390 | # `checkpoint`, but it helps this recursive method to find a way to build a processor). |
| 391 | if ( |
| 392 | processor is None |
| 393 | and checkpoint is not None |
| 394 | and issubclass(processor_class, (PreTrainedTokenizerBase, AutoTokenizer)) |
| 395 | ): |
| 396 | try: |
| 397 | revision = CHECKPOINT_REVISIONS.get(config_class.__name__) |
| 398 | config = AutoConfig.from_pretrained(checkpoint, revision=revision) |
| 399 | except Exception as e: |
| 400 | logger.error(f"{e.__class__.__name__}: {e}") |
| 401 | config = None |
| 402 | if config is not None: |
| 403 | # TODO: sam2 (Sam2Config) from `facebook/sam2.1-hiera-tiny` will fail if we don't add `getattr(config, "tokenizer_class", None) is not None` |
| 404 | # (as we get `Sam2VideoConfig` instead of `Sam2Config`) |
| 405 | if getattr(config, "tokenizer_class", None) is not None and not isinstance(config, config_class): |
| 406 | raise ValueError( |
| 407 | f"`config` (which is of type {config.__class__.__name__}) should be an instance of `config_class`" |
| 408 | f" ({config_class.__name__})!" |
| 409 | ) |
| 410 | if getattr(config, "tokenizer_class", None) is not None: |
| 411 | tokenizer_class = config.tokenizer_class |
| 412 | new_processor_class = None |
| 413 | if tokenizer_class is not None: |
no test coverage detected