MCPcopy
hub / github.com/huggingface/transformers / to_dict

Method to_dict

src/transformers/processing_utils.py:1001–1062  ·  view source on GitHub ↗

Serializes this instance to a Python dictionary. Returns: `dict[str, Any]`: Dictionary of all the attributes that make up this processor instance.

(self)

Source from the content-addressed store, hash-verified

999 return proper_class
1000
1001 def to_dict(self) -> dict[str, Any]:
1002 """
1003 Serializes this instance to a Python dictionary.
1004
1005 Returns:
1006 `dict[str, Any]`: Dictionary of all the attributes that make up this processor instance.
1007 """
1008 # Exclude tokenizer attributes before deepcopying to avoid copying large vocab/token structures.
1009 tokenizer_attributes = set()
1010 for attribute in self.__class__.get_attributes():
1011 if attribute in self.__dict__:
1012 modality = _get_modality_for_attribute(attribute)
1013 if modality == "tokenizer":
1014 tokenizer_attributes.add(attribute)
1015
1016 dict_to_copy = {k: v for k, v in self.__dict__.items() if k not in tokenizer_attributes}
1017 output = copy.deepcopy(dict_to_copy)
1018
1019 # Get the kwargs in `__init__`.
1020 sig = inspect.signature(self.__init__)
1021 # Only save the attributes that are present in the kwargs of `__init__`
1022 # or returned by `get_attributes()`.
1023 attrs_to_save = list(sig.parameters) + self.__class__.get_attributes()
1024 # extra attributes to be kept
1025 attrs_to_save += ["auto_map"]
1026
1027 if "chat_template" in output:
1028 del output["chat_template"]
1029
def cast_array_to_list(dictionary):
    """
    Numpy arrays are not serializable but can be in pre-processing dicts.
    This function casts arrays to lists, recursing through nested dicts and lists as well.

    Args:
        dictionary (`dict`): Mapping to convert. It is modified in place.

    Returns:
        `dict`: The same `dictionary` object, where every numpy array has been replaced by a
        (nested) Python list and every numpy scalar by the equivalent Python scalar.
    """

    def _cast(value):
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, np.generic):
            # Numpy scalars (e.g. `np.float32`) are not JSON serializable either.
            return value.item()
        if isinstance(value, dict):
            return cast_array_to_list(value)
        if isinstance(value, list):
            # Arrays can also sit inside lists; convert the elements in place so the
            # list object itself is preserved, like the dicts are.
            value[:] = [_cast(item) for item in value]
            return value
        return value

    # Only existing keys are reassigned, so mutating while iterating is safe.
    for key, value in dictionary.items():
        dictionary[key] = _cast(value)
    return dictionary
1041
1042 # Special case, add `audio_tokenizer` dict which points to model weights and path
1043 if "audio_tokenizer" in output:
1044 audio_tokenizer_dict = {
1045 "audio_tokenizer_class": self.audio_tokenizer.__class__.__name__,
1046 "audio_tokenizer_name_or_path": self.audio_tokenizer.name_or_path,
1047 }
1048 output["audio_tokenizer"] = audio_tokenizer_dict
1049
1050 # Serialize attributes as a dict
1051 output = {
1052 k: v.to_dict() if isinstance(v, PushToHubMixin) else v
1053 for k, v in output.items()
1054 if (
1055 k in attrs_to_save # keep all attributes that have to be serialized
1056 and v.__class__.__name__ != "BeamSearchDecoderCTC" # remove attributes with that are objects
1057 )
1058 }

Callers 6

to_json_stringMethod · 0.95
_get_exact_configFunction · 0.45
from_pretrainedMethod · 0.45
get_compiled_callMethod · 0.45

Calls 4

get_attributesMethod · 0.80
addMethod · 0.45
itemsMethod · 0.45

Tested by

no test coverage detected