After this function runs, the `all_attrables` keys of `args` all contain only references to columns of `df_output`. This function handles the extraction of data from `args["attrable"]` and column-name generation as appropriate, adds the data to `df_output`, and then replaces `args["attrable"]` with the appropriate reference.
(
args, wide_mode, var_name, value_name, is_pd_like, native_namespace
)
| 1211 | |
| 1212 | |
| 1213 | def process_args_into_dataframe( |
| 1214 | args, wide_mode, var_name, value_name, is_pd_like, native_namespace |
| 1215 | ): |
| 1216 | """ |
| 1217 | After this function runs, the `all_attrables` keys of `args` all contain only |
| 1218 | references to columns of `df_output`. This function handles the extraction of data |
| 1219 | from `args["attrable"]` and column-name-generation as appropriate, and adds the |
| 1220 | data to `df_output` and then replaces `args["attrable"]` with the appropriate |
| 1221 | reference. |
| 1222 | """ |
| 1223 | |
| 1224 | df_input: nw.DataFrame | None = args["data_frame"] |
| 1225 | df_provided = df_input is not None |
| 1226 | |
| 1227 | # we use a dict instead of a dataframe directly so that it doesn't cause |
| 1228 | # PerformanceWarning by pandas by repeatedly setting the columns. |
| 1229 | # a dict is used instead of a list as the columns needs to be overwritten. |
| 1230 | df_output = {} |
| 1231 | constants = {} |
| 1232 | ranges = [] |
| 1233 | wide_id_vars = set() |
| 1234 | reserved_names = _get_reserved_col_names(args) if df_provided else set() |
| 1235 | |
| 1236 | # Case of functions with a "dimensions" kw: scatter_matrix, parcats, parcoords |
| 1237 | if "dimensions" in args and args["dimensions"] is None: |
| 1238 | if not df_provided: |
| 1239 | raise ValueError( |
| 1240 | "No data were provided. Please provide data either with the `data_frame` or with the `dimensions` argument." |
| 1241 | ) |
| 1242 | else: |
| 1243 | df_output = {col: df_input.get_column(col) for col in df_input.columns} |
| 1244 | |
| 1245 | # hover_data is a dict |
| 1246 | hover_data_is_dict = ( |
| 1247 | "hover_data" in args |
| 1248 | and args["hover_data"] |
| 1249 | and isinstance(args["hover_data"], dict) |
| 1250 | ) |
| 1251 | # If dict, convert all values of hover_data to tuples to simplify processing |
| 1252 | if hover_data_is_dict: |
| 1253 | for k in args["hover_data"]: |
| 1254 | if _isinstance_listlike(args["hover_data"][k]): |
| 1255 | args["hover_data"][k] = (True, args["hover_data"][k]) |
| 1256 | if not isinstance(args["hover_data"][k], tuple): |
| 1257 | args["hover_data"][k] = (args["hover_data"][k], None) |
| 1258 | if df_provided and args["hover_data"][k][1] is not None and k in df_input: |
| 1259 | raise ValueError( |
| 1260 | "Ambiguous input: values for '%s' appear both in hover_data and data_frame" |
| 1261 | % k |
| 1262 | ) |
| 1263 | # Loop over possible arguments |
| 1264 | for field_name in all_attrables: |
| 1265 | # Massaging variables |
| 1266 | argument_list = ( |
| 1267 | [args.get(field_name)] |
| 1268 | if field_name not in array_attrables |
| 1269 | else args.get(field_name) |
| 1270 | ) |
no test coverage detected