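Make the DataFrame's column types align with the SQL table column types. Need to work around limited NA value support. Floats are always fine; ints must always be floats if there are Null values. Booleans are hard because converting a bool column with None replaces all Nones with False. Therefore only convert bool if there are no NA values. Datetimes should already be converted to np.datetime64 if supported, but here we also force conversion if required.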
(
self,
parse_dates=None,
dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
)
| 1288 | return Table(self.name, meta, *columns, schema=schema) |
| 1289 | |
| 1290 | def _harmonize_columns( |
| 1291 | self, |
| 1292 | parse_dates=None, |
| 1293 | dtype_backend: DtypeBackend | Literal["numpy"] = "numpy", |
| 1294 | ) -> None: |
| 1295 | """ |
| 1296 | Make the DataFrame's column types align with the SQL table |
| 1297 | column types. |
| 1298 | Need to work around limited NA value support. Floats are always |
| 1299 | fine, ints must always be floats if there are Null values. |
| 1300 | Booleans are hard because converting bool column with None replaces |
| 1301 | all Nones with false. Therefore only convert bool if there are no |
| 1302 | NA values. |
| 1303 | Datetimes should already be converted to np.datetime64 if supported, |
| 1304 | but here we also force conversion if required. |
| 1305 | """ |
| 1306 | parse_dates = _process_parse_dates_argument(parse_dates) |
| 1307 | |
| 1308 | for sql_col in self.table.columns: |
| 1309 | col_name = sql_col.name |
| 1310 | try: |
| 1311 | df_col = self.frame[col_name] |
| 1312 | |
| 1313 | # Handle date parsing upfront; don't try to convert columns |
| 1314 | # twice |
| 1315 | if col_name in parse_dates: |
| 1316 | try: |
| 1317 | fmt = parse_dates[col_name] |
| 1318 | except TypeError: |
| 1319 | fmt = None |
| 1320 | self.frame[col_name] = _handle_date_column(df_col, format=fmt) |
| 1321 | continue |
| 1322 | |
| 1323 | # the type the dataframe column should have |
| 1324 | col_type = self._get_dtype(sql_col.type) |
| 1325 | |
| 1326 | if ( |
| 1327 | col_type is datetime |
| 1328 | or col_type is date |
| 1329 | or col_type is DatetimeTZDtype |
| 1330 | ): |
| 1331 | # Convert tz-aware Datetime SQL columns to UTC |
| 1332 | utc = col_type is DatetimeTZDtype |
| 1333 | self.frame[col_name] = _handle_date_column(df_col, utc=utc) |
| 1334 | elif dtype_backend == "numpy" and col_type is float: |
| 1335 | # floats support NA, can always convert! |
| 1336 | self.frame[col_name] = df_col.astype(col_type) |
| 1337 | elif ( |
| 1338 | using_string_dtype() |
| 1339 | and is_string_dtype(col_type) |
| 1340 | and is_object_dtype(self.frame[col_name]) |
| 1341 | ): |
| 1342 | self.frame[col_name] = df_col.astype(col_type) |
| 1343 | elif dtype_backend == "numpy" and len(df_col) == df_col.count(): |
| 1344 | # No NA values, can convert ints and bools |
| 1345 | if col_type is np.dtype("int64") or col_type is bool: |
| 1346 | self.frame[col_name] = df_col.astype(col_type) |
| 1347 | except KeyError: |
no test coverage detected