MCPcopy
hub / github.com/pandas-dev/pandas / _harmonize_columns

Method _harmonize_columns

pandas/io/sql.py:1290–1348  ·  view source on GitHub ↗

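Make the DataFrame's column types align with the SQL table column types. Need to work around limited NA value support. Floats are always fine, ints must always be floats if there are Null values. Booleans are hard because converting bool column with None replaces all Nones with false. Therefore only convert bool if there are no NA values. Datetimes should already be converted to np.datetime64 if supported, but here we also force conversion if required.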

(
        self,
        parse_dates=None,
        dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
    )

Source from the content-addressed store, hash-verified

1288 return Table(self.name, meta, *columns, schema=schema)
1289
1290 def _harmonize_columns(
1291 self,
1292 parse_dates=None,
1293 dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
1294 ) -> None:
1295 """
1296 Make the DataFrame's column types align with the SQL table
1297 column types.
1298 Need to work around limited NA value support. Floats are always
1299 fine, ints must always be floats if there are Null values.
1300 Booleans are hard because converting bool column with None replaces
1301 all Nones with false. Therefore only convert bool if there are no
1302 NA values.
1303 Datetimes should already be converted to np.datetime64 if supported,
1304 but here we also force conversion if required.
1305 """
1306 parse_dates = _process_parse_dates_argument(parse_dates)
1307
1308 for sql_col in self.table.columns:
1309 col_name = sql_col.name
1310 try:
1311 df_col = self.frame[col_name]
1312
1313 # Handle date parsing upfront; don't try to convert columns
1314 # twice
1315 if col_name in parse_dates:
1316 try:
1317 fmt = parse_dates[col_name]
1318 except TypeError:
1319 fmt = None
1320 self.frame[col_name] = _handle_date_column(df_col, format=fmt)
1321 continue
1322
1323 # the type the dataframe column should have
1324 col_type = self._get_dtype(sql_col.type)
1325
1326 if (
1327 col_type is datetime
1328 or col_type is date
1329 or col_type is DatetimeTZDtype
1330 ):
1331 # Convert tz-aware Datetime SQL columns to UTC
1332 utc = col_type is DatetimeTZDtype
1333 self.frame[col_name] = _handle_date_column(df_col, utc=utc)
1334 elif dtype_backend == "numpy" and col_type is float:
1335 # floats support NA, can always convert!
1336 self.frame[col_name] = df_col.astype(col_type)
1337 elif (
1338 using_string_dtype()
1339 and is_string_dtype(col_type)
1340 and is_object_dtype(self.frame[col_name])
1341 ):
1342 self.frame[col_name] = df_col.astype(col_type)
1343 elif dtype_backend == "numpy" and len(df_col) == df_col.count():
1344 # No NA values, can convert ints and bools
1345 if col_type is np.dtype("int64") or col_type is bool:
1346 self.frame[col_name] = df_col.astype(col_type)
1347 except KeyError:

Callers 2

_query_iterator · Method · 0.95
read · Method · 0.95

Calls 9

_get_dtype · Method · 0.95
using_string_dtype · Function · 0.90
is_string_dtype · Function · 0.90
is_object_dtype · Function · 0.90
_handle_date_column · Function · 0.85
astype · Method · 0.45
count · Method · 0.45
dtype · Method · 0.45

Tested by

no test coverage detected