Ch. 9 (inequality) bug #316

darribas · 2023-12-10T20:02:57Z

Using pandas version 2.1.1, Cell 38:

rmeans = (
    pci_df.assign(
        # Create column with region name for each county
        Region_Name=pci_df.Region.map(region_names)
    )
    .groupby(
        # Group counties by region name
        by="Region_Name"
        # Calculate mean by region and save only year columns
    )
    .mean()[years]
)

Currently raises the following error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1871, in GroupBy._agg_py_fallback(self, how, values, ndim, alt)
   1870 try:
-> 1871     res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True)
   1872 except Exception as err:

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/ops.py:850, in BaseGrouper.agg_series(self, obj, func, preserve_dtype)
    848     preserve_dtype = True
--> 850 result = self._aggregate_series_pure_python(obj, func)
    852 npvalues = lib.maybe_convert_objects(result, try_float=False)

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/ops.py:871, in BaseGrouper._aggregate_series_pure_python(self, obj, func)
    870 for i, group in enumerate(splitter):
--> 871     res = func(group)
    872     res = extract_result(res)

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:2377, in GroupBy.mean.<locals>.<lambda>(x)
   2374 else:
   2375     result = self._cython_agg_general(
   2376         "mean",
-> 2377         alt=lambda x: Series(x).mean(numeric_only=numeric_only),
   2378         numeric_only=numeric_only,
   2379     )
   2380     return result.__finalize__(self.obj, method="groupby")

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/series.py:6221, in Series.mean(self, axis, skipna, numeric_only, **kwargs)
   6213 @doc(make_doc("mean", ndim=1))
   6214 def mean(
   6215     self,
   (...)
   6219     **kwargs,
   6220 ):
-> 6221     return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/generic.py:11978, in NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)
  11971 def mean(
  11972     self,
  11973     axis: Axis | None = 0,
   (...)
  11976     **kwargs,
  11977 ) -> Series | float:
> 11978     return self._stat_function(
  11979         "mean", nanops.nanmean, axis, skipna, numeric_only, **kwargs
  11980     )

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/generic.py:11935, in NDFrame._stat_function(self, name, func, axis, skipna, numeric_only, **kwargs)
  11933 validate_bool_kwarg(skipna, "skipna", none_allowed=False)
> 11935 return self._reduce(
  11936     func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
  11937 )

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/series.py:6129, in Series._reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
   6125     raise TypeError(
   6126         f"Series.{name} does not allow {kwd_name}={numeric_only} "
   6127         "with non-numeric dtypes."
   6128     )
-> 6129 return op(delegate, skipna=skipna, **kwds)

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/nanops.py:147, in bottleneck_switch.__call__.<locals>.f(values, axis, skipna, **kwds)
    146 else:
--> 147     result = alt(values, axis=axis, skipna=skipna, **kwds)
    149 return result

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/nanops.py:404, in _datetimelike_compat.<locals>.new_func(values, axis, skipna, mask, **kwargs)
    402     mask = isna(values)
--> 404 result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
    406 if datetimelike:

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/nanops.py:720, in nanmean(values, axis, skipna, mask)
    719 the_sum = values.sum(axis, dtype=dtype_sum)
--> 720 the_sum = _ensure_numeric(the_sum)
    722 if axis is not None and getattr(the_sum, "ndim", False):

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/nanops.py:1693, in _ensure_numeric(x)
   1691 if isinstance(x, str):
   1692     # GH#44008, GH#36703 avoid casting e.g. strings to numeric
-> 1693     raise TypeError(f"Could not convert string '{x}' to numeric")
   1694 try:

TypeError: Could not convert string '060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606063232323232323232323232323232323232414141414141414141414141414141414141414141414141414141414141414141414141535353535353535353535353535353535353535353535353535353535353535353535353535353' to numeric

The above exception was the direct cause of the following exception:

TypeError                                 Traceback (most recent call last)
Cell In[38], line 2
      1 rmeans = (
----> 2     pci_df.assign(
      3         # Create column with region name for each county
      4         Region_Name=pci_df.Region.map(region_names)
      5     )
      6     .groupby(
      7         # Group counties by region name
      8         by="Region_Name"
      9         # Calculate mean by region and save only year columns
     10     )
     11     .mean()[years]
     12 )

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:2375, in GroupBy.mean(self, numeric_only, engine, engine_kwargs)
   2368     return self._numba_agg_general(
   2369         grouped_mean,
   2370         executor.float_dtype_mapping,
   2371         engine_kwargs,
   2372         min_periods=0,
   2373     )
   2374 else:
-> 2375     result = self._cython_agg_general(
   2376         "mean",
   2377         alt=lambda x: Series(x).mean(numeric_only=numeric_only),
   2378         numeric_only=numeric_only,
   2379     )
   2380     return result.__finalize__(self.obj, method="groupby")

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1926, in GroupBy._cython_agg_general(self, how, alt, numeric_only, min_count, **kwargs)
   1923     result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
   1924     return result
-> 1926 new_mgr = data.grouped_reduce(array_func)
   1927 res = self._wrap_agged_manager(new_mgr)
   1928 out = self._wrap_aggregated_output(res)

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/internals/managers.py:1428, in BlockManager.grouped_reduce(self, func)
   1424 if blk.is_object:
   1425     # split on object-dtype blocks bc some columns may raise
   1426     #  while others do not.
   1427     for sb in blk._split():
-> 1428         applied = sb.apply(func)
   1429         result_blocks = extend_blocks(applied, result_blocks)
   1430 else:

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/internals/blocks.py:366, in Block.apply(self, func, **kwargs)
    360 @final
    361 def apply(self, func, **kwargs) -> list[Block]:
    362     """
    363     apply the function to my values; return a block if we are not
    364     one
    365     """
--> 366     result = func(self.values, **kwargs)
    368     result = maybe_coerce_values(result)
    369     return self._split_op_result(result)

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1923, in GroupBy._cython_agg_general.<locals>.array_func(values)
   1920 else:
   1921     return result
-> 1923 result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
   1924 return result

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1875, in GroupBy._agg_py_fallback(self, how, values, ndim, alt)
   1873     msg = f"agg function failed [how->{how},dtype->{ser.dtype}]"
   1874     # preserve the kind of exception that raised
-> 1875     raise type(err)(msg) from err
   1877 if ser.dtype == object:
   1878     res_values = res_values.astype(object, copy=False)

TypeError: agg function failed [how->mean,dtype->object]

The text was updated successfully, but these errors were encountered:

ljwolf · 2023-12-10T21:56:33Z

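We will need to flip the select, so that the year columns are picked before the mean is computed and the non-numeric (object dtype) columns are never aggregated: .groupby("Region_Name")[years].mean()

Get Outlook for iOS <https://aka.ms/o0ukef>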

________________________________ From: Dani Arribas-Bel ***@***.***> Sent: Sunday, December 10, 2023 10:03:08 PM To: gdsbook/book ***@***.***> Cc: Subscribed ***@***.***> Subject: [gdsbook/book] Ch. 9 (inequality) bug (Issue #316) Using pandas version 2.1.1, Cell 38: rmeans = ( pci_df.assign( # Create column with region name for each county Region_Name=pci_df.Region.map(region_names) ) .groupby( # Group counties by region name by="Region_Name" # Calculate mean by region and save only year columns ) .mean()[years] ) Currently returns the following error:

--------------------------------------------------------------------------- TypeError Traceback (most recent call last) File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1871, in GroupBy._agg_py_fallback(self, how, values, ndim, alt) 1870 try: -> 1871 res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True) 1872 except Exception as err: File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/ops.py:850, in BaseGrouper.agg_series(self, obj, func, preserve_dtype) 848 preserve_dtype = True --> 850 result = self._aggregate_series_pure_python(obj, func) 852 npvalues = lib.maybe_convert_objects(result, try_float=False) File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/ops.py:871, in BaseGrouper._aggregate_series_pure_python(self, obj, func) 870 for i, group in enumerate(splitter): --> 871 res = func(group) 872 res = extract_result(res) File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:2377, in GroupBy.mean.<locals>.<lambda>(x) 2374 else: 2375 result = self._cython_agg_general( 2376 "mean", -> 2377 alt=lambda x: Series(x).mean(numeric_only=numeric_only), 2378 numeric_only=numeric_only, 2379 ) 2380 return result.__finalize__(self.obj, method="groupby") File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/series.py:6221, in Series.mean(self, axis, skipna, numeric_only, **kwargs) 6213 @doc(make_doc("mean", ndim=1)) 6214 def mean( 6215 self, (...) 6219 **kwargs, 6220 ): -> 6221 return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs) File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/generic.py:11978, in NDFrame.mean(self, axis, skipna, numeric_only, **kwargs) 11971 def mean( 11972 self, 11973 axis: Axis | None = 0, (...) 11976 **kwargs, 11977 ) -> Series | float:

11978 return self._stat_function(

11979 "mean", nanops.nanmean, axis, skipna, numeric_only, **kwargs 11980 ) File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/generic.py:11935, in NDFrame._stat_function(self, name, func, axis, skipna, numeric_only, **kwargs) 11933 validate_bool_kwarg(skipna, "skipna", none_allowed=False)

11935 return self._reduce(

11936 func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only 11937 ) File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/series.py:6129, in Series._reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds) 6125 raise TypeError( 6126 f"Series.{name} does not allow {kwd_name}={numeric_only} " 6127 "with non-numeric dtypes." 6128 ) -> 6129 return op(delegate, skipna=skipna, **kwds) File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/nanops.py:147, in bottleneck_switch.__call__.<locals>.f(values, axis, skipna, **kwds) 146 else: --> 147 result = alt(values, axis=axis, skipna=skipna, **kwds) 149 return result File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/nanops.py:404, in _datetimelike_compat.<locals>.new_func(values, axis, skipna, mask, **kwargs) 402 mask = isna(values) --> 404 result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs) 406 if datetimelike: File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/nanops.py:720, in nanmean(values, axis, skipna, mask) 719 the_sum = values.sum(axis, dtype=dtype_sum) --> 720 the_sum = _ensure_numeric(the_sum) 722 if axis is not None and getattr(the_sum, "ndim", False): File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/nanops.py:1693, in _ensure_numeric(x) 1691 if isinstance(x, str): 1692 # GH#44008, GH#36703 avoid casting e.g. 
strings to numeric -> 1693 raise TypeError(f"Could not convert string '{x}' to numeric") 1694 try: TypeError: Could not convert string '060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606063232323232323232323232323232323232414141414141414141414141414141414141414141414141414141414141414141414141535353535353535353535353535353535353535353535353535353535353535353535353535353' to numeric The above exception was the direct cause of the following exception: TypeError Traceback (most recent call last) Cell In[38], line 2 1 rmeans = ( ----> 2 pci_df.assign( 3 # Create column with region name for each county 4 Region_Name=pci_df.Region.map(region_names) 5 ) 6 .groupby( 7 # Group counties by region name 8 by="Region_Name" 9 # Calculate mean by region and save only year columns 10 ) 11 .mean()[years] 12 ) File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:2375, in GroupBy.mean(self, numeric_only, engine, engine_kwargs) 2368 return self._numba_agg_general( 2369 grouped_mean, 2370 executor.float_dtype_mapping, 2371 engine_kwargs, 2372 min_periods=0, 2373 ) 2374 else: -> 2375 result = self._cython_agg_general( 2376 "mean", 2377 alt=lambda x: Series(x).mean(numeric_only=numeric_only), 2378 numeric_only=numeric_only, 2379 ) 2380 return result.__finalize__(self.obj, method="groupby") File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1926, in GroupBy._cython_agg_general(self, how, alt, numeric_only, min_count, **kwargs) 1923 result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt) 1924 return result -> 1926 new_mgr = data.grouped_reduce(array_func) 1927 res = self._wrap_agged_manager(new_mgr) 1928 out = self._wrap_aggregated_output(res) File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/internals/managers.py:1428, in BlockManager.grouped_reduce(self, func) 1424 if blk.is_object: 1425 # split on object-dtype blocks bc some columns 
may raise 1426 # while others do not. 1427 for sb in blk._split(): -> 1428 applied = sb.apply(func) 1429 result_blocks = extend_blocks(applied, result_blocks) 1430 else: File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/internals/blocks.py:366, in Block.apply(self, func, **kwargs) 360 @Final 361 def apply(self, func, **kwargs) -> list[Block]: 362 """ 363 apply the function to my values; return a block if we are not 364 one 365 """ --> 366 result = func(self.values, **kwargs) 368 result = maybe_coerce_values(result) 369 return self._split_op_result(result) File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1923, in GroupBy._cython_agg_general.<locals>.array_func(values) 1920 else: 1921 return result -> 1923 result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt) 1924 return result File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1875, in GroupBy._agg_py_fallback(self, how, values, ndim, alt) 1873 msg = f"agg function failed [how->{how},dtype->{ser.dtype}]" 1874 # preserve the kind of exception that raised -> 1875 raise type(err)(msg) from err 1877 if ser.dtype == object: 1878 res_values = res_values.astype(object, copy=False) TypeError: agg function failed [how->mean,dtype->object] — Reply to this email directly, view it on GitHub<#316>, or unsubscribe<https://github.com/notifications/unsubscribe-auth/AARFR44AJ7DIOSGVD2BASCDYIYIPZAVCNFSM6AAAAABAOZM32SVHI2DSMVQWIX3LMV43ASLTON2WKOZSGAZTINJRGMZTGOA>. You are receiving this because you are subscribed to this thread.Message ID: ***@***.***>

darribas added the "keepup" label (Changes to keep up with the evolution of the python stack) on Dec 10, 2023

darribas added a commit to darribas/geographic-data-science that referenced this issue Jan 5, 2024

Fix gdsbook#316 following @ljwolf suggestion

56499c1

darribas mentioned this issue Jan 5, 2024

Keep up fixes #318

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Ch. 9 (inequality) bug #316

Ch. 9 (inequality) bug #316

darribas commented Dec 10, 2023

ljwolf commented Dec 10, 2023 via email

Ch. 9 (inequality) bug #316

Ch. 9 (inequality) bug #316

Comments

darribas commented Dec 10, 2023

ljwolf commented Dec 10, 2023 via email