Programming
dask
Updated Mon, 24 Oct 2022 05:30:16 GMT

dask: why does compute() raise "TypeError: apply() got an unexpected keyword argument 'how'"?


Versions: dask 2.15.0, pandas 1.0.3

unknown_dict = dict(map(lambda columnName:(columnName,'unknown'),columns_to_clean))
my_dataframe = my_dataframe.fillna(unknown_dict)
#no problem before compute
with ProgressBar():
       print(my_dataframe.isnull().sum().compute())
my_dataframe.persist()

Running this code raised the following error:

TypeError Traceback (most recent call last) in 1 with ProgressBar(): ----> 2 print(my_dataframe.isnull().sum().compute()) 3 my_dataframe.persist()

C:\Users\KHS\lib\site-packages\dask\base.py in compute(self, **kwargs) 164 dask.base.compute 165 """ --> 166 (result,) = compute(self, traverse=False, **kwargs) 167 return result 168

C:\Users\KHS\lib\site-packages\dask\base.py in compute(*args, **kwargs) 435 keys = [x.__dask_keys__() for x in collections] 436 postcomputes = [x.__dask_postcompute__() for x in collections] --> 437 results = schedule(dsk, keys, **kwargs) 438 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)]) 439

C:\Users\KHS\lib\site-packages\dask\threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs) 82 get_id=_thread_get_id, 83 pack_exception=pack_exception, ---> 84 **kwargs 85 ) 86

C:\Users\KHS\lib\site-packages\dask\local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs) 484 _execute_task(task, data) # Re-execute locally 485 else: --> 486 raise_exception(exc, tb) 487 res, worker_id = loads(res_info) 488 state["cache"][key] = res

C:\Users\KHS\lib\site-packages\dask\local.py in reraise(exc, tb) 314 if exc.__traceback__ is not tb: 315 raise exc.with_traceback(tb) --> 316 raise exc 317 318

C:\Users\KHS\lib\site-packages\dask\local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception) 220 try: 221 task, data = loads(task_info) --> 222 result = _execute_task(task, data) 223 id = get_id() 224 result = dumps((result, id))

C:\Users\KHS\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk) 119 # temporaries by their reference count and can execute certain 120 # operations in-place. --> 121 return func(*(_execute_task(a, cache) for a in args)) 122 elif not ishashable(arg): 123 return arg

C:\Users\KHS\lib\site-packages\dask\core.py in <genexpr>(.0) 119 # temporaries by their reference count and can execute certain 120 # operations in-place. --> 121 return func(*(_execute_task(a, cache) for a in args)) 122 elif not ishashable(arg): 123 return arg

C:\Users\KHS\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk) 113 """ 114 if isinstance(arg, list): --> 115 return [_execute_task(a, cache) for a in arg] 116 elif istask(arg): 117 func, args = arg[0], arg[1:]

C:\Users\KHS\lib\site-packages\dask\core.py in <listcomp>(.0) 113 """ 114 if isinstance(arg, list): --> 115 return [_execute_task(a, cache) for a in arg] 116 elif istask(arg): 117 func, args = arg[0], arg[1:]

C:\Users\KHS\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk) 119 # temporaries by their reference count and can execute certain 120 # operations in-place. --> 121 return func(*(_execute_task(a, cache) for a in args)) 122 elif not ishashable(arg): 123 return arg

C:\Users\KHS\lib\site-packages\dask\optimization.py in __call__(self, *args) 989 if not len(args) == len(self.inkeys): 990 raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args))) --> 991 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args))) 992 993 def __reduce__(self):

C:\Users\KHS\lib\site-packages\dask\core.py in get(dsk, out, cache) 149 for key in toposort(dsk): 150 task = dsk[key] --> 151 result = _execute_task(task, cache) 152 cache[key] = result 153 result = _execute_task(out, cache)

C:\Users\KHS\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk) 119 # temporaries by their reference count and can execute certain 120 # operations in-place. --> 121 return func(*(_execute_task(a, cache) for a in args)) 122 elif not ishashable(arg): 123 return arg

C:\Users\KHS\lib\site-packages\dask\core.py in <genexpr>(.0) 119 # temporaries by their reference count and can execute certain 120 # operations in-place. --> 121 return func(*(_execute_task(a, cache) for a in args)) 122 elif not ishashable(arg): 123 return arg

C:\Users\KHS\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk) 119 # temporaries by their reference count and can execute certain 120 # operations in-place. --> 121 return func(*(_execute_task(a, cache) for a in args)) 122 elif not ishashable(arg): 123 return arg

C:\Users\KHS\lib\site-packages\dask\core.py in <genexpr>(.0) 119 # temporaries by their reference count and can execute certain 120 # operations in-place. --> 121 return func(*(_execute_task(a, cache) for a in args)) 122 elif not ishashable(arg): 123 return arg

C:\Users\KHS\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk) 113 """ 114 if isinstance(arg, list): --> 115 return [_execute_task(a, cache) for a in arg] 116 elif istask(arg): 117 func, args = arg[0], arg[1:]

C:\Users\KHS\lib\site-packages\dask\core.py in <listcomp>(.0) 113 """ 114 if isinstance(arg, list): --> 115 return [_execute_task(a, cache) for a in arg] 116 elif istask(arg): 117 func, args = arg[0], arg[1:]

C:\Users\KHS\lib\site-packages\dask\core.py in _execute_task(arg, cache, dsk) 119 # temporaries by their reference count and can execute certain 120 # operations in-place. --> 121 return func(*(_execute_task(a, cache) for a in args)) 122 elif not ishashable(arg): 123 return arg

C:\Users\KHS\lib\site-packages\dask\utils.py in apply(func, args, kwargs) 28 def apply(func, args, kwargs=None): 29 if kwargs: ---> 30 return func(*args, **kwargs) 31 else: 32 return func(*args)

TypeError: apply() got an unexpected keyword argument 'how'




Solution

Thank you, quasiben. I tried to reproduce the error with a minimal example, but I could not.

However, after reducing the dataset to one quarter of its original size, the computation succeeded on my laptop. So I suspect the failure has to do with the resource limits of my system rather than with the code itself. Thank you.