I want to replace the empty values in the dataframe using random already existing values, while maintaining the weights so that the correlation does not suffer and the data is not lost.
def nan_fill_random(column_name, nan):
for i in range(len(column_name)):
if column_name[i] == nan:
column_name[i] = random.choice(column_name[column_name != nan])
else:
continue
I wrote a function, but it periodically throws a KeyError
: and the value has different numbers, I assume indexes. Also, when you restart the cell, it can either disappear or be updated.
nan_fill_random(data['education'], 'unknown')
Here is the error
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3360 try:
-> 3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
W:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
W:\ProgramData\Anaconda3\lib\site-packages\pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.Int64HashTable.get_item()
KeyError: 14563
The above exception was the direct cause of the following exception:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_4720/2723938638.py in <module>
----> 1 nan_fill_random(data['education'], 'unknown')
~\AppData\Local\Temp/ipykernel_4720/1980306790.py in nan_fill_random(column_name, nan)
2 for i in range(len(column_name)):
3 if column_name[i] == nan:
----> 4 column_name[i] = random.choice(column_name[column_name != nan])
5 else:
6 continue
W:\ProgramData\Anaconda3\lib\random.py in choice(self, seq)
344 """Choose a random element from a non-empty sequence."""
345 # raises IndexError if seq is empty
--> 346 return seq[self._randbelow(len(seq))]
347
348 def shuffle(self, x, random=None):
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in __getitem__(self, key)
940
941 elif key_is_scalar:
--> 942 return self._get_value(key)
943
944 if is_hashable(key):
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\series.py in _get_value(self, label, takeable)
1049
1050 # Similar to Index.get_value, but we do not fall back to positional
-> 1051 loc = self.index.get_loc(label)
1052 return self.index._get_values_for_loc(self, loc, label)
1053
W:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
3361 return self._engine.get_loc(casted_key)
3362 except KeyError as err:
-> 3363 raise KeyError(key) from err
3364
3365 if is_scalar(key) and isna(key) and not self.hasnans:
KeyError: 14563
Aucun commentaire:
Enregistrer un commentaire