I am reading a CSV file in which one column contains a nested (multi-key) dict. Here is an example:
import pandas as pd
df = pd.DataFrame(
    {
        'a': [1, 2, 3],
        # Each cell of b is a dict of dicts: outer key -> {'arv': ..., 'vol': ...}.
        'b': [
            {
                'AUS': {'arv': '10:00', 'vol': 5},
                'DAL': {'arv': '9:00', 'vol': 1},
            },
            {
                'DAL': {'arv': '10:00', 'vol': 6},
                'NYU': {'arv': '10:00', 'vol': 3},
            },
            {
                # NOTE: 'DAL' is written twice in this literal. A Python dict
                # keeps only the last value given for a repeated key, so the
                # '8:00' record is discarded and only the '10:00' one survives.
                'DAL': {'arv': '8:00', 'vol': 6},
                'DAL': {'arv': '10:00', 'vol': 1},
                'GBD': {'arv': '12:00', 'vol': 1},
            },
        ],
    }
)
What I am trying to do is query column b of the above dataframe and return the corresponding values, as shown below. However, I would like to know whether there is a more intuitive and more efficient way to perform this kind of operation on a large dataset without looping through the dict.
# Turn column b into a plain dict keyed by the dataframe's row index.
df_dict = df['b'].to_dict()
print(df_dict)
{0: {'AUS': {'arv': '10:00', 'vol': 5}, 'DAL': {'arv': '9:00', 'vol': 1}}, 1: {'DAL': {'arv': '10:00', 'vol': 6}, 'NYU': {'arv': '10:00', 'vol': 3}}, 2: {'DAL': {'arv': '10:00', 'vol': 1}, 'GBD': {'arv': '12:00', 'vol': 1}}}
def get_value(my_str, my_time, data=None):
    """Return the total ``vol`` of ``my_str`` entries whose ``arv`` is ``my_time``.

    Args:
        my_str: Outer key to look up inside each row's dict (e.g. ``'DAL'``).
        my_time: Arrival time to match against the entry's ``'arv'`` value;
            compared with ``==``, so it must have the same type and format
            as the stored value (e.g. ``'10:00'``).
        data: Optional mapping of row label -> ``{key: {'arv': ..., 'vol': ...}}``.
            When omitted, the module-level ``df_dict`` is used.

    Returns:
        The sum of ``'vol'`` over all matching entries, or 0 when nothing
        matches. A matching entry without a ``'vol'`` key contributes 0.
    """
    if data is None:
        # Default to the dict built from column b so existing two-argument
        # calls keep working unchanged.
        data = df_dict

    total = 0
    for row in data.values():
        # Single lookup per row instead of a membership test plus repeated gets.
        entry = row.get(my_str)
        if entry is not None and entry.get('arv') == my_time:
            total += entry.get('vol', 0)
    return total
# Example query: total 'vol' across rows whose 'DAL' entry has 'arv' == '10:00'.
print("total vol is at 10:00 is: ", get_value('DAL', '10:00'))
total vol is at 10:00 is: 7
question from:https://stackoverflow.com/questions/65943130/a-dictionary-in-a-pandas-dataframe-column-in-python