Reference: RL Model Code#

This page is a reference for the code used to generate the figures in the previous results page.

This is included largely for reproducibility so that others can generate similar results; it also provides more fine-grained insight into the model implementation.

Initialization#

# Run dependencies
# Each %run executes a helper notebook inside this kernel; the cells below call
# helpers that are not defined on this page (presumably supplied by these
# notebooks -- confirm against each one).
# NOTE: python_lib_visualization.ipynb imports pandas, scipy and seaborn. If
# seaborn is not installed this cell stops with
# "ModuleNotFoundError: No module named 'seaborn'" and none of the plotting
# helpers are defined, so install seaborn before running.
%run ./RL_model_python_lib_utils.ipynb
%run ./RL_model_python_lib_reward.ipynb
%run ./RL_model_python_lib_decision_functions.ipynb
%run ./python_lib_visualization.ipynb
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
File /var/folders/vc/0d4071bd7rbd6q1btkk05jcw0000gn/T/ipykernel_3289/3777295251.py:10, in <cell line: 10>()
      8 import pandas as pd
      9 from scipy import stats
---> 10 import seaborn as sns
     12 from utils import N_ROUNDS
     15 def groupby_f_data(f_data, colname, bins):

ModuleNotFoundError: No module named 'seaborn'
---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
Input In [1], in <cell line: 5>()
      3 get_ipython().run_line_magic('run', './RL_model_python_lib_reward.ipynb')
      4 get_ipython().run_line_magic('run', './RL_model_python_lib_decision_functions.ipynb')
----> 5 get_ipython().run_line_magic('run', './python_lib_visualization.ipynb')

File /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/IPython/core/interactiveshell.py:2305, in InteractiveShell.run_line_magic(self, magic_name, line, _stack_depth)
   2303     kwargs['local_ns'] = self.get_local_scope(stack_depth)
   2304 with self.builtin_trap:
-> 2305     result = fn(*args, **kwargs)
   2306 return result

File /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/IPython/core/magics/execution.py:717, in ExecutionMagics.run(self, parameter_s, runner, file_finder)
    715     with preserve_keys(self.shell.user_ns, '__file__'):
    716         self.shell.user_ns['__file__'] = filename
--> 717         self.shell.safe_execfile_ipy(filename, raise_exceptions=True)
    718     return
    720 # Control the response to exit() calls made by the script being run

File /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/IPython/core/interactiveshell.py:2811, in InteractiveShell.safe_execfile_ipy(self, fname, shell_futures, raise_exceptions)
   2809 result = self.run_cell(cell, silent=True, shell_futures=shell_futures)
   2810 if raise_exceptions:
-> 2811     result.raise_error()
   2812 elif not result.success:
   2813     break

File /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/IPython/core/interactiveshell.py:251, in ExecutionResult.raise_error(self)
    249     raise self.error_before_exec
    250 if self.error_in_exec is not None:
--> 251     raise self.error_in_exec

    [... skipping hidden 1 frame]

File /var/folders/vc/0d4071bd7rbd6q1btkk05jcw0000gn/T/ipykernel_3289/3777295251.py:10, in <cell line: 10>()
      8 import pandas as pd
      9 from scipy import stats
---> 10 import seaborn as sns
     12 from utils import N_ROUNDS
     15 def groupby_f_data(f_data, colname, bins):

ModuleNotFoundError: No module named 'seaborn'
# Load the raw data file from the local "data" directory.
data_path = os.path.join("data", DEFAULT_FILE)
df = read_rps_data(data_path)

# Split the frame into pieces with separate_df, let get_opponent_move process
# each piece (its return value is discarded, so it presumably works in place --
# confirm in the utils notebook), then stitch the pieces back together.
separated = separate_df(df)
for game_df in separated:
    get_opponent_move(game_df)
df = pd.concat(separated)

Null Model: Reward Learning from Move Baserates#

# The three reward columns used by the null model; add_col creates them with
# an initial value of 0.
reward_cols = ['rock_reward', 'paper_reward', 'scissors_reward']
df_a = add_col(df, list(reward_cols), value=0)

# Process each piece returned by separate_df with human_reward_move (return
# value discarded, so presumably in place -- confirm), then recombine.
separated = separate_df(df_a)
for game_df in separated:
    human_reward_move(game_df)
df_a = pd.concat(separated)

# Original author's note: df_a should contain only human rows at this point
# (NaNs etc. stripped out).
# NOTE(review): the recorded run emitted "RuntimeWarning: overflow encountered
# in exp" right after this cell, so the softmax may need a numerically stable
# form -- check get_softmax_probabilities.
soft_dist = get_softmax_probabilities(df_a, list(reward_cols))
/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arraylike.py:364: RuntimeWarning: overflow encountered in exp
  result = getattr(ufunc, method)(*inputs, **kwargs)
# Choose agent moves from the softmax distribution computed above.
df_a = pick_move(df_a, soft_dist)

# Keep only rows where is_bot == 0. The explicit .copy() makes the filtered
# frame independent of its parent, so helpers that assign new columns to it do
# not trigger pandas' SettingWithCopyWarning.
df_a = df_a[df_a['is_bot'] == 0].copy()
df_a = assign_agent_outcomes(df_a)

# Aggregate agent outcomes into 60 round bins.
f_a = groupby_f_data(df_a, 'agent_outcome', bins=60)
# NOTE(review): 'bin' is compared against the *string* '50'. This selects the
# first 50 bins only if 'bin' is an ordered categorical; on plain strings the
# comparison is lexicographic and would drop bins '6'..'9' -- confirm the dtype
# returned by groupby_f_data.
f_a = f_a[f_a['bin'] <= '50']

# plot_win_rates assigns to columns of the frame it receives, so pass a copy of
# the 'win' rows rather than a boolean-mask slice (avoids SettingWithCopyWarning).
plot_win_rates(f_a[f_a['agent_outcome'] == 'win'].copy())  # NB: add a filename argument to save the figure locally
/var/folders/tm/sjjwcmbs3250mhfs5psrzf5w0000gn/T/ipykernel_58289/3777295251.py:22: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  modified_f_data['bin'] = pd.cut(modified_f_data.loc[:, ('round_index')], bins, labels = labs)
/var/folders/tm/sjjwcmbs3250mhfs5psrzf5w0000gn/T/ipykernel_58289/3777295251.py:46: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['bot_strategy'] = data['bot_strategy'].replace([
/var/folders/tm/sjjwcmbs3250mhfs5psrzf5w0000gn/T/ipykernel_58289/3777295251.py:59: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['bin'] = data['bin'].replace(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], ['100', '200', '300', '400', '500', '600', '700', '800', '900', '1000'])
<AxesSubplot:xlabel='Trial round', ylabel='Mean win percentage'>
_images/RLModel_code_10_2.png

Transition Model: Reward Learning from Previous Moves#

# Process each piece returned by separate_df with human_reward_past_cur_move
# (return value discarded, so presumably in place -- confirm), then recombine.
separated = separate_df(df)
for e in separated:
    human_reward_past_cur_move(e)
df_b = pd.concat(separated)

# For each piece: compute softmax probabilities, pick the agent's move, and
# score it against the opponent's move.
separated = separate_df(df_b)
results_b = []
for e in separated:
    e = get_softmax_probabilities_3b(e)
    e = pick_move_v2(e)
    e['agent_outcome'] = e.apply(lambda x: evaluate_outcome(x['agent_move'], x['opponent_move']), axis=1)
    results_b.append(e)
# Concatenate once instead of growing a DataFrame inside the loop: repeated
# pd.concat copies all accumulated rows on every iteration, and seeding with an
# empty DataFrame relies on deprecated empty-frame concat behaviour.
df_result_b = pd.concat(results_b, axis=0) if results_b else pd.DataFrame()

# Aggregate agent outcomes into 60 round bins.
f_b = groupby_f_data(df_result_b, 'agent_outcome', bins=60)
# NOTE(review): string comparison; selects the first 50 bins only if 'bin' is
# an ordered categorical -- confirm the dtype returned by groupby_f_data.
f_b = f_b[f_b['bin'] <= '50']

# Pass a copy so plot_win_rates' column assignments do not raise
# SettingWithCopyWarning on a boolean-mask slice.
plot_win_rates(f_b[f_b['agent_outcome'] == 'win'].copy())  # NB: add a filename argument to save the figure locally
/var/folders/tm/sjjwcmbs3250mhfs5psrzf5w0000gn/T/ipykernel_58289/3777295251.py:22: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  modified_f_data['bin'] = pd.cut(modified_f_data.loc[:, ('round_index')], bins, labels = labs)
/var/folders/tm/sjjwcmbs3250mhfs5psrzf5w0000gn/T/ipykernel_58289/3777295251.py:46: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['bot_strategy'] = data['bot_strategy'].replace([
/var/folders/tm/sjjwcmbs3250mhfs5psrzf5w0000gn/T/ipykernel_58289/3777295251.py:59: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['bin'] = data['bin'].replace(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], ['100', '200', '300', '400', '500', '600', '700', '800', '900', '1000'])
<AxesSubplot:xlabel='Trial round', ylabel='Mean win percentage'>
_images/RLModel_code_14_2.png

Transition Model: Reward Learning from Opponent Previous Moves#

# Split df with separate_df (per game id, according to the original comment)
# and process each piece with human_reward_oppo_past_cur_move (return value
# discarded, so presumably in place -- confirm), then recombine.
separated = separate_df(df)
for e in separated:
    human_reward_oppo_past_cur_move(e)
df_c = pd.concat(separated)

# For each piece: compute softmax probabilities, pick the agent's move, and
# align the generated agent move with the opponent move to score the outcome.
separated = separate_df(df_c)
results_c = []
for e in separated:
    e = get_softmax_probabilities_3c(e)
    e = pick_move_v2(e)
    e['agent_outcome'] = e.apply(lambda x: evaluate_outcome(x['agent_move'], x['opponent_move']), axis=1)
    results_c.append(e)
# Concatenate once instead of growing a DataFrame inside the loop: repeated
# pd.concat copies all accumulated rows on every iteration, and seeding with an
# empty DataFrame relies on deprecated empty-frame concat behaviour.
df_result_c = pd.concat(results_c, axis=0) if results_c else pd.DataFrame()

# Aggregate agent outcomes into 60 round bins.
f_c = groupby_f_data(df_result_c, 'agent_outcome', bins=60)
# NOTE(review): string comparison; selects the first 50 bins only if 'bin' is
# an ordered categorical -- confirm the dtype returned by groupby_f_data.
f_c = f_c[f_c['bin'] <= '50']

# Pass a copy so plot_win_rates' column assignments do not raise
# SettingWithCopyWarning on a boolean-mask slice.
plot_win_rates(f_c[f_c['agent_outcome'] == 'win'].copy())  # NB: add a filename argument to save the figure locally
/var/folders/tm/sjjwcmbs3250mhfs5psrzf5w0000gn/T/ipykernel_58289/3777295251.py:22: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  modified_f_data['bin'] = pd.cut(modified_f_data.loc[:, ('round_index')], bins, labels = labs)
/var/folders/tm/sjjwcmbs3250mhfs5psrzf5w0000gn/T/ipykernel_58289/3777295251.py:46: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['bot_strategy'] = data['bot_strategy'].replace([
/var/folders/tm/sjjwcmbs3250mhfs5psrzf5w0000gn/T/ipykernel_58289/3777295251.py:59: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['bin'] = data['bin'].replace(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], ['100', '200', '300', '400', '500', '600', '700', '800', '900', '1000'])
<AxesSubplot:xlabel='Trial round', ylabel='Mean win percentage'>
_images/RLModel_code_18_2.png

Combined Transition Model: Reward Learning from Self and Opponent Previous Moves#

# Replace the literal string 'none' with a real missing value. np.nan is used
# because the np.NaN alias was removed in NumPy 2.0.
# Note that this rebinds df for every cell that runs after this one.
df = df.replace('none', np.nan)

# Process each piece returned by separate_df with oppo_past_human_past_cur_move
# (return value discarded, so presumably in place -- confirm), then recombine.
separated = separate_df(df)
for e in separated:
    oppo_past_human_past_cur_move(e)
df_combine = pd.concat(separated)

# For each piece: compute softmax probabilities, pick the agent's move, and
# score it against the opponent's move.
separated = separate_df(df_combine)
results_combined = []
for e in separated:
    e = get_softmax_probabilities_combined(e)
    e = pick_move_v2(e)
    e['agent_outcome'] = e.apply(lambda x: evaluate_outcome(x['agent_move'], x['opponent_move']), axis=1)
    results_combined.append(e)
# Concatenate once instead of growing a DataFrame inside the loop: repeated
# pd.concat copies all accumulated rows on every iteration, and seeding with an
# empty DataFrame relies on deprecated empty-frame concat behaviour.
df_result_combined = pd.concat(results_combined, axis=0) if results_combined else pd.DataFrame()

# Aggregate agent outcomes into 60 round bins.
f_combined = groupby_f_data(df_result_combined, 'agent_outcome', bins=60)
# NOTE(review): string comparison; selects the first 50 bins only if 'bin' is
# an ordered categorical -- confirm the dtype returned by groupby_f_data.
f_combined = f_combined[f_combined['bin'] <= '50']

# Pass a copy so plot_win_rates' column assignments do not raise
# SettingWithCopyWarning on a boolean-mask slice.
plot_win_rates(f_combined[f_combined['agent_outcome'] == 'win'].copy())  # NB: add a filename argument to save the figure locally
/var/folders/tm/sjjwcmbs3250mhfs5psrzf5w0000gn/T/ipykernel_58289/3777295251.py:22: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  modified_f_data['bin'] = pd.cut(modified_f_data.loc[:, ('round_index')], bins, labels = labs)
/var/folders/tm/sjjwcmbs3250mhfs5psrzf5w0000gn/T/ipykernel_58289/3777295251.py:46: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['bot_strategy'] = data['bot_strategy'].replace([
/var/folders/tm/sjjwcmbs3250mhfs5psrzf5w0000gn/T/ipykernel_58289/3777295251.py:59: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['bin'] = data['bin'].replace(['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'], ['100', '200', '300', '400', '500', '600', '700', '800', '900', '1000'])
<AxesSubplot:xlabel='Trial round', ylabel='Mean win percentage'>
_images/RLModel_code_22_2.png

Disjunctive Transition Model: Learning Separately from Self and Opponent Previous Moves#

# Reuse the frames built for the two transition models (df_b and df_c, from the
# earlier cells), split the same way.
separated_agent_past = separate_df(df_b)
separated_oppo_past = separate_df(df_c)

# The two lists are paired position by position, so they must line up.
# NOTE(review): this assumes separate_df returns the pieces in the same order
# for df_b and df_c -- confirm in the utils notebook.
if len(separated_agent_past) != len(separated_oppo_past):
    raise ValueError(
        "separate_df(df_b) and separate_df(df_c) returned different numbers "
        "of pieces; cannot pair them"
    )

# For each aligned pair: compute the mixed softmax probabilities, pick the
# agent's move, and score it against the opponent's move.
results_mix = []
for agent_past, oppo_past in zip(separated_agent_past, separated_oppo_past):
    e = get_softmax_probabilities_mix(agent_past, oppo_past)
    e = pick_move_v2(e)
    e['agent_outcome'] = e.apply(lambda x: evaluate_outcome(x['agent_move'], x['opponent_move']), axis=1)
    results_mix.append(e)
# Concatenate once instead of growing a DataFrame inside the loop: repeated
# pd.concat copies all accumulated rows on every iteration, and seeding with an
# empty DataFrame relies on deprecated empty-frame concat behaviour.
df_result_mix = pd.concat(results_mix, axis=0) if results_mix else pd.DataFrame()

# Aggregate agent outcomes into 60 round bins.
f_mix = groupby_f_data(df_result_mix, 'agent_outcome', bins=60)
# NOTE(review): string comparison; selects the first 50 bins only if 'bin' is
# an ordered categorical -- confirm the dtype returned by groupby_f_data.
f_mix = f_mix[f_mix['bin'] <= '50']

# Pass a copy so plot_win_rates' column assignments do not raise
# SettingWithCopyWarning on a boolean-mask slice.
plot_win_rates(f_mix[f_mix['agent_outcome'] == 'win'].copy())  # NB: add a filename argument to save the figure locally