Skip to content

Commit

Permalink
Major correction : wrong epsilon strategy, my bad
Browse files Browse the repository at this point in the history
Epsilon strategy should have been set to epsilon-t = epsilon-initial(1-t/T)
But I set it to epsilon-t = epsilon-(t-1)(1-t/T), my bad
  • Loading branch information
Jogima-cyber committed Aug 31, 2020
1 parent 82628ae commit 853a102
Showing 1 changed file with 21 additions and 1 deletion.
22 changes: 21 additions & 1 deletion portfolio-manager.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 58,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -928,6 +928,26 @@
},
"metadata": {},
"output_type": "display_data"
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-58-51223501ec3e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[0mpreprocessed_states\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mwts_prime\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mactions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrewards\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpreprocessed_next_states\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdones\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnext_pts\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnext_wts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msimulation\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfield_index\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0msimulation\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msimulations\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfield_index\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m8\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 114\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 115\u001b[0;31m \u001b[0mnext_Q_values\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnext_wts\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpreprocessed_next_states\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpreprocessed_next_states\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpreprocessed_next_states\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Q-values predicted for all next_states\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 116\u001b[0m \u001b[0mmax_next_Q_values_index\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnext_Q_values\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# for each next_states find index of max Q-value predicted by target\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36m_method_wrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 86\u001b[0m raise ValueError('{} is not supported in multi-worker mode.'.format(\n\u001b[1;32m 87\u001b[0m method.__name__))\n\u001b[0;32m---> 88\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 89\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 90\u001b[0m return tf_decorator.make_decorator(\n",
"\u001b[0;32m/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 1283\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_predict_batch_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m'outputs'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mbatch_outputs\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1284\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_predict_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1285\u001b[0;31m \u001b[0mall_outputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmap_structure_up_to\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_outputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mconcat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1286\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtf_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_numpy_or_python_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_outputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1287\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/nest.py\u001b[0m in \u001b[0;36mmap_structure_up_to\u001b[0;34m(shallow_tree, func, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 1116\u001b[0m \u001b[0;32mlambda\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# Discards the path arg.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1117\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1118\u001b[0;31m **kwargs)\n\u001b[0m\u001b[1;32m 1119\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/nest.py\u001b[0m in \u001b[0;36mmap_structure_with_tuple_paths_up_to\u001b[0;34m(shallow_tree, func, *inputs, **kwargs)\u001b[0m\n\u001b[1;32m 1212\u001b[0m in _yield_flat_up_to(shallow_tree, inputs[0], is_seq)]\n\u001b[1;32m 1213\u001b[0m results = [func(*args, **kwargs) for args in zip(flat_path_list,\n\u001b[0;32m-> 1214\u001b[0;31m *flat_value_lists)]\n\u001b[0m\u001b[1;32m 1215\u001b[0m return pack_sequence_as(structure=shallow_tree, flat_sequence=results,\n\u001b[1;32m 1216\u001b[0m expand_composites=expand_composites)\n",
"\u001b[0;32m/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/nest.py\u001b[0m in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 1211\u001b[0m flat_path_list = [path for path, _\n\u001b[1;32m 1212\u001b[0m in _yield_flat_up_to(shallow_tree, inputs[0], is_seq)]\n\u001b[0;32m-> 1213\u001b[0;31m results = [func(*args, **kwargs) for args in zip(flat_path_list,\n\u001b[0m\u001b[1;32m 1214\u001b[0m *flat_value_lists)]\n\u001b[1;32m 1215\u001b[0m return pack_sequence_as(structure=shallow_tree, flat_sequence=results,\n",
"\u001b[0;32m/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/nest.py\u001b[0m in \u001b[0;36m<lambda>\u001b[0;34m(_, *values)\u001b[0m\n\u001b[1;32m 1114\u001b[0m return map_structure_with_tuple_paths_up_to(\n\u001b[1;32m 1115\u001b[0m \u001b[0mshallow_tree\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1116\u001b[0;31m \u001b[0;32mlambda\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;31m# Discards the path arg.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1117\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1118\u001b[0m **kwargs)\n",
"\u001b[0;32m/opt/conda/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36mconcat\u001b[0;34m(tensors, axis)\u001b[0m\n\u001b[1;32m 1738\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensors\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mragged_tensor\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRaggedTensor\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1739\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mragged_concat_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1740\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0marray_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensors\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1741\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1742\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[0;34m\"\"\"Call target, and fall back on dispatchers if there is a TypeError.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 179\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 180\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 181\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mTypeError\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 182\u001b[0m \u001b[0;31m# Note: convert_to_eager_tensor currently raises a ValueError, not a\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/opt/conda/lib/python3.7/site-packages/tensorflow/python/ops/array_ops.py\u001b[0m in \u001b[0;36mconcat\u001b[0;34m(values, axis, name)\u001b[0m\n\u001b[1;32m 1602\u001b[0m ops.convert_to_tensor(\n\u001b[1;32m 1603\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"concat_dim\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1604\u001b[0;31m dtype=dtypes.int32).get_shape().assert_has_rank(0)\n\u001b[0m\u001b[1;32m 1605\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0midentity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1606\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mgen_array_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat_v2\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
Expand Down

0 comments on commit 853a102

Please sign in to comment.