Skip to content

Saver Issue in step 5 #5

Open
Open
@hirenpatelatl

Description

Ran into this error in step 5 when trying to run the distributed workbook.
NotFoundErrorTraceback (most recent call last)
in ()
52 last_report_step = step
53 average_loss_total = 0
---> 54 saver.save(session, save_dir_prefix, global_step=step)
55
56 except tf.errors.UnavailableError as e:

/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.pyc in save(self, sess, save_path, global_step, latest_filename, meta_graph_suffix, write_meta_graph)
1035 model_checkpoint_path = sess.run(
1036 self.saver_def.save_tensor_name,
-> 1037 {self.saver_def.filename_tensor_name: checkpoint_file})
1038 model_checkpoint_path = compat.as_str(model_checkpoint_path)
1039 self._MaybeDeleteOldCheckpoints(model_checkpoint_path,

/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
338 try:
339 result = self._run(None, fetches, feed_dict, options_ptr,
--> 340 run_metadata_ptr)
341 if run_metadata:
342 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
562 try:
563 results = self._do_run(handle, target_list, unique_fetches,
--> 564 feed_dict_string, options, run_metadata)
565 finally:
566 # The movers are no longer used. Delete them.

/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
635 if handle is None:
636 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
--> 637 target_list, options, run_metadata)
638 else:
639 return self._do_call(_prun_fn, self._session, handle, feed_dict,

/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
657 # pylint: disable=protected-access
658 raise errors._make_specific_exception(node_def, op, error_message,
--> 659 e.code)
660 # pylint: enable=protected-access
661

NotFoundError: /var/log/checkpoints/word2vec/1464465701/model-67.tempstate14588595220314914492
[[Node: save/save = SaveSlices[T=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_INT32], _device="/job:master/replica:0/task:0/cpu:0"](_recv_save/Const_0, save/save/tensor_names, save/save/shapes_and_slices, Variable_S1165, Variable_1_S1167, Variable_2_S1169, Variable_3_S1171, Variable_4_S1173, Variable_5_S1175, Variable_6_S1177)]]
Caused by op u'save/save', defined at:
File "/usr/lib/python2.7/runpy.py", line 162, in _run_module_as_main
"main", fname, loader, pkg_name)
File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
exec code in run_globals
File "/usr/local/lib/python2.7/dist-packages/ipykernel/main.py", line 3, in
app.launch_new_instance()
File "/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py", line 596, in launch_instance
app.start()
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py", line 442, in start
ioloop.IOLoop.instance().start()
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py", line 162, in start
super(ZMQIOLoop, self).start()
File "/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py", line 883, in start
handler_func(fd_obj, events)
File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(_args, *_kwargs)
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
callback(_args, *_kwargs)
File "/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
return fn(_args, *_kwargs)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 276, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
handler(stream, idents, msg)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py", line 391, in execute_request
user_expressions, allow_stdin)
File "/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py", line 199, in do_execute
shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2723, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2825, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py", line 2885, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "", line 56, in
saver = tf.train.Saver(tf.all_variables())
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 832, in init
restore_sequentially=restore_sequentially)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 500, in build
save_tensor = self._AddSaveOps(filename_tensor, vars_to_save)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 197, in _AddSaveOps
save = self.save_op(filename_tensor, vars_to_save)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/training/saver.py", line 149, in save_op
tensor_slices=[vs.slice_spec for vs in vars_to_save])
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/io_ops.py", line 172, in _save
tensors, name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_io_ops.py", line 341, in _save_slices
name=name)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/op_def_library.py", line 661, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2154, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1154, in init
self._traceback = _extract_stack()

Activity

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions