Skip to content

Commit

Permalink
Fix admin logout issue when overseer is dead (NVIDIA#628)
Browse files Browse the repository at this point in the history
Add NVFL_OVERSEER_HEARTBEAT_TIMEOUT env var
  • Loading branch information
IsaacYangSLA authored Jun 2, 2022
1 parent 1ce7209 commit 22df68b
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 2 deletions.
8 changes: 7 additions & 1 deletion nvflare/ha/overseer/overseer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from datetime import datetime

from flask import jsonify, request
Expand All @@ -29,6 +30,11 @@
)

privilege_dict = load_privilege()
# Heartbeat timeout for the overseer, read once at import time from the
# NVFL_OVERSEER_HEARTBEAT_TIMEOUT environment variable (default "10").
# NOTE(review): the unit is presumably seconds -- confirm against the
# consumer of this value.
heartbeat_timeout = os.environ.get("NVFL_OVERSEER_HEARTBEAT_TIMEOUT", "10")
try:
    heartbeat_timeout = int(heartbeat_timeout)
except ValueError:
    # os.environ.get always yields a str here, and int() on a str can only
    # fail with ValueError. Catching just that keeps the fallback for bad
    # values without swallowing KeyboardInterrupt/SystemExit.
    heartbeat_timeout = 10


@app.route("/api/v1/heartbeat", methods=["GET", "POST"])
Expand All @@ -40,7 +46,7 @@ def heartbeat():
if project is None or role is None:
return jsonify({"Error": "project and role must be provided"})
now = datetime.utcnow()
update_sp_state(project, now)
update_sp_state(project, now, heartbeat_timeout=heartbeat_timeout)
if role == "server":
sp_end_point = req.get("sp_end_point")
if sp_end_point is None:
Expand Down
1 change: 1 addition & 0 deletions nvflare/ha/overseer_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def resume(self):
self._flag.set()

def end(self):
self._asked_to_stop_retrying = True
self._flag.set()
self._asked_to_exit = True
self._report_and_query.join()
Expand Down
2 changes: 1 addition & 1 deletion nvflare/lighter/impl/master_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ log_config: |
start_ovsr_sh: |
#!/usr/bin/env bash
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
AUTHZ_FILE=$DIR/privilege.yml gunicorn -c $DIR/gunicorn.conf.py --keyfile $DIR/overseer.key --certfile $DIR/overseer.crt --ca-certs $DIR/rootCA.pem
NVFL_OVERSEER_HEARTBEAT_TIMEOUT=10 AUTHZ_FILE=$DIR/privilege.yml gunicorn -c $DIR/gunicorn.conf.py --keyfile $DIR/overseer.key --certfile $DIR/overseer.crt --ca-certs $DIR/rootCA.pem
start_cln_sh: |
#!/usr/bin/env bash
Expand Down

0 comments on commit 22df68b

Please sign in to comment.