Skip to content

Commit b268216

Browse files
author
Ubuntu
committed
Add SSH refactor code 2 to debug the failed tomcat9 installation in the 450-scale case
1 parent a793464 commit b268216

File tree

1 file changed

+82
-36
lines changed

1 file changed

+82
-36
lines changed

aws_boto3_modular_multi_processing/sequential_master_modules/install_tomcat_on_each_of_new_instances_ThreadPoolExecutor_z_REFACTORED_LOGGING_MAIN_LOGGING_SSH_REFACTOR.py

Lines changed: 82 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -901,68 +901,114 @@ def install_tomcat(ip, private_ip, instance_id):
901901

902902
## REFACTOR SSH 1:
903903

904-
for command in commands:
904+
# for command in commands:
905+
# for attempt in range(3):
906+
# try:
907+
# print(f"[DEBUG] Starting SSH command attempt {attempt + 1} on {ip}: {command}")
908+
#
909+
# stdin, stdout, stderr = ssh.exec_command(command, timeout=60)
910+
#
911+
# print(f"[DEBUG] Command sent: {command}")
912+
# print(f"[DEBUG] Waiting to read stdout...")
913+
# stdout_output = stdout.read().decode()
914+
# print(f"[DEBUG] Waiting to read stderr...")
915+
# stderr_output = stderr.read().decode()
916+
#
917+
# print(f"[DEBUG] Read complete for {ip}")
918+
# print(f"[INFO] Executing command: {command}")
919+
# print(f"[INFO] STDOUT length: {len(stdout_output)} chars")
920+
# print(f"[INFO] STDERR length: {len(stderr_output)} chars")
921+
# print(f"STDOUT: {stdout_output}")
922+
# print(f"STDERR: {stderr_output}")
923+
#
924+
# # Detect specific fatal Tomcat errors early
925+
# if "E: Package 'tomcat9' has no installation candidate" in stderr_output:
926+
# print(f"[ERROR] Fatal: No install candidate on {ip}")
927+
# ssh.close()
928+
# return ip, private_ip, False
929+
#
930+
# # Warning softener
931+
# if "WARNING:" in stderr_output:
932+
# print(f"[WARN] Non-fatal warning on {ip}: {stderr_output}")
933+
# stderr_output = ""
934+
#
935+
# # Catch any remaining stderr (actual failures)
936+
# if stderr_output.strip():
937+
# print(f"[ERROR] Command error output on {ip}: {stderr_output}")
938+
# ssh.close()
939+
# return ip, private_ip, False
940+
#
941+
# print(f"[DEBUG] Retrying command: {command} (Attempt {attempt + 1})")
942+
# time.sleep(20)
943+
#
944+
# except Exception as e:
945+
# print(f"[EXCEPTION] exec_command failed on {ip}: {e}")
946+
#
947+
# # Log partial output if available
948+
# try:
949+
# if stdout:
950+
# stdout_output = stdout.read().decode()
951+
# print(f"[EXCEPTION DEBUG] Partial STDOUT ({len(stdout_output)}): {stdout_output}")
952+
# if stderr:
953+
# stderr_output = stderr.read().decode()
954+
# print(f"[EXCEPTION DEBUG] Partial STDERR ({len(stderr_output)}): {stderr_output}")
955+
# except Exception as inner:
956+
# print(f"[EXCEPTION] Error reading from stdout/stderr after failure: {inner}")
957+
#
958+
# ssh.close()
959+
# return ip, private_ip, False
960+
#
961+
# finally:
962+
# if stdin: stdin.close()
963+
# if stdout: stdout.close()
964+
# if stderr: stderr.close()
965+
#
966+
#
967+
968+
969+
## REFACTOR SSH 2:
970+
971+
972+
from datetime import datetime
973+
974+
for idx, command in enumerate(commands):
905975
for attempt in range(3):
906976
try:
907-
print(f"[DEBUG] Starting SSH command attempt {attempt + 1} on {ip}: {command}")
908-
977+
print(f"[{ip}] [{datetime.now()}] Command {idx+1}/{len(commands)}: {command} (Attempt {attempt + 1})")
909978
stdin, stdout, stderr = ssh.exec_command(command, timeout=60)
910979

911-
print(f"[DEBUG] Command sent: {command}")
912-
print(f"[DEBUG] Waiting to read stdout...")
913980
stdout_output = stdout.read().decode()
914-
print(f"[DEBUG] Waiting to read stderr...")
915981
stderr_output = stderr.read().decode()
916982

917-
print(f"[DEBUG] Read complete for {ip}")
918-
print(f"[INFO] Executing command: {command}")
919-
print(f"[INFO] STDOUT length: {len(stdout_output)} chars")
920-
print(f"[INFO] STDERR length: {len(stderr_output)} chars")
921-
print(f"STDOUT: {stdout_output}")
922-
print(f"STDERR: {stderr_output}")
983+
print(f"[{ip}] [{datetime.now()}] STDOUT: '{stdout_output.strip()}'")
984+
print(f"[{ip}] [{datetime.now()}] STDERR: '{stderr_output.strip()}'")
923985

924-
# Detect specific fatal Tomcat errors early
925986
if "E: Package 'tomcat9' has no installation candidate" in stderr_output:
926-
print(f"[ERROR] Fatal: No install candidate on {ip}")
987+
print(f"[{ip}] [{datetime.now()}] ❌ Package install failure. Exiting early.")
927988
ssh.close()
928989
return ip, private_ip, False
929990

930-
# Warning softener
931991
if "WARNING:" in stderr_output:
932-
print(f"[WARN] Non-fatal warning on {ip}: {stderr_output}")
992+
print(f"[{ip}] [{datetime.now()}] ⚠️ Warning ignored: {stderr_output.strip()}")
933993
stderr_output = ""
934994

935-
# Catch any remaining stderr (actual failures)
936995
if stderr_output.strip():
937-
print(f"[ERROR] Command error output on {ip}: {stderr_output}")
996+
print(f"[{ip}] [{datetime.now()}] ❌ Non-warning error output. Command failed.")
938997
ssh.close()
939998
return ip, private_ip, False
940999

941-
print(f"[DEBUG] Retrying command: {command} (Attempt {attempt + 1})")
1000+
print(f"[{ip}] [{datetime.now()}] ✅ Command succeeded.")
9421001
time.sleep(20)
9431002

9441003
except Exception as e:
945-
print(f"[EXCEPTION] exec_command failed on {ip}: {e}")
946-
947-
# Log partial output if available
948-
try:
949-
if stdout:
950-
stdout_output = stdout.read().decode()
951-
print(f"[EXCEPTION DEBUG] Partial STDOUT ({len(stdout_output)}): {stdout_output}")
952-
if stderr:
953-
stderr_output = stderr.read().decode()
954-
print(f"[EXCEPTION DEBUG] Partial STDERR ({len(stderr_output)}): {stderr_output}")
955-
except Exception as inner:
956-
print(f"[EXCEPTION] Error reading from stdout/stderr after failure: {inner}")
957-
1004+
print(f"[{ip}] [{datetime.now()}] 💥 Exception during exec_command: {e}")
9581005
ssh.close()
9591006
return ip, private_ip, False
9601007

9611008
finally:
962-
if stdin: stdin.close()
963-
if stdout: stdout.close()
964-
if stderr: stderr.close()
965-
1009+
stdin.close()
1010+
stdout.close()
1011+
stderr.close()
9661012

9671013

9681014
ssh.close()

0 commit comments

Comments
 (0)