[ACR] Use client-side logical time for log polling (Azure#7099)
* Change ACR Build's client log polling logic to be strictly client-side and to use logical time instead of physical time
ehotinger authored and troydai committed Aug 21, 2018
1 parent c75d9a8 commit cae9597
Showing 2 changed files with 17 additions and 27 deletions.
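
For orientation before the diffs: the second changed file reworks the log polling loop so that the timeout is measured with a purely client-side "logical clock". Instead of comparing the blob's last_modified timestamp against datetime.utcnow(), the loop accumulates the seconds it has actually slept without seeing new data (consecutive_sleep_in_sec) and stops once that total exceeds timeout_in_seconds. Below is a minimal sketch of the idea; poll_logs, fetch_new_bytes, and blob_is_complete are hypothetical stand-ins rather than the committed _stream_logs code, while the constants mirror the values visible in the diff.

# Minimal sketch of client-side logical-time polling (assumed helpers, not the committed code).
import time
from random import uniform


def poll_logs(fetch_new_bytes, blob_is_complete, timeout_in_seconds=300):
    """Poll for new log data, timing out on accumulated client-side sleep.

    fetch_new_bytes: callable returning the number of new bytes seen (0 if none).
    blob_is_complete: callable returning True once the writer marks the blob done.
    """
    sleep_time = 1                 # current base sleep, doubled after repeated misses
    max_sleep_time = 15            # cap on the base sleep
    num_fails = 0
    num_fails_for_backoff = 3
    consecutive_sleep_in_sec = 0   # the "logical clock": seconds slept without progress

    while not blob_is_complete():
        if fetch_new_bytes() > 0:
            # Progress: reset the backoff and the logical clock.
            sleep_time = 1
            num_fails = 0
            consecutive_sleep_in_sec = 0
            continue

        if consecutive_sleep_in_sec > timeout_in_seconds:
            # No new data for the whole timeout window of client-side waiting.
            return

        num_fails += 1
        if num_fails >= num_fails_for_backoff:
            num_fails = 0
            sleep_time = min(sleep_time * 2, max_sleep_time)

        total_sleep_time = sleep_time + uniform(1, 2)   # add jitter
        consecutive_sleep_in_sec += total_sleep_time
        time.sleep(total_sleep_time)

Counting slept seconds rather than reading the blob's timestamps keeps the timeout independent of clock skew between the client and the storage service, which is what "logical time instead of physical time" refers to here.
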
1 change: 1 addition & 0 deletions src/command_modules/azure-cli-acr/HISTORY.rst
@@ -7,6 +7,7 @@ Release History
+++++
* Provide a workaround for runtime operations without ARM requests.
* Exclude version control files (eg, .git, .gitignore) from uploaded tar by default in build command.
* Minor fixes

2.1.3
+++++
@@ -7,7 +7,6 @@
import time
import os
from random import uniform
from datetime import datetime
from io import BytesIO
import tempfile
import tarfile
@@ -16,7 +15,6 @@
import colorama
from knack.log import get_logger
from knack.util import CLIError
from msrest.serialization import TZ_UTC
from msrestazure.azure_exceptions import CloudError
from azure.common import AzureHttpError
from azure.cli.core.commands import LongRunningOperation
@@ -86,11 +84,11 @@ def _stream_logs(byte_size, # pylint: disable=too-many-locals, too-many-stateme
start = 0
end = byte_size - 1
available = 0
last_modified = None
sleep_time = 1
max_sleep_time = 15
num_fails = 0
num_fails_for_backoff = 3
consecutive_sleep_in_sec = 0

# Try to get the initial properties so there's no waiting.
# If the storage call fails, we'll just sleep and try again after.
@@ -99,16 +97,15 @@ def _stream_logs(byte_size, # pylint: disable=too-many-locals, too-many-stateme
container_name=container_name, blob_name=blob_name)
metadata = props.metadata
available = props.properties.content_length
last_modified = props.properties.last_modified
except (AttributeError, AzureHttpError):
pass

while (_blob_is_not_complete(metadata) or start < available):

while start < available:
# Success! Reset our polling backoff.
sleep_time = 1
num_fails = 0
consecutive_sleep_in_sec = 0

try:
old_byte_size = len(stream.getvalue())
@@ -134,7 +131,6 @@ def _stream_logs(byte_size, # pylint: disable=too-many-locals, too-many-stateme
stream.write(curr_bytes[i + 1:])
print(flush.decode('utf-8', errors='ignore'))
break

except AzureHttpError as ae:
if ae.status_code != 404:
raise CLIError(ae)
@@ -149,8 +145,6 @@ def _stream_logs(byte_size, # pylint: disable=too-many-locals, too-many-stateme
container_name=container_name, blob_name=blob_name)
metadata = props.metadata
available = props.properties.content_length
last_modified = props.properties.last_modified

except AzureHttpError as ae:
if ae.status_code != 404:
raise CLIError(ae)
@@ -161,38 +155,33 @@ def _stream_logs(byte_size, # pylint: disable=too-many-locals, too-many-stateme
except Exception as err:
raise CLIError(err)

# If we're still expecting data and we have a record for the last
# modified date and the last modified date has timed out, exit
if ((last_modified is not None and _blob_is_not_complete(metadata)) or start < available):

delta = datetime.utcnow().replace(tzinfo=TZ_UTC) - last_modified

if delta.seconds > timeout_in_seconds:
# Flush anything remaining in the buffer - this would be the case
# if the file has expired and we weren't able to detect any \r\n
curr_bytes = stream.getvalue()

if curr_bytes:
print(curr_bytes.decode('utf-8', errors='ignore'))
if consecutive_sleep_in_sec > timeout_in_seconds:
# Flush anything remaining in the buffer - this would be the case
# if the file has expired and we weren't able to detect any \r\n
curr_bytes = stream.getvalue()
if curr_bytes:
print(curr_bytes.decode('utf-8', errors='ignore'))

logger.warning("No additional logs found. Timing out...")
return
logger.warning("Failed to find any new logs in %d seconds. Client will stop polling for additional logs.",
consecutive_sleep_in_sec)
return

# If no new data available but not complete, sleep before trying to process additional data.
if (_blob_is_not_complete(metadata) and start >= available):
num_fails += 1

logger.debug("Failed to find new content '%s' times in a row", num_fails)
logger.debug("Failed to find new content %d times in a row", num_fails)
if num_fails >= num_fails_for_backoff:
num_fails = 0
sleep_time = min(sleep_time * 2, max_sleep_time)
logger.debug("Resetting failure count to '%s'", num_fails)
logger.debug("Resetting failure count to %d", num_fails)

# 1.0 <= x < 2.0
rnd = uniform(1, 2)
total_sleep_time = sleep_time + rnd
logger.debug("Base sleep time: '%s' random delay: '%s' total: '%s' seconds",
sleep_time, rnd, total_sleep_time)
consecutive_sleep_in_sec += total_sleep_time
logger.debug("Base sleep time: %d, random delay: %d, total: %d, consecutive: %d",
sleep_time, rnd, total_sleep_time, consecutive_sleep_in_sec)
time.sleep(total_sleep_time)

# One final check to see if there's anything in the buffer to flush
