15 changes: 14 additions & 1 deletion bci/browser/binary/binary.py
@@ -102,8 +102,21 @@ def is_available_locally(self):
def is_available_online(self):
return self.state.has_online_binary()

@abstractmethod
def download_binary(self):
if self.is_available_locally():
logger.debug(f'Binary for {self.state} was already downloaded ({self.get_bin_path()})')
else:
binary_urls = self.state.get_online_binary_urls()
binary_dst_folder = os.path.dirname(self.get_potential_bin_path())
util.download_and_extract(binary_urls, binary_dst_folder)
self.configure_binary()

@abstractmethod
def configure_binary(self):
"""
Configures the browser binary.
This method is idempotent.
"""
pass

def is_built(self):
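Note: this change turns download_binary into a template method on the Binary base class. It resolves candidate URLs from the state, hands the download and extraction to util.download_and_extract, and finishes by calling the vendor-specific configure_binary hook, which must be idempotent because it also runs when the binary was already downloaded. A minimal, hypothetical sketch of a vendor subclass (ExampleBinary and its chmod step are illustrative only; the other abstract members are omitted):

import os

from bci import cli
from bci.browser.binary.binary import Binary


class ExampleBinary(Binary):
    def configure_binary(self) -> None:
        # Idempotent: safe to run repeatedly on an already-configured folder.
        binary_folder = os.path.dirname(self.get_potential_bin_path())
        cli.execute_and_return_status(f'chmod -R a+x {binary_folder}')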
44 changes: 6 additions & 38 deletions bci/browser/binary/vendors/chromium.py
@@ -1,14 +1,11 @@
import logging
import os
import re
import shutil
import zipfile

import requests

from bci import cli, util
from bci.browser.binary.artisanal_manager import ArtisanalBuildManager
from bci.browser.binary.binary import Binary
from bci.database.mongo.binary_cache import BinaryCache
from bci.version_control.states.state import State

logger = logging.getLogger(__name__)
@@ -19,7 +16,6 @@


class ChromiumBinary(Binary):

def __init__(self, state: State):
super().__init__(state)

@@ -38,41 +34,12 @@ def browser_name(self) -> str:
def bin_folder_path(self) -> str:
return BIN_FOLDER_PATH

# def get_full_version(self, version: int):
# if re.match(r'[0-9]+\.[0-9]+\.[0-9]+', version):
# return version + ".0"
# if re.match(r'[0-9]+', version):
# return self.repo.get_release_tag(version)
# if re.match(r'[0-9]{2}', version):
# return self.full_versions[version] + ".0"
# raise AttributeError("Could not convert version '%i' to full version" % version)
# return self.repo.get_release_tag(version)

# Downloadable binaries

def download_binary(self):
if self.is_available_locally():
logger.debug(f'Binary for {self.state} was already downloaded ({self.get_bin_path()})')
return
binary_url = self.state.get_online_binary_url()
logger.info(f'Downloading binary for {self.state} from \'{binary_url}\'')
zip_file_path = f'/tmp/{self.state.name}/archive.zip'
if os.path.exists(os.path.dirname(zip_file_path)):
shutil.rmtree(os.path.dirname(zip_file_path))
os.makedirs(os.path.dirname(zip_file_path))
with requests.get(binary_url, stream=True) as req:
with open(zip_file_path, 'wb') as file:
shutil.copyfileobj(req.raw, file)
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
zip_ref.extractall(os.path.dirname(zip_file_path))
bin_path = self.get_potential_bin_path()
os.makedirs(os.path.dirname(bin_path), exist_ok=True)
unzipped_folder_path = os.path.join(os.path.dirname(zip_file_path), "chrome-linux")
self.__remove_unnecessary_files(unzipped_folder_path)
util.safe_move_dir(unzipped_folder_path, os.path.dirname(bin_path))
cli.execute_and_return_status("chmod -R a+x %s" % os.path.dirname(bin_path))
# Remove temporary files in /tmp/COMMIT_POS
shutil.rmtree(os.path.dirname(zip_file_path))
def configure_binary(self):
binary_folder = os.path.dirname(self.get_potential_bin_path())
self.__remove_unnecessary_files(binary_folder)
cli.execute_and_return_status(f'chmod -R a+x {binary_folder}')

def __remove_unnecessary_files(self, binary_folder_path: str) -> None:
"""
@@ -90,6 +57,7 @@ def _get_version(self) -> str:
if bin_path := self.get_bin_path():
output = cli.execute_and_return_output(command, cwd=os.path.dirname(bin_path))
else:
BinaryCache.remove_binary_files(self.state)
raise AttributeError(f'Could not get binary path for {self.state}')
match = re.match(r'Chromium (?P<version>[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)', output)
if match:
40 changes: 8 additions & 32 deletions bci/browser/binary/vendors/firefox.py
@@ -1,12 +1,8 @@
import logging
import os
import re
import shutil
import tarfile

import requests

from bci import cli, util
from bci import cli
from bci.browser.binary.artisanal_manager import ArtisanalBuildManager
from bci.browser.binary.binary import Binary
from bci.version_control.states.state import State
@@ -19,7 +15,6 @@


class FirefoxBinary(Binary):

def __init__(self, state: State):
super().__init__(state)

@@ -35,36 +30,17 @@ def browser_name(self) -> str:
def bin_folder_path(self) -> str:
return BIN_FOLDER_PATH

def download_binary(self):
if self.is_available_locally():
logger.debug(f'Binary for {self.state} was already downloaded ({self.get_bin_path()})')
return
binary_url = self.state.get_online_binary_url()
logger.debug(f'Downloading binary for {self.state} from \'{binary_url}\'')
tar_file_path = f'/tmp/{self.state.name}/archive.tar.bz2'
if os.path.exists(os.path.dirname(tar_file_path)):
shutil.rmtree(os.path.dirname(tar_file_path))
os.makedirs(os.path.dirname(tar_file_path))
with requests.get(binary_url, stream=True) as req:
with open(tar_file_path, 'wb') as file:
shutil.copyfileobj(req.raw, file)
with tarfile.open(tar_file_path, "r:bz2") as tar_ref:
tar_ref.extractall(os.path.dirname(tar_file_path))
bin_path = self.get_potential_bin_path()
os.makedirs(os.path.dirname(bin_path), exist_ok=True)
unzipped_folder_path = os.path.join(os.path.dirname(tar_file_path), "firefox")
util.safe_move_dir(unzipped_folder_path, os.path.dirname(bin_path))
cli.execute_and_return_status("chmod -R a+x %s" % os.path.dirname(bin_path))
cli.execute_and_return_status("chmod -R a+w %s" % os.path.dirname(bin_path))
# Remove temporary files in /tmp/COMMIT_POS
shutil.rmtree(os.path.dirname(tar_file_path))
def configure_binary(self) -> None:
binary_folder = os.path.dirname(self.get_potential_bin_path())
cli.execute_and_return_status(f'chmod -R a+x {binary_folder}')
cli.execute_and_return_status(f'chmod -R a+w {binary_folder}')
# Add policies.json to prevent updates (this measure is effective from version 60)
# https://github.com/mozilla/policy-templates/blob/master/README.md
# (For earlier versions, the prefs.js file is used)
distributions_path = os.path.join(os.path.dirname(bin_path), "distribution")
distributions_path = os.path.join(binary_folder, 'distribution')
os.makedirs(distributions_path, exist_ok=True)
policies_path = os.path.join(distributions_path, "policies.json")
with open(policies_path, "a") as file:
policies_path = os.path.join(distributions_path, 'policies.json')
with open(policies_path, 'a') as file:
file.write('{ "policies": { "DisableAppUpdate": true } }')

def _get_version(self):
4 changes: 4 additions & 0 deletions bci/database/mongo/binary_cache.py
@@ -135,6 +135,10 @@ def store_file(file_path: str) -> None:
elapsed_time = time.time() - start_time
logger.debug(f'Stored binary in {elapsed_time:.2f}s')

@staticmethod
def remove_binary_files(state: State) -> None:
BinaryCache.__remove_revision_binary_files(state.type, state.index)

@staticmethod
def __count_cached_binaries(state_type: Optional[str] = None) -> int:
"""
79 changes: 73 additions & 6 deletions bci/util.py
@@ -6,12 +6,15 @@
import logging
import os
import shutil
import tarfile
import time
import zipfile
from typing import Optional
from urllib.parse import urlparse

import requests

LOGGER = logging.getLogger(__name__)
logger = logging.getLogger(__name__)


def safe_move_file(src_path, dst_path):
@@ -37,6 +40,7 @@ def safe_move_dir(src_path, dst_path):
safe_move_dir(new_src_path, new_dst_path)
else:
raise AttributeError("Something went wrong")
shutil.rmtree(src_path)


def copy_folder(src_path, dst_path):
@@ -83,30 +87,93 @@ def read_web_report(file_name):


def request_html(url: str):
LOGGER.debug(f"Requesting {url}")
logger.debug(f"Requesting {url}")
resp = requests.get(url, timeout=60)
if resp.status_code >= 400:
raise PageNotFound(f"Could not connect to url '{url}'")
return resp.content


def request_json(url: str):
LOGGER.debug(f"Requesting {url}")
logger.debug(f"Requesting {url}")
resp = requests.get(url, timeout=60)
if resp.status_code >= 400:
raise PageNotFound(f"Could not connect to url '{url}'")
LOGGER.debug('Request completed')
logger.debug('Request completed')
return resp.json()


def request_final_url(url: str) -> str:
LOGGER.debug(f"Requesting {url}")
logger.debug(f"Requesting {url}")
resp = requests.get(url, timeout=60)
if resp.status_code >= 400:
raise PageNotFound(f"Could not connect to url '{url}'")
LOGGER.debug('Request completed')
logger.debug('Request completed')
return resp.url


def download_and_extract(urls: list[str], dst_folder_path: str) -> bool:
"""
Downloads an archive from the first reachable URL in the given list and extracts it to dst_folder_path.
This function currently supports zip, tar.bz2 and tar.xz archives.

:return bool: True if an archive was successfully downloaded and extracted, otherwise False.
"""
for url in urls:
logger.debug(f"Attempting to download archive from '{url}'")
tmp_file_name = urlparse(url).path.split('/')[-1]
tmp_file_path = os.path.join('/tmp', tmp_file_name)
if os.path.exists(tmp_file_path):
os.remove(tmp_file_path)
with requests.get(url, stream=True) as req:
if req.status_code != 200:
continue
with open(tmp_file_path, 'wb') as file:
shutil.copyfileobj(req.raw, file)
_, file_extension = os.path.splitext(tmp_file_path)

logger.debug(f"Extracting downloaded archive '{tmp_file_path}'")
match file_extension:
case '.zip':
unzip(tmp_file_path, dst_folder_path)
case '.bz2':
untar(tmp_file_path, dst_folder_path)
case '.xz':
untar(tmp_file_path, dst_folder_path)
case _:
AttributeError(f"File extension {file_extension} is not supported.")
os.remove(tmp_file_path)
return True
return False


def unzip(src_archive_path: str, dst_folder_path: str) -> None:
with zipfile.ZipFile(src_archive_path, 'r') as zip:
members = zip.namelist()
top_dirs_and_files = {name.split('/')[0] for name in members}
# If there is a single top-level directory, we move all contents up.
if len(top_dirs_and_files) == 1:
parent_folder_path = os.path.dirname(dst_folder_path)
zip.extractall(parent_folder_path)
safe_move_dir(os.path.join(parent_folder_path, top_dirs_and_files.pop()), dst_folder_path)
else:
os.makedirs(dst_folder_path, exist_ok=True)
zip.extractall(dst_folder_path)


def untar(src_archive_path: str, dst_folder_path: str) -> None:
os.makedirs(dst_folder_path, exist_ok=True)
# We do not inspect the contents first like in unzip, because that is a very costly operation for tar archives.
with tarfile.open(src_archive_path, 'r:*') as tar:
tar.extractall(dst_folder_path)
members = os.listdir(dst_folder_path)
top_dirs_and_files = {name.split('/')[0] for name in members}
# If there is a single top-level directory, we move all contents up.
if len(top_dirs_and_files) == 1:
safe_move_dir(os.path.join(dst_folder_path, members.pop()), dst_folder_path + '_2')
shutil.rmtree(dst_folder_path)
safe_move_dir(dst_folder_path + '_2', dst_folder_path)


class PageNotFound(Exception):
pass
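Note: a minimal usage sketch of the new helper, assuming FirefoxVersion(100) can be constructed standalone; the destination path below is purely illustrative. download_and_extract tries each candidate URL in order, picks the extraction routine by file extension, and unzip/untar flatten a single top-level archive directory into the destination folder:

from bci import util
from bci.version_control.states.versions.firefox import FirefoxVersion

state = FirefoxVersion(100)
urls = state.get_online_binary_urls()  # .tar.bz2 first, then the .tar.xz fallback
dst_folder = '/tmp/example-binaries/firefox/100'  # illustrative destination
if util.download_and_extract(urls, dst_folder):
    print(f'Extracted a Firefox binary into {dst_folder}')
else:
    print('None of the candidate URLs yielded a usable archive')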
10 changes: 5 additions & 5 deletions bci/version_control/states/revisions/chromium.py
@@ -22,16 +22,16 @@ def has_online_binary(self) -> bool:
if cached_binary_available_online is not None:
return cached_binary_available_online
url = f'https://www.googleapis.com/storage/v1/b/chromium-browser-snapshots/o/Linux_x64%2F{self._revision_nb}%2Fchrome-linux.zip'
req = requests.get(url)
has_binary_online = req.status_code == 200
response = requests.get(url, stream=True)
has_binary_online = response.status_code == 200
MongoDB().store_binary_availability_online_cache('chromium', self, has_binary_online)
return has_binary_online

def get_online_binary_url(self):
return (
def get_online_binary_urls(self) -> list[str]:
return [(
'https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/%s%%2F%s%%2Fchrome-%s.zip?alt=media'
% ('Linux_x64', self._revision_nb, 'linux')
)
)]

def _fetch_missing_data(self) -> None:
"""
4 changes: 2 additions & 2 deletions bci/version_control/states/revisions/firefox.py
@@ -26,14 +26,14 @@ def browser_name(self) -> str:
def has_online_binary(self) -> bool:
return RevisionCache.firefox_has_binary_for(revision_nb=self.revision_nb, revision_id=self._revision_id)

def get_online_binary_url(self) -> str:
def get_online_binary_urls(self) -> list[str]:
result = RevisionCache.firefox_get_binary_info(self._revision_id)
if result is None:
raise AttributeError(f"Could not find binary url for '{self._revision_id}")
binary_base_url = result['files_url']
app_version = result['app_version']
binary_url = f'{binary_base_url}firefox-{app_version}.en-US.linux-x86_64.tar.bz2'
return binary_url
return [binary_url]

def get_previous_and_next_state_with_binary(self) -> tuple[State, State]:
previous_revision_nb, next_revision_nb = RevisionCache.firefox_get_previous_and_next_revision_nb_with_binary(
5 changes: 4 additions & 1 deletion bci/version_control/states/state.py
@@ -139,7 +139,10 @@ def has_online_binary(self) -> bool:
pass

@abstractmethod
def get_online_binary_url(self) -> str:
def get_online_binary_urls(self) -> list[str]:
"""
Returns a list of URLs where the associated binary can potentially be downloaded from.
"""
pass

def has_available_binary(self) -> bool:
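Note: the contract change from a single URL to a list lets a state advertise fallback mirrors or archive formats, which util.download_and_extract then tries in order. A hypothetical illustration of a subclass honoring the new signature (ExampleState and its URLs are made up; the remaining abstract members are omitted):

from bci.version_control.states.state import State


class ExampleState(State):
    def get_online_binary_urls(self) -> list[str]:
        # Preferred archive format first, fallback format second.
        return [
            'https://example.org/builds/browser-123.tar.bz2',
            'https://example.org/builds/browser-123.tar.xz',
        ]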
6 changes: 3 additions & 3 deletions bci/version_control/states/versions/chromium.py
@@ -30,11 +30,11 @@ def has_online_binary(self):
MongoDB().store_binary_availability_online_cache('chromium', self, has_binary_online)
return has_binary_online

def get_online_binary_url(self):
return (
def get_online_binary_urls(self) -> list[str]:
return [(
'https://www.googleapis.com/download/storage/v1/b/chromium-browser-snapshots/o/%s%%2F%s%%2Fchrome-%s.zip?alt=media'
% ('Linux_x64', self._revision_nb, 'linux')
)
)]

def convert_to_revision(self) -> ChromiumRevision:
return ChromiumRevision(revision_nb=self._revision_nb)
10 changes: 6 additions & 4 deletions bci/version_control/states/versions/firefox.py
@@ -1,10 +1,9 @@
from bci.version_control.repository.online.firefox import get_release_revision_number, get_release_revision_id
from bci.version_control.repository.online.firefox import get_release_revision_id, get_release_revision_number
from bci.version_control.states.revisions.firefox import FirefoxRevision
from bci.version_control.states.versions.base import BaseVersion


class FirefoxVersion(BaseVersion):

def __init__(self, major_version: int):
super().__init__(major_version)

@@ -21,8 +20,11 @@ def browser_name(self) -> str:
def has_online_binary(self) -> bool:
return True

def get_online_binary_url(self) -> str:
return f'https://ftp.mozilla.org/pub/firefox/releases/{self.major_version}.0/linux-x86_64/en-US/firefox-{self.major_version}.0.tar.bz2'
def get_online_binary_urls(self) -> list[str]:
return [
f'https://ftp.mozilla.org/pub/firefox/releases/{self.major_version}.0/linux-x86_64/en-US/firefox-{self.major_version}.0.tar.bz2',
f'https://ftp.mozilla.org/pub/firefox/releases/{self.major_version}.0/linux-x86_64/en-US/firefox-{self.major_version}.0.tar.xz'
]

def convert_to_revision(self) -> FirefoxRevision:
return FirefoxRevision(revision_nb=self._revision_nb)