Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions app/util/data_preparation/confluence_prepare_data.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import random
import string

import urllib3

from util.util import print_timing
from multiprocessing.pool import ThreadPool
from util.conf import CONFLUENCE_SETTINGS
from util.api.confluence_clients import ConfluenceRpcClient, ConfluenceRestClient
from util.project_paths import CONFLUENCE_USERS, CONFLUENCE_PAGES, CONFLUENCE_BLOGS, CONFLUENCE_CUSTOM_PAGES
Expand All @@ -25,13 +26,23 @@ def generate_random_string(length=20):
return "".join([random.choice(string.ascii_lowercase) for _ in range(length)])


@print_timing('Creating dataset started')
def __create_data_set(rest_client, rpc_client):
dataset = dict()
dataset[USERS] = __get_users(rest_client, rpc_client, CONFLUENCE_SETTINGS.concurrency)
perf_user = random.choice(dataset[USERS])['user']
perf_user_api = ConfluenceRestClient(CONFLUENCE_SETTINGS.server_url, perf_user['username'], DEFAULT_USER_PASSWORD)
dataset[PAGES] = __get_pages(perf_user_api, 5000)
dataset[BLOGS] = __get_blogs(perf_user_api, 5000)

pool = ThreadPool(processes=2)
async_pages = pool.apply_async(__get_pages, (perf_user_api, 5000))
async_blogs = pool.apply_async(__get_blogs, (perf_user_api, 5000))

async_pages.wait()
async_blogs.wait()

dataset[PAGES] = async_pages.get()
dataset[BLOGS] = async_blogs.get()

dataset[CUSTOM_PAGES] = __get_custom_pages(perf_user_api, 5000, CONFLUENCE_SETTINGS.custom_dataset_query)
print(f'Users count: {len(dataset[USERS])}')
print(f'Pages count: {len(dataset[PAGES])}')
Expand All @@ -41,6 +52,7 @@ def __create_data_set(rest_client, rpc_client):
return dataset


@print_timing('Getting users')
def __get_users(confluence_api, rpc_api, count):
errors_count = 0
cur_perf_users = confluence_api.get_users(DEFAULT_USER_PREFIX, count)
Expand All @@ -65,6 +77,7 @@ def __get_users(confluence_api, rpc_api, count):
return cur_perf_users


@print_timing('Getting pages')
def __get_pages(confluence_api, count):
pages = confluence_api.get_content_search(
0, count, cql='type=page'
Expand All @@ -79,6 +92,7 @@ def __get_pages(confluence_api, count):
return pages


@print_timing('Getting custom pages')
def __get_custom_pages(confluence_api, count, cql):
pages = []
if cql:
Expand All @@ -89,6 +103,7 @@ def __get_custom_pages(confluence_api, count, cql):
return pages


@print_timing('Getting blogs')
def __get_blogs(confluence_api, count):
blogs = confluence_api.get_content_search(
0, count, cql='type=blogpost'
Expand All @@ -110,6 +125,7 @@ def __write_to_file(file_path, items):
f.write(f"{item}\n")


@print_timing('Started writing data to files')
def write_test_data_to_files(dataset):
pages = [f"{page['id']},{page['space']['key']}" for page in dataset[PAGES]]
__write_to_file(CONFLUENCE_PAGES, pages)
Expand Down Expand Up @@ -144,6 +160,7 @@ def __check_for_admin_permissions(confluence_api):
raise SystemExit(f"The '{confluence_api.user}' user does not have admin permissions.")


@print_timing('Confluence data preparation')
def main():
print("Started preparing data")

Expand Down
28 changes: 3 additions & 25 deletions app/util/data_preparation/jsm_prepare_data.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import datetime
import functools

import random
import string
from concurrent.futures.thread import ThreadPoolExecutor
from datetime import timedelta
from itertools import repeat
from timeit import default_timer as timer

import urllib3

import urllib3
from util.util import print_timing
from util.api.abstract_clients import JSM_EXPERIMENTAL_HEADERS
from util.api.jira_clients import JiraRestClient
from util.api.jsm_clients import JsmRestClient
Expand Down Expand Up @@ -54,26 +52,6 @@
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


def print_timing(message, sep='-'):
    """Return a decorator that prints timing banners around each call.

    Every call of the decorated function prints, in order: a separator line
    (``sep`` repeated 20 times), ``"<message> started HH:MM:SS"``, then, once
    the function has returned, ``"<message> finished in <timedelta>"`` and a
    closing separator line. The wrapped function's return value is passed
    through unchanged.

    If the wrapped function raises, the exception propagates and the
    "finished" line and closing separator are not printed.

    :param message: label used in the "started" / "finished" lines; must not
        be ``None``.
    :param sep: string repeated 20 times to form the separator lines.
    :raises AssertionError: if ``message`` is ``None``.
    """
    # An explicit raise is used instead of an ``assert`` statement so the
    # check is not stripped when Python runs with -O. AssertionError is kept
    # so the exception type callers may rely on does not change.
    if message is None:
        raise AssertionError("Message is not passed to print_timing decorator")

    def deco_wrapper(func):
        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            start = timer()
            print(sep * 20)
            print(f'{message} started {datetime.datetime.now().strftime("%H:%M:%S")}')
            result = func(*args, **kwargs)
            end = timer()
            print(f"{message} finished in {timedelta(seconds=end - start)}")
            print(sep * 20)
            return result

        return wrapper

    return deco_wrapper


def __calculate_issues_per_project(projects_count):
calculated_issues_per_project_count = {}
max_percentage_key = max(PROJECTS_ISSUES_PERC, key=int)
Expand Down
24 changes: 24 additions & 0 deletions app/util/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from datetime import timedelta
from timeit import default_timer as timer
import datetime
import functools


def print_timing(message, sep='-'):
    """Build a decorator that reports when a call starts and how long it took.

    The decorated function, on each call, prints:

    1. ``sep * 20`` as a separator line,
    2. ``"<message> started <HH:MM:SS>"`` using the current local time,
    3. (after the wrapped function returns) ``"<message> finished in <elapsed>"``
       where ``<elapsed>`` is a ``timedelta`` built from ``timeit.default_timer``,
    4. ``sep * 20`` again.

    The wrapped function's result is returned as-is. When the wrapped function
    raises, the exception propagates and steps 3 and 4 are skipped.

    :param message: text shown in the "started" and "finished" lines; ``None``
        is rejected.
    :param sep: string repeated 20 times for the separator lines.
    :return: a decorator to apply to the function being timed.
    :raises AssertionError: when ``message`` is ``None``.
    """
    if message is None:
        # Raised explicitly rather than via ``assert`` so the validation still
        # runs under ``python -O``; the exception type stays AssertionError to
        # remain backward-compatible.
        raise AssertionError("Message is not passed to print_timing decorator")

    def deco_wrapper(func):
        @functools.wraps(func)  # preserve func's metadata on the wrapper
        def wrapper(*args, **kwargs):
            start = timer()
            print(sep * 20)
            print(f'{message} started {datetime.datetime.now().strftime("%H:%M:%S")}')
            result = func(*args, **kwargs)
            end = timer()
            print(f"{message} finished in {timedelta(seconds=end - start)}")
            print(sep * 20)
            return result

        return wrapper

    return deco_wrapper