-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
100 lines (80 loc) · 2.59 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import sys
from typing import Tuple
from IPython.core import ultratb
class NamedDict(dict):
    """A ``dict`` whose items can also be read and written as attributes.

    ``d.key`` reads ``d['key']`` and ``d.key = value`` writes
    ``d['key'] = value``. Looking up a name that is neither a regular
    attribute nor a key raises ``AttributeError``, so ``hasattr`` and
    ``getattr(d, name, default)`` behave as they do for ordinary objects.
    """

    def __init__(self, *args, **kwargs):
        # Accepts exactly the arguments ``dict`` accepts.
        # Deliberately no ``self.__dict__ = self`` here: that assignment is
        # routed through ``__setattr__`` below and would store the dict
        # inside itself under the key '__dict__'.
        super().__init__(*args, **kwargs)

    def __setattr__(self, key, value):
        # Every attribute assignment becomes an item assignment.
        self[key] = value

    def __getattr__(self, key):
        # Only called when normal attribute lookup has already failed, so
        # dict methods such as ``keys`` or ``items`` are never shadowed.
        try:
            return self[key]
        except KeyError:
            # Must be raised (not returned) so that hasattr/getattr-with-default
            # and protocol probes such as copy/pickle see a missing attribute.
            raise AttributeError(
                '{!r} object has no attribute {!r}'.format(type(self).__name__, key)
            ) from None
class _IPythonExceptionHook:
instance = None
def __call__(self, *args, **kwargs):
if self.instance is None:
self.instance = ultratb.FormattedTB(mode='Verbose', color_scheme='Linux', call_pdb=1)
self.instance(*args, **kwargs)
def set_ipython_exception_hook():
    """Replace ``sys.excepthook`` with a new ``_IPythonExceptionHook``."""
    hook = _IPythonExceptionHook()
    sys.excepthook = hook
def test(_n, _m):
    """Simulate how many sampling rounds are needed to hit every index once.

    Each round draws ``_m`` indices from ``range(_n)`` with
    ``numpy.random.choice`` (its default, i.e. with replacement) and marks
    them as selected. Rounds are repeated until all ``_n`` indices have been
    marked.

    Parameters
    ----------
    _n : int
        Number of distinct indices that must all be selected.
    _m : int
        Number of indices drawn in each round.

    Returns
    -------
    int
        The number of rounds that were performed (0 when ``_n`` is not
        positive, because there is nothing to cover).

    Raises
    ------
    ValueError
        If ``_n`` is positive but ``_m`` is not: no round could ever mark an
        index, so the simulation would never terminate.
    """
    import numpy as np

    tot = _n
    each = _m
    if tot > 0 and each <= 0:
        raise ValueError(
            '_m must be positive when _n > 0, got _n={!r}, _m={!r}'.format(_n, _m)
        )

    tot_arr = np.arange(tot, dtype=np.int32)
    sel = np.zeros_like(tot_arr, dtype=bool)
    i = 0
    while np.sum(sel) < tot:
        i += 1
        sel[np.random.choice(tot_arr, each)] = True
    return i


# This is a simulation helper, not a unit test: stop pytest from collecting it
# (and failing on the unknown "_n"/"_m" fixtures) when it is imported into a
# test module.
test.__test__ = False
def calculate_confidence_k(task_num: int,
                           select_n_from_task: int,
                           k_range: Tuple[int, int] = (50, 300),
                           minimum_confidence: float = 0.95) -> int:
    """
    Calculate the minimum k that can guarantee the confidence of selecting all tasks.

    For each candidate ``k`` the confidence is evaluated as
    ``((n ** k - (n - m) ** k) / n ** k) ** n``, which equals
    ``(1 - ((n - m) / n) ** k) ** n``, with ``n = task_num`` and
    ``m = select_n_from_task``.

    Parameters
    ----------
    task_num : int
        The number of tasks to select (``n`` in the formula)
    select_n_from_task : int
        The number of tasks to be sampled from all tasks (``m`` in the formula)
    k_range : Tuple[int, int]
        The range of k to search, k is n_run. It is expanded with
        ``range(*k_range)``, so the upper bound is exclusive
    minimum_confidence : float
        The minimum confidence range(0, 1) to guarantee

    Returns
    -------
    int
        The minimum k that can guarantee the confidence of selecting all tasks.
        If no k in the range reaches the confidence, the last candidate
        (``k_range[1] - 1``) is returned; a result at the end of the range
        therefore does not guarantee the confidence

    Raises
    ------
    ValueError
        If ``k_range`` contains no candidate k
    """
    m, n = select_n_from_task, task_num
    ks = range(*k_range)
    if not ks:
        raise ValueError('k_range {!r} contains no candidate k'.format(k_range))

    for k in ks:
        # Exact integer powers; only the final ratio is converted to float.
        n_pow_k = n ** k
        p = ((n_pow_k - (n - m) ** k) / n_pow_k) ** n
        if p >= minimum_confidence:
            return k
    return ks[-1]
def draw_curve(n, m):
    """Plot simulated run counts against the slope of the confidence formula.

    Two things are drawn on the same axes and then shown:

    * a density-normalised histogram (50 bins) of 2000 results of
      ``test(n, m)``;
    * the finite-difference slope, with respect to k, of
      ``((n ** k - (n - m) ** k) / n ** k) ** n`` for k in ``range(10, 200)``.

    Parameters
    ----------
    n : int
        Passed to ``test`` as ``_n`` and used as ``n`` in the formula.
    m : int
        Passed to ``test`` as ``_m`` and used as ``m`` in the formula.
    """
    import numpy as np
    import matplotlib.pyplot as plt

    ks = list(range(10, 200))
    confidence = []
    for ki in ks:
        # Exact integer power, computed once per k.
        n_pow = n ** ki
        confidence.append(((n_pow - (n - m) ** ki) / n_pow) ** n)

    runs_needed = [test(n, m) for _ in range(2000)]
    plt.hist(runs_needed, bins=50, density=True)

    slope = np.diff(confidence) / np.diff(ks)
    # np.diff yields one element fewer than its input; repeat the first slope
    # so the curve has one value per k.
    slope = np.concatenate(([slope[0]], slope))
    plt.plot(ks, slope)
    plt.show()
if __name__ == '__main__':
    # Example run: print the k found for 2000 tasks with 100 sampled.
    _n = 2000
    _m = 100
    # draw_curve(_n, _m)
    v = calculate_confidence_k(_n, _m)
    print(v)