Skip to content

Commit 5e9d610

Browse files
committed
Revert "[ML] Improve hard_limit audit message (#486)"
This reverts commit e8f59dc.
1 parent e8f59dc commit 5e9d610

File tree

6 files changed

+35
-78
lines changed

6 files changed

+35
-78
lines changed

docs/CHANGELOG.asciidoc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,6 @@ to the model. (See {ml-pull}214[#214].)
4444
4545
* Stop linking to libcrypt on Linux. (See {ml-pull}480[#480].)
4646
47-
* Improvements to hard_limit audit message. (See {ml-pull}486[#486].)
48-
4947
=== Bug Fixes
5048
5149
* Handle NaNs when detrending seasonal components. {ml-pull}408[#408]

include/model/CResourceMonitor.h

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -34,15 +34,12 @@ class MODEL_EXPORT CResourceMonitor {
3434
public:
3535
struct MODEL_EXPORT SResults {
3636
std::size_t s_Usage;
37-
std::size_t s_AdjustedUsage;
3837
std::size_t s_ByFields;
3938
std::size_t s_PartitionFields;
4039
std::size_t s_OverFields;
4140
std::size_t s_AllocationFailures;
4241
model_t::EMemoryStatus s_MemoryStatus;
4342
core_t::TTime s_BucketStartTime;
44-
std::size_t s_BytesExceeded;
45-
std::size_t s_BytesMemoryLimit;
4643
};
4744

4845
public:
@@ -68,6 +65,10 @@ class MODEL_EXPORT CResourceMonitor {
6865
//! taking up too much memory and further allocations should be banned
6966
bool areAllocationsAllowed() const;
7067

68+
//! Query the resource monitor to found out if it's Ok to
69+
//! create structures of a certain size
70+
bool areAllocationsAllowed(std::size_t size) const;
71+
7172
//! Return the amount of remaining space for allocations
7273
std::size_t allocationLimit() const;
7374

@@ -163,11 +164,6 @@ class MODEL_EXPORT CResourceMonitor {
163164
//! Returns the sum of used memory plus any extra memory
164165
std::size_t totalMemory() const;
165166

166-
//! Adjusts the amount of memory reported to take into
167-
//! account the current value of the byte limit margin and the effects
168-
//! of background persistence.
169-
std::size_t adjustedUsage(std::size_t usage) const;
170-
171167
private:
172168
//! The registered collection of components
173169
TDetectorPtrSizeUMap m_Detectors;
@@ -230,9 +226,6 @@ class MODEL_EXPORT CResourceMonitor {
230226
//! Don't do any sort of memory checking if this is set
231227
bool m_NoLimit;
232228

233-
//! The number of bytes over the high limit for memory usage at the last allocation failure
234-
std::size_t m_CurrentBytesExceeded;
235-
236229
//! Test friends
237230
friend class ::CResourceMonitorTest;
238231
friend class ::CResourceLimitTest;

lib/api/CModelSizeStatsJsonWriter.cc

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@ namespace {
1616
const std::string JOB_ID("job_id");
1717
const std::string MODEL_SIZE_STATS("model_size_stats");
1818
const std::string MODEL_BYTES("model_bytes");
19-
const std::string MODEL_BYTES_EXCEEDED("model_bytes_exceeded");
20-
const std::string MODEL_BYTES_MEMORY_LIMIT("model_bytes_memory_limit");
2119
const std::string TOTAL_BY_FIELD_COUNT("total_by_field_count");
2220
const std::string TOTAL_OVER_FIELD_COUNT("total_over_field_count");
2321
const std::string TOTAL_PARTITION_FIELD_COUNT("total_partition_field_count");
@@ -35,15 +33,17 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId,
3533

3634
writer.String(JOB_ID);
3735
writer.String(jobId);
38-
3936
writer.String(MODEL_BYTES);
40-
writer.Uint64(results.s_AdjustedUsage);
41-
42-
writer.String(MODEL_BYTES_EXCEEDED);
43-
writer.Uint64(results.s_BytesExceeded);
44-
45-
writer.String(MODEL_BYTES_MEMORY_LIMIT);
46-
writer.Uint64(results.s_BytesMemoryLimit);
37+
// Background persist causes the memory size to double due to copying
38+
// the models. On top of that, after the persist is done we may not
39+
// be able to retrieve that memory back. Thus, we report twice the
40+
// memory usage in order to allow for that.
41+
// See https://github.com/elastic/x-pack-elasticsearch/issues/1020.
42+
// Issue https://github.com/elastic/x-pack-elasticsearch/issues/857
43+
// discusses adding an option to perform only foreground persist.
44+
// If that gets implemented, we should only double when background
45+
// persist is configured.
46+
writer.Uint64(results.s_Usage * 2);
4747

4848
writer.String(TOTAL_BY_FIELD_COUNT);
4949
writer.Uint64(results.s_ByFields);

lib/api/unittest/CJsonOutputWriterTest.cc

Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1555,15 +1555,12 @@ void CJsonOutputWriterTest::testReportMemoryUsage() {
15551555

15561556
ml::model::CResourceMonitor::SResults resourceUsage;
15571557
resourceUsage.s_Usage = 1;
1558-
resourceUsage.s_AdjustedUsage = 2;
1559-
resourceUsage.s_ByFields = 3;
1560-
resourceUsage.s_PartitionFields = 4;
1561-
resourceUsage.s_OverFields = 5;
1562-
resourceUsage.s_AllocationFailures = 6;
1558+
resourceUsage.s_ByFields = 2;
1559+
resourceUsage.s_PartitionFields = 3;
1560+
resourceUsage.s_OverFields = 4;
1561+
resourceUsage.s_AllocationFailures = 5;
15631562
resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit;
1564-
resourceUsage.s_BucketStartTime = 7;
1565-
resourceUsage.s_BytesExceeded = 8;
1566-
resourceUsage.s_BytesMemoryLimit = 9;
1563+
resourceUsage.s_BucketStartTime = 6;
15671564

15681565
writer.reportMemoryUsage(resourceUsage);
15691566
writer.endOutputBatch(false, 1ul);
@@ -1583,26 +1580,22 @@ void CJsonOutputWriterTest::testReportMemoryUsage() {
15831580
CPPUNIT_ASSERT(sizeStats.HasMember("model_bytes"));
15841581
CPPUNIT_ASSERT_EQUAL(2, sizeStats["model_bytes"].GetInt());
15851582
CPPUNIT_ASSERT(sizeStats.HasMember("total_by_field_count"));
1586-
CPPUNIT_ASSERT_EQUAL(3, sizeStats["total_by_field_count"].GetInt());
1583+
CPPUNIT_ASSERT_EQUAL(2, sizeStats["total_by_field_count"].GetInt());
15871584
CPPUNIT_ASSERT(sizeStats.HasMember("total_partition_field_count"));
1588-
CPPUNIT_ASSERT_EQUAL(4, sizeStats["total_partition_field_count"].GetInt());
1585+
CPPUNIT_ASSERT_EQUAL(3, sizeStats["total_partition_field_count"].GetInt());
15891586
CPPUNIT_ASSERT(sizeStats.HasMember("total_over_field_count"));
1590-
CPPUNIT_ASSERT_EQUAL(5, sizeStats["total_over_field_count"].GetInt());
1587+
CPPUNIT_ASSERT_EQUAL(4, sizeStats["total_over_field_count"].GetInt());
15911588
CPPUNIT_ASSERT(sizeStats.HasMember("bucket_allocation_failures_count"));
1592-
CPPUNIT_ASSERT_EQUAL(6, sizeStats["bucket_allocation_failures_count"].GetInt());
1589+
CPPUNIT_ASSERT_EQUAL(5, sizeStats["bucket_allocation_failures_count"].GetInt());
15931590
CPPUNIT_ASSERT(sizeStats.HasMember("timestamp"));
1594-
CPPUNIT_ASSERT_EQUAL(7000, sizeStats["timestamp"].GetInt());
1591+
CPPUNIT_ASSERT_EQUAL(6000, sizeStats["timestamp"].GetInt());
15951592
CPPUNIT_ASSERT(sizeStats.HasMember("memory_status"));
15961593
CPPUNIT_ASSERT_EQUAL(std::string("hard_limit"),
15971594
std::string(sizeStats["memory_status"].GetString()));
15981595
CPPUNIT_ASSERT(sizeStats.HasMember("log_time"));
15991596
int64_t nowMs = ml::core::CTimeUtils::now() * 1000ll;
16001597
CPPUNIT_ASSERT(nowMs >= sizeStats["log_time"].GetInt64());
16011598
CPPUNIT_ASSERT(nowMs + 1000ll >= sizeStats["log_time"].GetInt64());
1602-
CPPUNIT_ASSERT(sizeStats.HasMember("model_bytes_exceeded"));
1603-
CPPUNIT_ASSERT_EQUAL(8, sizeStats["model_bytes_exceeded"].GetInt());
1604-
CPPUNIT_ASSERT(sizeStats.HasMember("model_bytes_memory_limit"));
1605-
CPPUNIT_ASSERT_EQUAL(9, sizeStats["model_bytes_memory_limit"].GetInt());
16061599
}
16071600

16081601
void CJsonOutputWriterTest::testWriteScheduledEvent() {

lib/api/unittest/CModelSnapshotJsonWriterTest.cc

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,12 @@ void CModelSnapshotJsonWriterTest::testWrite() {
3333
{
3434
model::CResourceMonitor::SResults modelSizeStats{
3535
10000, // bytes used
36-
20000, // bytes used (adjusted)
3736
3, // # by fields
3837
1, // # partition fields
3938
150, // # over fields
4039
4, // # allocation failures
4140
model_t::E_MemoryStatusOk, // memory status
42-
core_t::TTime(1521046309), // bucket start time
43-
0, // model bytes exceeded
44-
50000 // model bytes memory limit
41+
core_t::TTime(1521046309) // bucket start time
4542
};
4643

4744
CModelSnapshotJsonWriter::SModelSnapshotReport report{
@@ -117,11 +114,6 @@ void CModelSnapshotJsonWriterTest::testWrite() {
117114
CPPUNIT_ASSERT(modelSizeStats.HasMember("timestamp"));
118115
CPPUNIT_ASSERT_EQUAL(int64_t(1521046309000), modelSizeStats["timestamp"].GetInt64());
119116
CPPUNIT_ASSERT(modelSizeStats.HasMember("log_time"));
120-
CPPUNIT_ASSERT(modelSizeStats.HasMember("model_bytes_exceeded"));
121-
CPPUNIT_ASSERT_EQUAL(int64_t(0), modelSizeStats["model_bytes_exceeded"].GetInt64());
122-
CPPUNIT_ASSERT(modelSizeStats.HasMember("model_bytes_memory_limit"));
123-
CPPUNIT_ASSERT_EQUAL(int64_t(50000),
124-
modelSizeStats["model_bytes_memory_limit"].GetInt64());
125117

126118
CPPUNIT_ASSERT(snapshot.HasMember("quantiles"));
127119
const rapidjson::Value& quantiles = snapshot["quantiles"];

lib/model/CResourceMonitor.cc

Lines changed: 10 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ CResourceMonitor::CResourceMonitor(double byteLimitMargin)
3535
m_HasPruningStarted(false), m_PruneThreshold(0), m_LastPruneTime(0),
3636
m_PruneWindow(std::numeric_limits<std::size_t>::max()),
3737
m_PruneWindowMaximum(std::numeric_limits<std::size_t>::max()),
38-
m_PruneWindowMinimum(std::numeric_limits<std::size_t>::max()),
39-
m_NoLimit(false), m_CurrentBytesExceeded(0) {
38+
m_PruneWindowMinimum(std::numeric_limits<std::size_t>::max()), m_NoLimit(false) {
4039
this->updateMemoryLimitsAndPruneThreshold(DEFAULT_MEMORY_LIMIT_MB);
4140
}
4241

@@ -109,21 +108,18 @@ void CResourceMonitor::refresh(CAnomalyDetector& detector) {
109108

110109
void CResourceMonitor::forceRefresh(CAnomalyDetector& detector) {
111110
this->memUsage(&detector);
112-
111+
core::CProgramCounters::counter(counter_t::E_TSADMemoryUsage) = this->totalMemory();
112+
LOG_TRACE(<< "Checking allocations: currently at " << this->totalMemory());
113113
this->updateAllowAllocations();
114114
}
115115

116116
void CResourceMonitor::updateAllowAllocations() {
117117
std::size_t total{this->totalMemory()};
118-
core::CProgramCounters::counter(counter_t::E_TSADMemoryUsage) = total;
119-
LOG_TRACE(<< "Checking allocations: currently at " << total);
120118
if (m_AllowAllocations) {
121119
if (total > this->highLimit()) {
122120
LOG_INFO(<< "Over current allocation high limit. " << total
123121
<< " bytes used, the limit is " << this->highLimit());
124122
m_AllowAllocations = false;
125-
std::size_t bytesExceeded{total - this->highLimit()};
126-
m_CurrentBytesExceeded = this->adjustedUsage(bytesExceeded);
127123
}
128124
} else if (total < this->lowLimit()) {
129125
LOG_INFO(<< "Below current allocation low limit. " << total
@@ -208,6 +204,13 @@ bool CResourceMonitor::areAllocationsAllowed() const {
208204
return m_AllowAllocations;
209205
}
210206

207+
bool CResourceMonitor::areAllocationsAllowed(std::size_t size) const {
208+
if (m_AllowAllocations) {
209+
return this->totalMemory() + size < this->highLimit();
210+
}
211+
return false;
212+
}
213+
211214
std::size_t CResourceMonitor::allocationLimit() const {
212215
return this->highLimit() - std::min(this->highLimit(), this->totalMemory());
213216
}
@@ -265,9 +268,6 @@ CResourceMonitor::SResults CResourceMonitor::createMemoryUsageReport(core_t::TTi
265268
res.s_OverFields = 0;
266269
res.s_PartitionFields = 0;
267270
res.s_Usage = this->totalMemory();
268-
res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage);
269-
res.s_BytesMemoryLimit = 2 * m_ByteLimitHigh;
270-
res.s_BytesExceeded = m_CurrentBytesExceeded;
271271
res.s_AllocationFailures = 0;
272272
res.s_MemoryStatus = m_MemoryStatus;
273273
res.s_BucketStartTime = bucketStartTime;
@@ -281,25 +281,6 @@ CResourceMonitor::SResults CResourceMonitor::createMemoryUsageReport(core_t::TTi
281281
return res;
282282
}
283283

284-
std::size_t CResourceMonitor::adjustedUsage(std::size_t usage) const {
285-
// Background persist causes the memory size to double due to copying
286-
// the models. On top of that, after the persist is done we may not
287-
// be able to retrieve that memory back. Thus, we report twice the
288-
// memory usage in order to allow for that.
289-
// See https://github.com/elastic/x-pack-elasticsearch/issues/1020.
290-
// Issue https://github.com/elastic/x-pack-elasticsearch/issues/857
291-
// discusses adding an option to perform only foreground persist.
292-
// If that gets implemented, we should only double when background
293-
// persist is configured.
294-
295-
// We also scale the reported memory usage by the inverse of the byte limit margin.
296-
// This gives the user a fairer indication of how close the job is to hitting
297-
// the model memory limit in a concise manner (as the limit is scaled down by
298-
// the margin during the beginning period of the job's existence).
299-
size_t adjustedUsage = static_cast<std::size_t>(2 * usage / m_ByteLimitMargin);
300-
return adjustedUsage;
301-
}
302-
303284
void CResourceMonitor::acceptAllocationFailureResult(core_t::TTime time) {
304285
m_MemoryStatus = model_t::E_MemoryStatusHardLimit;
305286
++m_AllocationFailures[time];

0 commit comments

Comments
 (0)