|
| 1 | +// SPDX-License-Identifier: GPL-2.0-or-later |
| 2 | + |
| 3 | +/* |
| 4 | + * Copyright 2021, Sandipan Das, IBM Corp. |
| 5 | + * Configuration helpers for the Hot-Cold Affinity helper |
| 6 | + */ |
| 7 | + |
| 8 | +#ifndef _ASM_POWERPC_HCA_H |
| 9 | +#define _ASM_POWERPC_HCA_H |
| 10 | + |
| 11 | +#include <linux/types.h> |
| 12 | +#include <linux/bitops.h> |
| 13 | +#include <linux/minmax.h> |
| 14 | + |
| 15 | +#define KB (1024UL) /* unsigned long, so the products below are unsigned long too */ |
| 16 | +#define MB (1024 * KB) |
| 17 | +#define GB (1024 * MB) |
| 18 | +#define TB (1024 * GB) |
| 19 | + |
| 20 | +#define HCA_ENGINES_PER_CHIP 1 /* 2 -- NOTE(review): presumably two engines exist per chip and only one is used; confirm */ |
| 21 | +#define HCA_ENTRY_SIZE 8 /* bytes per counter entry, one entry per monitored page; defined again, identically, further down */ |
| 22 | + |
| 23 | +#ifdef CONFIG_PPC_4K_PAGES |
| 24 | +#define HCA_PAGE_SIZE (4 * KB) /* NOTE(review): HCA_COUNTER_SIZE() divides by PAGE_SIZE, not HCA_PAGE_SIZE; confirm which is intended */ |
| 25 | +#else /* CONFIG_PPC_64K_PAGES -- NOTE(review): assumed; every non-4K page configuration lands here */ |
| 26 | +#define HCA_PAGE_SIZE (64 * KB) |
| 27 | +#endif /* CONFIG_PPC_4K_PAGES */ |
| 28 | + |
| 29 | +/* |
| 30 | + * @m: The counter overflow mask; rounded up to a power of two, then clamped to 16 ... 4096 |
| 31 | + * |
| 32 | + * Supported overflow masks are 16, 32, 64 ... 4096. The page stats in the HCA cache are written |
| 33 | + * back to memory once the count reaches @m. NOTE(review): roundup_pow_of_two(0) is documented as undefined; confirm @m is never 0. |
| 34 | + */ |
| 35 | +#define HCA_OVERFLOW_MASK(m) min((u64)4096, max((u64)16, (u64)roundup_pow_of_two(m))) |
| 36 | +#define HCA_OVERFLOW_MASK_DEFAULT 4096 |
| 37 | + |
| 38 | +/* |
| 39 | + * @m: The command sampling mode; only the two low bits of @m are kept, so e.g. 4 selects mode 0 |
| 40 | + * |
| 41 | + * Supported command sampling modes are |
| 42 | + * 0 -> No sampling (capture all commands) |
| 43 | + * 1 -> Sample 1 of 16 commands |
| 44 | + * 2 -> Sample 1 of 32 commands |
| 45 | + * 3 -> Dynamic sampling (configured separately) |
| 46 | + * |
| 47 | + * The HCA fabric update traffic is reduced at the cost of accuracy. The |
| 48 | + * counts are scaled based on the sampling rate, e.g. if a single command |
| 49 | + * is seen when 1 of 16 mode is used, the corresponding page count will be |
| 50 | + * incremented by 16. |
| 51 | + */ |
| 52 | +#define HCA_SAMPLING_MODE(m) min((u64)3, max((u64)0, (u64)(m) & 0x3)) /* the mask already bounds the value to 0 ... 3, so the min/max never changes it */ |
| 53 | +#define HCA_SAMPLING_MODE_DEFAULT 3 /* dynamic sampling */ |
| 54 | + |
| 55 | +/* |
| 56 | + * @p: The command sampling period (in cycles); rounded up to a power of two, then clamped to 256 ... 65536 |
| 57 | + * |
| 58 | + * Supported command sampling periods are 256, 512, 1024 ... 65536 cycles. |
| 59 | + * HCA update commands sent to the fabric are counted every @p cycles. |
| 60 | + * |
| 61 | + * Only used when dynamic sampling is enabled. |
| 62 | + * Actual period is (value + 1) * 256. NOTE(review): "value" is presumably the |
| 63 | + * raw hardware field (cf. the default of 0 below) rather than @p -- confirm. |
| 64 | + */ |
| 65 | +#define HCA_SAMPLING_PERIOD(p) min((u64)65536, max((u64)256, (u64)roundup_pow_of_two(p))) |
| 66 | +#define HCA_SAMPLING_PERIOD_DEFAULT 0 |
| 67 | + |
| 68 | +/* |
| 69 | + * @t: The lower command threshold |
| 70 | + * |
| 71 | + * Supported command thresholds are 0, 1, 2 ... 255 commands; a larger @t is clamped to 255. |
| 72 | + * |
| 73 | + * With the upper command threshold, the sampling rate will reduce when |
| 74 | + * more than @t number of update commands are detected within a sampling |
| 75 | + * period. |
| 76 | + * |
| 77 | + * With the lower command threshold, the sampling rate will increase when |
| 78 | + * fewer than @t number of update commands are detected within a sampling |
| 79 | + * period. |
| 80 | + * |
| 81 | + * Only used when dynamic sampling is enabled. |
| 82 | + */ |
| 83 | +#define HCA_SAMPLING_LOWER_THRESH(t) min((u64)255, (u64)(t)) |
| 84 | +#define HCA_SAMPLING_LOWER_THRESH_DEFAULT 64UL |
| 85 | + |
| 86 | +/* |
| 87 | + * @t: The upper command threshold |
| 88 | + * |
| 89 | + * Supported command thresholds are 0, 1, 2 ... 255 commands; a larger @t is clamped to 255. |
| 90 | + * |
| 91 | + * With the upper command threshold, the sampling rate will reduce when |
| 92 | + * more than @t number of update commands are detected within a sampling |
| 93 | + * period. |
| 94 | + * |
| 95 | + * With the lower command threshold, the sampling rate will increase when |
| 96 | + * fewer than @t number of update commands are detected within a sampling |
| 97 | + * period. |
| 98 | + * |
| 99 | + * Only used when dynamic sampling is enabled. |
| 100 | + */ |
| 101 | +#define HCA_SAMPLING_UPPER_THRESH(t) min((u64)255, (u64)(t)) |
| 102 | +#define HCA_SAMPLING_UPPER_THRESH_DEFAULT 255UL |
| 103 | + |
| 104 | +/* |
| 105 | + * @s: The monitor region size (in bytes) |
| 106 | + * |
| 107 | + * Supported monitor region sizes are 16GB, 32GB, 64GB ... 512TB. @s is |
| 108 | + * rounded up to a power of two; a result below 16GB or above 512TB is |
| 109 | + * clamped to 16GB or 512TB respectively. |
| 110 | + */ |
| 111 | +#define HCA_MONITOR_SIZE(s) min((u64)512 * TB, max((u64)16 * GB, (u64)roundup_pow_of_two(s))) |
| 112 | +//#define HCA_MONITOR_SIZE_DEFAULT (16 * GB) |
| 113 | + |
| 114 | +/* |
| 115 | + * @b: The monitor region base |
| 116 | + * @s: The monitor region size (in bytes) |
| 117 | + * |
| 118 | + * The monitor region base address must be aligned to its size. |
| 119 | + */ |
| 120 | +#define HCA_MONITOR_BASE(b, s) ALIGN((u64)(b), HCA_MONITOR_SIZE(s)) |
| 121 | +//#define HCA_MONITOR_BASE_DEFAULT 0 |
| 122 | + |
| 123 | +/* |
| 124 | + * @s: The monitor region size (normalized through HCA_MONITOR_SIZE()) |
| 125 | + * |
| 126 | + * The counter region holds one HCA_ENTRY_SIZE entry per page of the monitor |
| 127 | + * region: size = HCA_MONITOR_SIZE(@s) * HCA_ENTRY_SIZE / PAGE_SIZE. |
| 128 | + */ |
| 129 | +#define HCA_COUNTER_SIZE(s) ((HCA_MONITOR_SIZE(s) * (u64)HCA_ENTRY_SIZE) / PAGE_SIZE) |
| 130 | +#define HCA_COUNTER_SIZE_DEFAULT 0 |
| 131 | +#define HCA_COUNTER_BASE_DEFAULT 0 |
| 132 | + |
| 133 | +/* |
| 134 | + * @d: The decay delay (in ns) |
| 135 | + * |
| 136 | + * Decay delay defines the interval between updates to HCA cachelines of |
| 137 | + * 128 bytes. This parameter is not indicative of the absolute time taken |
| 138 | + * to apply decay updates to the entire counter region. However, that can |
| 139 | + * be derived from the configured decay delay. |
| 140 | + * |
| 141 | + * E.g. monitoring a 512GB region of 64kB pages requires a 64MB counter |
| 142 | + * region. Applying one round of decay updates to the entire region |
| 143 | + * requires (64M / 128) = 524288 HCA cache lines to be updated. If @d is |
| 144 | + * 2048, (524288 * 2048) ns = ~1.07s is required to update the entire |
| 145 | + * counter region. |
| 146 | + * |
| 147 | + * If the delay is set to 0, the decay feature is disabled. Otherwise, |
| 148 | + * supported decay delay periods are 16ns, 32ns, 64ns ... 147573952589s. |
| 149 | + * Since @d is in the nanosecond scale, representing the upper bound is |
| 150 | + * not possible with a 64-bit integer. Moreover, such large delays are |
| 151 | + * impractical for most intents and purposes. So, while the hardware can |
| 152 | + * support it, the maximum configurable decay delay is restricted to |
| 153 | + * 9223372036854775808ns. The minimum and maximum decay delays are always |
| 154 | + * guaranteed to be 16ns and 9223372036854775808ns respectively if the |
| 155 | + * specified non-zero value is out of bounds. |
| 156 | + */ |
| 157 | +#define HCA_DECAY_DELAY(d) ((d) ? min((uint64_t)9223372036854775808ULL, max((uint64_t)16, (uint64_t)roundup_pow_of_two(d))) : (uint64_t)0) |
| 158 | +/* 1 msec -- NOTE(review): @d is documented in ns and HCA_DECAY_DELAY(1) evaluates to 16; confirm the unit of this default */ |
| 159 | +#define HCA_DECAY_DELAY_DEFAULT 1 |
| 160 | + |
| 161 | +/* Entry constants and helpers */ |
| 162 | +#define HCA_ENTRY_SIZE 8 /* bytes per entry; repeats the identical definition near the top of this header */ |
| 163 | + |
| 164 | +/* |
| 165 | + * @v: The raw value of the entry |
| 166 | + * @s: The start of the bitfield, counted from the most significant bit of the 64-bit value |
| 167 | + * @n: The length of the bitfield in bits; @s and @n are parenthesized so that expression arguments expand correctly |
| 168 | + */ |
| 169 | +#define HCA_ENTRY_FIELD(v, s, n) (((v) >> (64 - ((s) + (n)))) & ((1UL << (n)) - 1)) |
| 170 | + |
| 171 | +/* |
| 172 | + * The value of the HCA count is 4^exp * mnt, computed as (1 << (2 * exp)) * mnt, where |
| 173 | + * exp = X[0:3] and mnt = X[4:15] of the raw entry X (the macro argument e, which HCA_ENTRY_COUNT evaluates twice). |
| 174 | + */ |
| 175 | +#define HCA_ENTRY_COUNT_EXP(e) HCA_ENTRY_FIELD((e), 0, 4) |
| 176 | +#define HCA_ENTRY_COUNT_MNT(e) HCA_ENTRY_FIELD((e), 4, 12) |
| 177 | +#define HCA_ENTRY_COUNT(e) ((1UL << (2 * HCA_ENTRY_COUNT_EXP(e))) * HCA_ENTRY_COUNT_MNT(e)) |
| 178 | + |
| 179 | +#define HCA_ENTRY_AGE(e) HCA_ENTRY_FIELD((e), 16, 3) /* X[16:18] of the raw entry */ |
| 180 | + |
| 181 | +#define HCA_ENTRY_GEN(e) HCA_ENTRY_FIELD((e), 19, 1) /* X[19] of the raw entry */ |
| 182 | + |
| 183 | +/* |
| 184 | + * The value of the HCA prev_count is 4^exp * mnt, with exp = X[20:23] and mnt = X[24:31] of the raw entry X. |
| 185 | + * The accessors take their positions from the START/LENGTH constants so the layout is stated only once. |
| 186 | + */ |
| 187 | +#define HCA_ENTRY_PREV_COUNT_EXP_LENGTH 4 |
| 188 | +#define HCA_ENTRY_PREV_COUNT_EXP_START 20 |
| 189 | +#define HCA_ENTRY_PREV_COUNT_MNT_LENGTH 8 |
| 190 | +#define HCA_ENTRY_PREV_COUNT_MNT_START 24 |
| 191 | +#define HCA_ENTRY_PREV_COUNT_EXP(e) HCA_ENTRY_FIELD((e), HCA_ENTRY_PREV_COUNT_EXP_START, HCA_ENTRY_PREV_COUNT_EXP_LENGTH) |
| 192 | +#define HCA_ENTRY_PREV_COUNT_MNT(e) HCA_ENTRY_FIELD((e), HCA_ENTRY_PREV_COUNT_MNT_START, HCA_ENTRY_PREV_COUNT_MNT_LENGTH) |
| 193 | +#define HCA_ENTRY_PREV_COUNT(e) ((1UL << (2 * HCA_ENTRY_PREV_COUNT_EXP(e))) * HCA_ENTRY_PREV_COUNT_MNT(e)) |
| 194 | + |
| 195 | +#define HCA_ENTRY_TIMELOG_LENGTH 7 /* bits */ |
| 196 | +#define HCA_ENTRY_TIMELOG_START 32 /* counted from the most significant bit of the raw entry */ |
| 197 | +#define HCA_ENTRY_TIMELOG(e) HCA_ENTRY_FIELD((e), HCA_ENTRY_TIMELOG_START, HCA_ENTRY_TIMELOG_LENGTH) |
| 198 | + |
| 199 | +#define HCA_ENTRY_SOCKETID_COUNT 5 /* number of socket-id slots per entry */ |
| 200 | +#define HCA_ENTRY_SOCKETID_LENGTH 5 /* bits per slot */ |
| 201 | +#define HCA_ENTRY_SOCKETID_START(s) (39 + (s) * HCA_ENTRY_SOCKETID_LENGTH) /* slots are packed from bit 39, right after the timelog field; 39 + 5 * 5 = 64 */ |
| 202 | +#define HCA_ENTRY_SOCKETID(e, s) HCA_ENTRY_FIELD((e), HCA_ENTRY_SOCKETID_START(s), HCA_ENTRY_SOCKETID_LENGTH) |
| 203 | + |
| 204 | +struct hca_entry { /* NOTE(review): presumably the decoded form of a raw 64-bit entry; the decoder is not in this header */ |
| 205 | + unsigned long count; /* current count; divided by (age + 1) in hotness_score() */ |
| 206 | + uint8_t age; /* larger age shrinks the share of count in hotness_score() */ |
| 207 | + uint8_t gen; |
| 208 | + unsigned long prev_count; /* added in full by hotness_score() */ |
| 209 | + uint8_t timelog; |
| 210 | + uint8_t socketid[HCA_ENTRY_SOCKETID_COUNT]; /* one element per socket-id slot */ |
| 211 | +}; |
| 212 | + |
| 213 | +static inline unsigned long hotness_score(struct hca_entry * entry) |
| 214 | +{ |
| 215 | + unsigned long hotness; |
| 216 | + |
| 217 | + /* |
| 218 | + * Hotness = prev_count + count / (age + 1). prev_count carries the |
| 219 | + * historical value, so it is taken in full. The current count is scaled |
| 220 | + * down as the entry ages, because prev_count is then the better approximation, |
| 221 | + * but it is still included to accommodate spikes in access. |
| 222 | + * The + 1 keeps the divisor non-zero when age == 0. |
| 223 | + */ |
| 224 | + hotness = entry->prev_count + (entry->count / (entry->age + 1)); |
| 225 | + |
| 226 | + return hotness; |
| 227 | +} |
| 228 | + |
| 229 | +extern bool hca_lru_age; /* the variables and hooks below are only declared here; they are defined elsewhere */ |
| 230 | +extern bool hca_lru_evict; |
| 231 | +extern int (*hca_pfn_entry)(unsigned long pfn, struct hca_entry *entry); /* function-pointer hook */ |
| 232 | +extern bool (*hca_node_enabled)(int numa_node); /* function-pointer hook */ |
| 233 | +extern void (*hca_backend_node_debugfs_init)(int numa_node, struct dentry *node_dentry); /* NOTE(review): struct dentry is not declared in this header itself; confirm includers declare it first */ |
| 234 | +extern void (*hca_backend_debugfs_init)(struct dentry *root_dentry); /* function-pointer hook */ |
| 235 | +extern int (*hca_clear_entry)(unsigned long pfn); /* function-pointer hook */ |
| 236 | +int map_hca_lru_seq(struct lruvec *lruvec, struct folio *folio); /* NOTE(review): likewise struct lruvec and struct folio are not declared in this header itself */ |
| 237 | +bool hca_try_to_inc_max_seq(struct lruvec *lruvec, unsigned long nr_to_scan, unsigned long max_seq); |
| 238 | +void restablish_hotness_range(int node); /* sic: identifier spelled "restablish"; kept because callers use this name */ |
| 239 | +#endif /* _ASM_POWERPC_HCA_H */ |
0 commit comments