
Commit edefe47

increment iter per eval instead of epoch
1 parent 884431c commit edefe47

3 files changed: +31 -25 lines changed

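For context: in ggml's Adam step the update is scaled by bias-correction factors that depend on the iteration count, beta1h = alpha/(1 - beta1^iter) and beta2h = 1/(1 - beta2^iter) (see the hunks below). This commit makes iter advance once per optimizer evaluation instead of once per epoch, so the correction factors now change with every applied step. A small standalone sketch of how they evolve; this is not ggml code and the hyperparameter values are illustrative, not ggml defaults:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Standalone illustration: print the Adam bias-correction factors for the
// first few iteration counts, using the same formulas as the hunks below.
// alpha/beta1/beta2 are example values, not values taken from this commit.
int main() {
    const float alpha = 1e-3f, beta1 = 0.9f, beta2 = 0.999f;
    for (int64_t iter = 1; iter <= 4; ++iter) {
        const float beta1h = alpha/(1.0f - powf(beta1, iter));
        const float beta2h = 1.0f/(1.0f - powf(beta2, iter));
        printf("iter=%lld: beta1h=%g beta2h=%g\n", (long long) iter, beta1h, beta2h);
    }
    return 0;
}
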
examples/mnist/mnist-common.cpp

Lines changed: 12 additions & 7 deletions

@@ -555,15 +555,20 @@ void mnist_model_train(mnist_model & model, const float * images, const float *
             // For the last iteration, calculate gradients and also apply the optimizer:
             ggml_backend_graph_compute(model.backend, gb_opt);
             ggml_graph_reset(gb_grad); // Set gradients to zero, do not reset optimizer.
-        }
-        for (int j = 0; j < gb_grad->n_nodes; ++j) {
-            struct ggml_tensor * node = gb_grad->nodes[j];

-            if (node->op != GGML_OP_OPT_STEP_ADAM) {
-                continue;
-            }
+            // Increment iterations for the optimizer tensors:
+            for (int j = 0; j < gb_opt->n_nodes; ++j) {
+                struct ggml_tensor * node = gb_opt->nodes[j];

-            node->op_params[0]++;
+                if (node->op != GGML_OP_OPT_STEP_ADAM) {
+                    continue;
+                }
+
+                int64_t iter;
+                memcpy(&iter, node->op_params + 0, sizeof(int64_t));
+                iter++;
+                memcpy(node->op_params + 0, &iter, sizeof(int64_t));
+            }
         }

         ggml_backend_tensor_get(model.loss, &loss, 0, ggml_nbytes(model.loss));

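The counter now lives in the node's op_params, which is an array of int32_t; the 64-bit value is therefore moved in and out with memcpy and spans the first two slots. A minimal sketch of the read-increment-write done above, factored into a helper; adam_increment_iter is a hypothetical name, not a ggml function:

#include <cstdint>
#include <cstring>

#include "ggml.h"

// Hypothetical helper (not part of ggml): bump the 64-bit iteration counter
// stored at the start of an optimizer-step node's op_params. memcpy is used
// because op_params is an int32_t array and the counter spans two slots.
static void adam_increment_iter(struct ggml_tensor * node) {
    if (node->op != GGML_OP_OPT_STEP_ADAM) {
        return; // only Adam step nodes carry the iteration counter
    }
    int64_t iter;
    memcpy(&iter, node->op_params + 0, sizeof(int64_t));
    iter++;
    memcpy(node->op_params + 0, &iter, sizeof(int64_t));
}
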
src/ggml-cuda/opt-step-adam.cu

Lines changed: 6 additions & 6 deletions

@@ -63,12 +63,12 @@ void ggml_cuda_opt_step_adam(ggml_backend_cuda_context & ctx, ggml_tensor * dst)

    const int64_t ne = ggml_nelements(src0);

-    int32_t iter; memcpy(&iter,  &dst->op_params[0], sizeof(float));
-    float alpha;  memcpy(&alpha, &dst->op_params[1], sizeof(float));
-    float beta1;  memcpy(&beta1, &dst->op_params[2], sizeof(float));
-    float beta2;  memcpy(&beta2, &dst->op_params[3], sizeof(float));
-    float eps;    memcpy(&eps,   &dst->op_params[4], sizeof(float));
-    float l1;     memcpy(&l1,    &dst->op_params[5], sizeof(float));
+    int64_t iter; memcpy(&iter,  &dst->op_params[0], sizeof(int64_t));
+    float alpha;  memcpy(&alpha, &dst->op_params[2], sizeof(float));
+    float beta1;  memcpy(&beta1, &dst->op_params[3], sizeof(float));
+    float beta2;  memcpy(&beta2, &dst->op_params[4], sizeof(float));
+    float eps;    memcpy(&eps,   &dst->op_params[5], sizeof(float));
+    float l1;     memcpy(&l1,    &dst->op_params[6], sizeof(float));

    const float beta1h = alpha/(1.0f - powf(beta1, iter));
    const float beta2h = 1.0f/(1.0f - powf(beta2, iter));

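On the host side the kernel launch now reads a 64-bit iter from op_params[0..1] and the float hyperparameters from indices 2-6. For reference, a scalar sketch of the per-element Adam update these values drive, assuming the conventional meaning of the names; the l1 term is omitted and this is not the actual CUDA kernel:

#include <cmath>
#include <cstdint>

// Rough scalar reference (not the CUDA kernel): one Adam update for a single
// parameter x with first/second moments m/v and gradient g, using the same
// bias-correction factors beta1h/beta2h as the hunk above. The l1 term from
// op_params is not applied here.
static void adam_step_scalar(float & x, float & m, float & v, const float g,
                             const float alpha, const float beta1, const float beta2,
                             const float eps, const int64_t iter) {
    const float beta1h = alpha/(1.0f - powf(beta1, iter));
    const float beta2h = 1.0f/(1.0f - powf(beta2, iter));

    m = beta1*m + (1.0f - beta1)*g;    // first moment (EMA of gradients)
    v = beta2*v + (1.0f - beta2)*g*g;  // second moment (EMA of squared gradients)

    const float mh = m*beta1h;               // bias-corrected numerator, already scaled by alpha
    const float vh = sqrtf(v*beta2h) + eps;  // bias-corrected denominator

    x -= mh/vh;
}
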
src/ggml.c

Lines changed: 13 additions & 12 deletions

@@ -8132,12 +8132,13 @@ struct ggml_tensor * ggml_opt_step_adam(
    result->src[2] = ggml_dup_tensor(ctx, a->grad);
    result->src[3] = ggml_dup_tensor(ctx, a->grad);

-    ggml_set_op_params_i32(result, 0, 1); // iteration
-    ggml_set_op_params_f32(result, 1, alpha);
-    ggml_set_op_params_f32(result, 2, beta1);
-    ggml_set_op_params_f32(result, 3, beta2);
-    ggml_set_op_params_f32(result, 4, eps);
-    ggml_set_op_params_f32(result, 5, l1);
+    const int64_t iter = 1;
+    memcpy(result->op_params + 0, &iter, sizeof(int64_t));
+    ggml_set_op_params_f32(result, 2, alpha);
+    ggml_set_op_params_f32(result, 3, beta1);
+    ggml_set_op_params_f32(result, 4, beta2);
+    ggml_set_op_params_f32(result, 5, eps);
+    ggml_set_op_params_f32(result, 6, l1);

    return result;
}

@@ -17162,12 +17163,12 @@ static void ggml_compute_forward_opt_step_adam_f32(
    const int ir1 = MIN(ir0 + dr, nr);

    /* const float gnorm = 1.0f; */
-    const int32_t iter = ggml_get_op_params_i32(dst, 0);
-    const float alpha = ggml_get_op_params_f32(dst, 1);
-    const float beta1 = ggml_get_op_params_f32(dst, 2);
-    const float beta2 = ggml_get_op_params_f32(dst, 3);
-    const float eps = ggml_get_op_params_f32(dst, 4);
-    const float l1 = ggml_get_op_params_f32(dst, 5);
+    int64_t iter; memcpy(&iter, dst->op_params + 0, sizeof(int64_t));
+    const float alpha = ggml_get_op_params_f32(dst, 2);
+    const float beta1 = ggml_get_op_params_f32(dst, 3);
+    const float beta2 = ggml_get_op_params_f32(dst, 4);
+    const float eps = ggml_get_op_params_f32(dst, 5);
+    const float l1 = ggml_get_op_params_f32(dst, 6);

    const float beta1h = alpha/(1.0f - powf(beta1, iter));
    const float beta2h = 1.0f/(1.0f - powf(beta2, iter));

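Putting the three files together, the op_params layout for GGML_OP_OPT_STEP_ADAM after this commit appears to be: a 64-bit iter in int32 slots 0-1 (initialized to 1 in ggml_opt_step_adam), followed by alpha, beta1, beta2, eps and l1 in slots 2-6, which is why the float indices shift by one compared to the old code. A standalone sketch of the packing on a plain int32_t array; the values are illustrative:

#include <cstdint>
#include <cstring>

// Standalone illustration of the packing used above: a 64-bit counter copied
// to the front of an int32_t parameter array occupies slots 0 and 1, so the
// first float parameter moves to index 2. Values are illustrative only.
int main() {
    int32_t op_params[16] = {0};

    const int64_t iter = 1;                         // iteration starts at 1, as in ggml_opt_step_adam
    memcpy(op_params + 0, &iter, sizeof(int64_t));  // spans op_params[0] and op_params[1]

    const float alpha = 1e-3f;                      // example value, not a ggml default
    memcpy(op_params + 2, &alpha, sizeof(float));   // first float hyperparameter at index 2

    int64_t iter_back;
    memcpy(&iter_back, op_params + 0, sizeof(int64_t));
    return iter_back == 1 ? 0 : 1;                  // round-trips correctly
}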