diff --git a/.gitignore b/.gitignore
index 1b28f21..cf7118a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 *.o
 *.i
 *.s
+*.d
 
 *.png
 *.jpg
diff --git a/Makefile b/Makefile
index 8a2b503..612c2ea 100644
--- a/Makefile
+++ b/Makefile
@@ -1,12 +1,14 @@
 CC:=gcc
-CFLAGS:=-std=c11 -Wall -Wextra -Winline -pedantic -Ofast -march=pentium4 -s -DNDEBUG
+CFLAGS:=-std=c11 -Wall -Wextra -Winline -pedantic -Ofast -march=pentium4 -DNDEBUG
 #CFLAGS:=-std=c11 -Wall -Wextra -pedantic -Og -g
+LFLAGS:=-s
 LIBS:=-ljpeg -lpng -lfftw3f -lm
+OBJS:=jpeg2png.o utils.o jpeg.o png.o box.o upsample.o compute.o
 
-SRCS:=jpeg2png.c utils.c jpeg.c
+jpeg2png: $(OBJS)
+	$(CC) $^ -o $@ $(LFLAGS) $(LIBS)
 
-jpeg2png: jpeg2png.o utils.o jpeg.o png.o box.o
-	$(CC) $^ -o $@ $(LIBS)
+-include $(OBJS:.o=.d)
 
-%o: %.c *.h
-	$(CC) $< -o $@ $(CFLAGS)
+%.o: %.c
+	$(CC) -MMD $< -c -o $@ $(CFLAGS)
diff --git a/compute.c b/compute.c
new file mode 100644
index 0000000..fcc6641
--- /dev/null
+++ b/compute.c
@@ -0,0 +1,218 @@
+#include <stdint.h>
+#include <fftw3.h>
+
+#include "jpeg2png.h"
+#include "compute.h"
+#include "utils.h"
+#include "box.h"
+
+static float compute_step(struct coef *coef, float *objective_gradient, float *fdata_x, float *fdata_y, float weight, float step_size) {
+        int w = coef->w;
+        int h = coef->h;
+        float *fdata = coef->fdata;
+        float alpha = weight / sqrt(4. / 2.);
+
+        for(int i = 0; i < h * w; i++) {
+                objective_gradient[i] = 0.;
+        }
+
+        float tv = 0.;
+        for(int y = 0; y < h; y++) {
+                for(int x = 0; x < w; x++) {
+                        // forward gradient x
+                        float g_x = x >= w-1 ? 0. : *p(fdata, x+1, y, w, h) - *p(fdata, x, y, w, h);
+                        // forward gradient y
+                        float g_y = y >= h-1 ? 0. : *p(fdata, x, y+1, w, h) - *p(fdata, x, y, w, h);
+                        // norm
+                        float g_norm = sqrt(g_x * g_x + g_y * g_y);
+                        tv += g_norm;
+                        // compute derivatives
+                        if(g_norm != 0) {
+                                *p(objective_gradient, x, y, w, h) += -(g_x + g_y) / g_norm;
+                                if(x < w-1) {
+                                        *p(objective_gradient, x+1, y, w, h) += g_x / g_norm;
+                                }
+                                if(y < h-1) {
+                                        *p(objective_gradient, x, y+1, w, h) += g_y / g_norm;
+                                }
+                        }
+                        if(alpha != 0.) {
+                                *p(fdata_x, x, y, w, h) = g_x;
+                                *p(fdata_y, x, y, w, h) = g_y;
+                        }
+                }
+        }
+        // printf("tv = %f, %f\n", tv, tv / (w * h) / sqrt(2.));
+
+        float tv2 = 0;
+        if(alpha != 0.) {
+                for(int y = 0; y < h; y++) {
+                        for(int x = 0; x < w; x++) {
+                                // backward x
+                                float g_xx = x <= 0 ? 0. : *p(fdata_x, x, y, w, h) - *p(fdata_x, x-1, y, w, h);
+                                // backward x
+                                float g_yx = x <= 0 ? 0. : *p(fdata_y, x, y, w, h) - *p(fdata_y, x-1, y, w, h);
+                                // backward y
+                                float g_xy = y <= 0 ? 0. : *p(fdata_x, x, y, w, h) - *p(fdata_x, x, y-1, w, h);
+                                // backward y
+                                float g_yy = y <= 0 ? 0. : *p(fdata_y, x, y, w, h) - *p(fdata_y, x, y-1, w, h);
+                                // norm
+                                float g2_norm = sqrt(g_xx * g_xx + g_yx * g_yx + g_xy * g_xy + g_yy * g_yy);
+                                tv2 += g2_norm;
+                                // compute derivatives
+                                if(g2_norm != 0.) {
+                                        *p(objective_gradient, x, y, w, h) += alpha * (-(2. * g_xx + g_xy + g_yx + 2. *  g_yy) / g2_norm);
+                                        if(x > 0) {
+                                                *p(objective_gradient, x-1, y, w, h) += alpha * ((g_yx + g_xx) / g2_norm);
+                                        }
+                                        if(x < w-1) {
+                                                *p(objective_gradient, x+1, y, w, h) += alpha * ((g_xx + g_xy) / g2_norm);
+                                        }
+                                        if(y > 0) {
+                                                *p(objective_gradient, x, y-1, w, h) += alpha * ((g_yy + g_xy) / g2_norm);
+                                        }
+                                        if(y < h-1) {
+                                                *p(objective_gradient, x, y+1, w, h) += alpha * ((g_yy + g_yx) / g2_norm);
+                                        }
+                                        if(x < w-1 && y > 0) {
+                                                *p(objective_gradient, x+1, y-1, w, h) += alpha * ((-g_xy) / g2_norm);
+                                        }
+                                        if(x > 0 && y < h-1) {
+                                                *p(objective_gradient, x-1, y+1, w, h) += alpha * ((-g_yx) / g2_norm);
+                                        }
+                                }
+                        }
+                }
+                // printf("tv2 = %f, %f\n", tv2, tv2 / (w * h));
+        }
+
+        for(int i = 0; i < h * w; i++) {
+                fdata[i] -= step_size * (objective_gradient[i] / (alpha + 1.));
+        }
+
+        return (tv + alpha * tv2) / (alpha + 1.);
+}
+
+struct compute_projection_aux {
+        float * restrict q_min;
+        float * restrict q_max;
+        float *temp;
+        fftwf_plan dct;
+        fftwf_plan idct;
+};
+
+static void compute_projection_init(struct coef *coef, uint16_t quant_table[64],struct compute_projection_aux *aux) {
+        int w = coef->w;
+        int h = coef->h;
+
+        float *q_max = fftwf_alloc_real(h * w);
+        if(!q_max) { die("allocation error"); }
+        float *q_min = fftwf_alloc_real(h * w);
+        if(!q_min) { die("allocation error"); }
+        int blocks = (h / 8) * (w / 8);
+
+        for(int i = 0; i < blocks; i++) {
+                for(int j = 0; j < 64; j++) {
+                       q_max[i*64+j] = (coef->data[i*64+j] + 0.5) * quant_table[j];
+                       q_min[i*64+j] = (coef->data[i*64+j] - 0.5) * quant_table[j];
+                }
+        }
+
+        for(int i = 0; i < blocks; i++) {
+                for(int v = 0; v < 8; v++) {
+                        for(int u = 0; u < 8; u++) {
+                                q_max[i*64 + v*8+u] /= a(u) * a(v);
+                                q_min[i*64 + v*8+u] /= a(u) * a(v);
+                        }
+                }
+        }
+
+        aux->q_min = q_min;
+        aux->q_max = q_max;
+
+        float *temp = fftwf_alloc_real(h * w);
+        if(!temp) { die("allocation error"); }
+
+        aux->temp = temp;
+
+        fftwf_plan dct = fftwf_plan_many_r2r(
+                2, (int[]){8, 8}, blocks,
+                temp, (int[]){8, 8}, 1, 64,
+                temp, (int[]){8, 8}, 1, 64,
+                (void*)(int[]){FFTW_REDFT10, FFTW_REDFT10}, FFTW_ESTIMATE);
+
+        aux->dct = dct;
+
+        fftwf_plan idct = fftwf_plan_many_r2r(
+                2, (int[]){8, 8}, blocks,
+                temp, (int[]){8, 8}, 1, 64,
+                temp, (int[]){8, 8}, 1, 64,
+                (void*)(int[]){FFTW_REDFT01, FFTW_REDFT01}, FFTW_ESTIMATE);
+
+        aux->idct = idct;
+}
+
+static void compute_projection_destroy(struct compute_projection_aux *aux) {
+        fftwf_destroy_plan(aux->idct);
+        fftwf_destroy_plan(aux->dct);
+        fftwf_free(aux->temp);
+        fftwf_free(aux->q_min);
+        fftwf_free(aux->q_max);
+}
+
+static void compute_projection(struct coef *coef, struct compute_projection_aux *aux) {
+        int w = coef->w;
+        int h = coef->h;
+        float *fdata = coef->fdata;
+
+        float *temp = aux->temp;
+
+        int blocks = (h / 8) * (w / 8);
+
+        box(fdata, temp, w, h);
+
+        fftwf_execute(aux->dct);
+        for(int i = 0; i < h * w; i++) {
+                temp[i] /= 16.;
+        }
+
+        for(int i = 0; i < h * w; i++) {
+                temp[i] = CLAMP(temp[i], aux->q_min[i], aux->q_max[i]);
+        }
+
+        fftwf_execute(aux->idct);
+        for(int i = 0; i < blocks * 64; i++) {
+                temp[i] /= 16.;
+        }
+
+        unbox(temp, fdata, w, h);
+}
+
+void compute(struct coef *coef, uint16_t quant_table[64]) {
+        struct compute_projection_aux cpa;
+        compute_projection_init(coef, quant_table, &cpa);
+
+        int h = coef->h;
+        int w = coef->w;
+
+        float *fdata_x = fftwf_alloc_real(h * w);
+        if(!fdata_x) { die("allocation error"); }
+        float *fdata_y = fftwf_alloc_real(h * w);
+        if(!fdata_y) { die("allocation error"); }
+        float *objective_gradient = fftwf_alloc_real(h * w);
+        if(!objective_gradient) { die("allocation error"); }
+
+        const int N = 100;
+        float tv;
+        for(int i = 0; i < N; i++) {
+                compute_projection(coef, &cpa);
+                tv = compute_step(coef, fdata_x, fdata_y, objective_gradient, 0.3, 1. / sqrt(1 + N));
+        }
+
+        printf("objective = %f, %f\n", tv, tv / (coef->w * coef->h) / sqrt(2.));
+
+        fftwf_free(fdata_x);
+        fftwf_free(fdata_y);
+        fftwf_free(objective_gradient);
+        compute_projection_destroy(&cpa);
+}
diff --git a/compute.h b/compute.h
new file mode 100644
index 0000000..96c368a
--- /dev/null
+++ b/compute.h
@@ -0,0 +1,8 @@
+#ifndef JPEG2PNG_COMPUTE_H
+#define JPEG2PNG_COMPUTE_H
+
+#include <stdint.h>
+
+void compute(struct coef *coef, uint16_t quant_table[64]);
+
+#endif
diff --git a/jpeg.c b/jpeg.c
index 45e866f..1c8aa00 100644
--- a/jpeg.c
+++ b/jpeg.c
@@ -1,9 +1,12 @@
+#include <assert.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <stdbool.h>
+#include <stdint.h>
 #include <string.h>
 
 #include <jpeglib.h>
+#include <fftw3.h>
 
 #include "jpeg.h"
 #include "utils.h"
@@ -47,3 +50,29 @@ void read_jpeg(FILE *in, struct jpeg *jpeg) {
         }
         jpeg_destroy_decompress(&d);
 }
+
+void decode_coefficients(struct coef *coef, uint16_t *quant_table) {
+        coef->fdata = fftwf_alloc_real(coef->h * coef->w);
+        if(!coef->fdata) { die("allocation error"); }
+        int blocks = (coef->h / 8) * (coef->w / 8);
+        for(int i = 0; i < blocks; i++) {
+                for(int j = 0; j < 64; j++) {
+                        coef->fdata[i*64+j] = coef->data[i*64+j] * quant_table[j];
+                }
+                for(int v = 0; v < 8; v++) {
+                        for(int u = 0; u < 8; u++) {
+                                coef->fdata[i*64 + v*8+u] /= a(u) * a(v);
+                        }
+                }
+        }
+        fftwf_plan p = fftwf_plan_many_r2r(
+                2, (int[]){8, 8}, blocks,
+                coef->fdata, (int[]){8, 8}, 1, 64,
+                coef->fdata, (int[]){8, 8}, 1, 64,
+                (void*)(int[]){FFTW_REDFT01, FFTW_REDFT01}, FFTW_ESTIMATE);
+        fftwf_execute(p);
+        fftwf_destroy_plan(p);
+        for(int i = 0; i < blocks * 64; i++) {
+                coef->fdata[i] /= 16.;
+        }
+}
diff --git a/jpeg.h b/jpeg.h
index 41c5811..c6711e0 100644
--- a/jpeg.h
+++ b/jpeg.h
@@ -2,6 +2,7 @@
 #define JPEG2PNG_JPEG_H
 
 #include <stdio.h>
+#include <stdint.h>
 
 #include "jpeg2png.h"
 
@@ -13,4 +14,5 @@ struct jpeg {
 };
 
 void read_jpeg(FILE *in, struct jpeg *jpeg);
+void decode_coefficients(struct coef *coef, uint16_t *quant_table);
 #endif
diff --git a/jpeg2png.c b/jpeg2png.c
index 5e9be60..c4ab11e 100644
--- a/jpeg2png.c
+++ b/jpeg2png.c
@@ -12,255 +12,8 @@
 #include "jpeg.h"
 #include "png.h"
 #include "box.h"
-
-static void element_product(const int16_t data[64], const uint16_t quant_table[64], float *out) {
-        for(int i = 0; i < 64; i++) {
-                out[i] = data[i] * quant_table[i];
-        }
-}
-
-static float a(int n) {
-        if(n == 0) {
-                return 1./sqrt(2.);
-        } else {
-                return 1.;
-        }
-}
-
-void decode_coefficients(struct coef *coef, uint16_t *quant_table) {
-        coef->fdata = fftwf_alloc_real(coef->h * coef->w);
-        if(!coef->fdata) { die("allocation error"); }
-        int blocks = (coef->h / 8) * (coef->w / 8);
-        for(int i = 0; i < blocks; i++) {
-                element_product(&coef->data[i*64], quant_table, &coef->fdata[i*64]);
-                for(int v = 0; v < 8; v++) {
-                        for(int u = 0; u < 8; u++) {
-                                coef->fdata[i*64 + v*8+u] /= a(u) * a(v);
-                        }
-                }
-        }
-        fftwf_plan p = fftwf_plan_many_r2r(
-                2, (int[]){8, 8}, blocks,
-                coef->fdata, (int[]){8, 8}, 1, 64,
-                coef->fdata, (int[]){8, 8}, 1, 64,
-                (void*)(int[]){FFTW_REDFT01, FFTW_REDFT01}, FFTW_ESTIMATE);
-        fftwf_execute(p);
-        fftwf_destroy_plan(p);
-        for(int i = 0; i < blocks * 64; i++) {
-                coef->fdata[i] /= 16.;
-        }
-}
-
-static float compute_step(struct coef *coef, float *objective_gradient, float *fdata_x, float *fdata_y, float weight, float step_size) {
-        int w = coef->w;
-        int h = coef->h;
-        float *fdata = coef->fdata;
-        float alpha = weight / sqrt(4. / 2.);
-
-        for(int i = 0; i < h * w; i++) {
-                objective_gradient[i] = 0.;
-        }
-
-        float tv = 0.;
-        for(int y = 0; y < h; y++) {
-                for(int x = 0; x < w; x++) {
-                        // forward gradient x
-                        float g_x = x >= w-1 ? 0. : *p(fdata, x+1, y, w, h) - *p(fdata, x, y, w, h);
-                        // forward gradient y
-                        float g_y = y >= h-1 ? 0. : *p(fdata, x, y+1, w, h) - *p(fdata, x, y, w, h);
-                        // norm
-                        float g_norm = sqrt(g_x * g_x + g_y * g_y);
-                        tv += g_norm;
-                        // compute derivatives
-                        if(g_norm != 0) {
-                                *p(objective_gradient, x, y, w, h) += -(g_x + g_y) / g_norm;
-                                if(x < w-1) {
-                                        *p(objective_gradient, x+1, y, w, h) += g_x / g_norm;
-                                }
-                                if(y < h-1) {
-                                        *p(objective_gradient, x, y+1, w, h) += g_y / g_norm;
-                                }
-                        }
-                        if(alpha != 0.) {
-                                *p(fdata_x, x, y, w, h) = g_x;
-                                *p(fdata_y, x, y, w, h) = g_y;
-                        }
-                }
-        }
-        // printf("tv = %f, %f\n", tv, tv / (w * h) / sqrt(2.));
-
-        float tv2 = 0;
-        if(alpha != 0.) {
-                for(int y = 0; y < h; y++) {
-                        for(int x = 0; x < w; x++) {
-                                // backward x
-                                float g_xx = x <= 0 ? 0. : *p(fdata_x, x, y, w, h) - *p(fdata_x, x-1, y, w, h);
-                                // backward x
-                                float g_yx = x <= 0 ? 0. : *p(fdata_y, x, y, w, h) - *p(fdata_y, x-1, y, w, h);
-                                // backward y
-                                float g_xy = y <= 0 ? 0. : *p(fdata_x, x, y, w, h) - *p(fdata_x, x, y-1, w, h);
-                                // backward y
-                                float g_yy = y <= 0 ? 0. : *p(fdata_y, x, y, w, h) - *p(fdata_y, x, y-1, w, h);
-                                // norm
-                                float g2_norm = sqrt(g_xx * g_xx + g_yx * g_yx + g_xy * g_xy + g_yy * g_yy);
-                                tv2 += g2_norm;
-                                // compute derivatives
-                                if(g2_norm != 0.) {
-                                        *p(objective_gradient, x, y, w, h) += alpha * (-(2. * g_xx + g_xy + g_yx + 2. *  g_yy) / g2_norm);
-                                        if(x > 0) {
-                                                *p(objective_gradient, x-1, y, w, h) += alpha * ((g_yx + g_xx) / g2_norm);
-                                        }
-                                        if(x < w-1) {
-                                                *p(objective_gradient, x+1, y, w, h) += alpha * ((g_xx + g_xy) / g2_norm);
-                                        }
-                                        if(y > 0) {
-                                                *p(objective_gradient, x, y-1, w, h) += alpha * ((g_yy + g_xy) / g2_norm);
-                                        }
-                                        if(y < h-1) {
-                                                *p(objective_gradient, x, y+1, w, h) += alpha * ((g_yy + g_yx) / g2_norm);
-                                        }
-                                        if(x < w-1 && y > 0) {
-                                                *p(objective_gradient, x+1, y-1, w, h) += alpha * ((-g_xy) / g2_norm);
-                                        }
-                                        if(x > 0 && y < h-1) {
-                                                *p(objective_gradient, x-1, y+1, w, h) += alpha * ((-g_yx) / g2_norm);
-                                        }
-                                }
-                        }
-                }
-                // printf("tv2 = %f, %f\n", tv2, tv2 / (w * h));
-        }
-
-        for(int i = 0; i < h * w; i++) {
-                fdata[i] -= step_size * (objective_gradient[i] / (alpha + 1.));
-        }
-
-        return (tv + alpha * tv2) / (alpha + 1.);
-}
-
-struct compute_projection_aux {
-        float * restrict q_min;
-        float * restrict q_max;
-        float *temp;
-        fftwf_plan dct;
-        fftwf_plan idct;
-};
-
-static void compute_projection_init(struct coef *coef, uint16_t quant_table[64],struct compute_projection_aux *aux) {
-        int w = coef->w;
-        int h = coef->h;
-
-        float *q_max = fftwf_alloc_real(h * w);
-        if(!q_max) { die("allocation error"); }
-        float *q_min = fftwf_alloc_real(h * w);
-        if(!q_min) { die("allocation error"); }
-        int blocks = (h / 8) * (w / 8);
-
-        for(int i = 0; i < blocks; i++) {
-                for(int j = 0; j < 64; j++) {
-                       q_max[i*64+j] = (coef->data[i*64+j] + 0.5) * quant_table[j];
-                       q_min[i*64+j] = (coef->data[i*64+j] - 0.5) * quant_table[j];
-                }
-        }
-
-        for(int i = 0; i < blocks; i++) {
-                for(int v = 0; v < 8; v++) {
-                        for(int u = 0; u < 8; u++) {
-                                q_max[i*64 + v*8+u] /= a(u) * a(v);
-                                q_min[i*64 + v*8+u] /= a(u) * a(v);
-                        }
-                }
-        }
-
-        aux->q_min = q_min;
-        aux->q_max = q_max;
-
-        float *temp = fftwf_alloc_real(h * w);
-        if(!temp) { die("allocation error"); }
-
-        aux->temp = temp;
-
-        fftwf_plan dct = fftwf_plan_many_r2r(
-                2, (int[]){8, 8}, blocks,
-                temp, (int[]){8, 8}, 1, 64,
-                temp, (int[]){8, 8}, 1, 64,
-                (void*)(int[]){FFTW_REDFT10, FFTW_REDFT10}, FFTW_ESTIMATE);
-
-        aux->dct = dct;
-
-        fftwf_plan idct = fftwf_plan_many_r2r(
-                2, (int[]){8, 8}, blocks,
-                temp, (int[]){8, 8}, 1, 64,
-                temp, (int[]){8, 8}, 1, 64,
-                (void*)(int[]){FFTW_REDFT01, FFTW_REDFT01}, FFTW_ESTIMATE);
-
-        aux->idct = idct;
-}
-
-static void compute_projection_destroy(struct compute_projection_aux *aux) {
-        fftwf_destroy_plan(aux->idct);
-        fftwf_destroy_plan(aux->dct);
-        fftwf_free(aux->temp);
-        fftwf_free(aux->q_min);
-        fftwf_free(aux->q_max);
-}
-
-static void compute_projection(struct coef *coef, struct compute_projection_aux *aux) {
-        int w = coef->w;
-        int h = coef->h;
-        float *fdata = coef->fdata;
-
-        float *temp = aux->temp;
-
-        int blocks = (h / 8) * (w / 8);
-
-        box(fdata, temp, w, h);
-
-        fftwf_execute(aux->dct);
-        for(int i = 0; i < h * w; i++) {
-                temp[i] /= 16.;
-        }
-
-        for(int i = 0; i < h * w; i++) {
-                temp[i] = CLAMP(temp[i], aux->q_min[i], aux->q_max[i]);
-        }
-
-        fftwf_execute(aux->idct);
-        for(int i = 0; i < blocks * 64; i++) {
-                temp[i] /= 16.;
-        }
-
-        unbox(temp, fdata, w, h);
-}
-
-static void compute(struct coef *coef, uint16_t quant_table[64]) {
-        struct compute_projection_aux cpa;
-        compute_projection_init(coef, quant_table, &cpa);
-
-        int h = coef->h;
-        int w = coef->w;
-
-        float *fdata_x = fftwf_alloc_real(h * w);
-        if(!fdata_x) { die("allocation error"); }
-        float *fdata_y = fftwf_alloc_real(h * w);
-        if(!fdata_y) { die("allocation error"); }
-        float *objective_gradient = fftwf_alloc_real(h * w);
-        if(!objective_gradient) { die("allocation error"); }
-
-        const int N = 100;
-        float tv;
-        for(int i = 0; i < N; i++) {
-                compute_projection(coef, &cpa);
-                tv = compute_step(coef, fdata_x, fdata_y, objective_gradient, 0.3, 1. / sqrt(1 + N));
-        }
-
-        printf("objective = %f, %f\n", tv, tv / (coef->w * coef->h) / sqrt(2.));
-
-        fftwf_free(fdata_x);
-        fftwf_free(fdata_y);
-        fftwf_free(objective_gradient);
-        compute_projection_destroy(&cpa);
-}
+#include "upsample.h"
+#include "compute.h"
 
 int main(int argc, char *argv[]) {
         if(argc != 3) {
@@ -272,19 +25,15 @@ int main(int argc, char *argv[]) {
         FILE *out = fopen(argv[2], "wb");
         if(!out) { die(NULL); }
 
-        puts("reading jpeg");
         struct jpeg jpeg;
         read_jpeg(in, &jpeg);
         fclose(in);
 
-        START_TIMER(decoding_coefficients);
         for(int c = 0; c < 3; c++) {
                 struct coef *coef = &jpeg.coefs[c];
                 decode_coefficients(coef, jpeg.quant_table[c]);
         }
-        STOP_TIMER(decoding_coefficients);
 
-        START_TIMER(unboxing);
         for(int i = 0; i < 3; i++) {
                 struct coef *coef = &jpeg.coefs[i];
                 float *temp = fftwf_alloc_real(coef->h * coef->w);
@@ -295,7 +44,6 @@ int main(int argc, char *argv[]) {
                 fftwf_free(coef->fdata);
                 coef->fdata = temp;
         }
-        STOP_TIMER(unboxing);
 
         START_TIMER(computing);
         for(int i = 0; i < 3; i++) {
@@ -307,59 +55,19 @@ int main(int argc, char *argv[]) {
         }
         STOP_TIMER(computing);
 
-        START_TIMER(adjusting_luma);
         struct coef *coef = &jpeg.coefs[0];
         for(int i = 0; i < coef->h * coef->w; i++) {
                 coef->fdata[i] += 128.;
         }
-        STOP_TIMER(adjusting_luma);
 
         START_TIMER(upsampling);
         for(int i = 0; i < 3; i++) {
-                struct coef *coef = &jpeg.coefs[i];
-                if(coef->h < jpeg.h) {
-                        assert(coef->h / 8 == ((jpeg.h + 7) / 8 + 1) / 2);
-                        float *new = fftwf_alloc_real(coef->w * coef->h * 2);
-                        for(int y = 0; y < coef->h; y++) {
-                                for(int x = 0; x < coef->w; x++) {
-                                        new[(y*2)*coef->w + x] = coef->fdata[y * coef->w + x];
-                                }
-                                for(int x = 0; x < coef->w; x++) {
-                                        if(y == coef->h - 1) {
-                                                new[(y*2+1)*coef->w + x] = coef->fdata[y * coef->w + x];
-                                        } else {
-                                                new[(y*2+1)*coef->w + x] = (coef->fdata[y * coef->w + x] + coef->fdata[(y+1) * coef->w + x]) / 2;
-                                        }
-                                }
-                        }
-                        fftwf_free(coef->fdata);
-                        coef->fdata = new;
-                        coef->h = coef->h * 2;
-                }
-                if(coef->w < jpeg.w) {
-                        assert(coef->w / 8 == ((jpeg.w + 7) / 8 + 1) / 2);
-                        float *new = fftwf_alloc_real(coef->w * coef->h * 2);
-                        for(int y = 0; y < coef->h; y++) {
-                                for(int x = 0; x < coef->w; x++) {
-                                        new[(y*coef->w + x)*2] = coef->fdata[y * coef->w + x];
-                                        if(x == coef->w - 1) {
-                                                new[(y*coef->w + x)*2 + 1] = coef->fdata[y * coef->w + x];
-                                        } else {
-                                                new[(y*coef->w + x)*2 + 1] = (coef->fdata[y * coef->w + x] + coef->fdata[y * coef->w + x + 1]) / 2;
-                                        }
-                                }
-                        }
-                        fftwf_free(coef->fdata);
-                        coef->fdata = new;
-                        coef->w = coef->w * 2;
-                }
+                upsample(&jpeg.coefs[i], jpeg.w, jpeg.h);
         }
         STOP_TIMER(upsampling);
 
-        START_TIMER(writing_png);
         write_png(out, jpeg.w, jpeg.h, &jpeg.coefs[0], &jpeg.coefs[1], &jpeg.coefs[2]);
         fclose(out);
-        STOP_TIMER(writing_png);
 
         for(int i = 0; i < 3; i++) {
                 fftwf_free(jpeg.coefs[i].fdata);
diff --git a/upsample.c b/upsample.c
new file mode 100644
index 0000000..7283007
--- /dev/null
+++ b/upsample.c
@@ -0,0 +1,44 @@
+#include <assert.h>
+
+#include <fftw3.h>
+
+#include "jpeg2png.h"
+
+void upsample(struct coef *coef, int w, int h) {
+        if(coef->h < h) {
+                assert(coef->h / 8 == ((h + 7) / 8 + 1) / 2);
+                float *new = fftwf_alloc_real(coef->w * coef->h * 2);
+                for(int y = 0; y < coef->h; y++) {
+                        for(int x = 0; x < coef->w; x++) {
+                                new[(y*2)*coef->w + x] = coef->fdata[y * coef->w + x];
+                        }
+                        for(int x = 0; x < coef->w; x++) {
+                                if(y == coef->h - 1) {
+                                        new[(y*2+1)*coef->w + x] = coef->fdata[y * coef->w + x];
+                                } else {
+                                        new[(y*2+1)*coef->w + x] = (coef->fdata[y * coef->w + x] + coef->fdata[(y+1) * coef->w + x]) / 2;
+                                }
+                        }
+                }
+                fftwf_free(coef->fdata);
+                coef->fdata = new;
+                coef->h = coef->h * 2;
+        }
+        if(coef->w < w) {
+                assert(coef->w / 8 == ((w + 7) / 8 + 1) / 2);
+                float *new = fftwf_alloc_real(coef->w * coef->h * 2);
+                for(int y = 0; y < coef->h; y++) {
+                        for(int x = 0; x < coef->w; x++) {
+                                new[(y*coef->w + x)*2] = coef->fdata[y * coef->w + x];
+                                if(x == coef->w - 1) {
+                                        new[(y*coef->w + x)*2 + 1] = coef->fdata[y * coef->w + x];
+                                } else {
+                                        new[(y*coef->w + x)*2 + 1] = (coef->fdata[y * coef->w + x] + coef->fdata[y * coef->w + x + 1]) / 2;
+                                }
+                        }
+                }
+                fftwf_free(coef->fdata);
+                coef->fdata = new;
+                coef->w = coef->w * 2;
+        }
+}
diff --git a/upsample.h b/upsample.h
new file mode 100644
index 0000000..9e20e64
--- /dev/null
+++ b/upsample.h
@@ -0,0 +1,6 @@
+#ifndef JPEG2PNG_UPSAMPLE_H
+#define JPEG2PNG_UPSAMPLE_H
+
+void upsample(struct coef *coef, int w, int h);
+
+#endif
diff --git a/utils.h b/utils.h
index e304b49..b1dfb55 100644
--- a/utils.h
+++ b/utils.h
@@ -1,8 +1,10 @@
 #ifndef JPEG2PNG_UTILS_H
 #define JPEG2PNG_UTILS_H
 
+#include <assert.h>
 #include <stdnoreturn.h>
 #include <time.h>
+#include <math.h>
 
 noreturn void die(const char *msg, ...);
 clock_t start_timer(const char *name);
@@ -32,4 +34,12 @@ inline float *p(float *in, int x, int y, int w, int h) {
         return &in[y * w + x];
 }
 
+inline float a(int n) {
+        if(n == 0) {
+                return 1./sqrt(2.);
+        } else {
+                return 1.;
+        }
+}
+
 #endif