-
Notifications
You must be signed in to change notification settings - Fork 0
/
undistort.cc
360 lines (324 loc) · 13 KB
/
undistort.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
/**
\file undistort.cc
Apply Lensfun corrections to a PNM file in place. This means, the original
file is overwritten. The command line parameters are:
- path to the PNM file
- x coordinate of top left corner
- y coordinate of top left corner
- x coordinate of top right corner
- y coordinate of top right corner
- x coordinate of bottom left corner
- y coordinate of bottom left corner
- x coordinate of bottom right corner
- y coordinate of bottom right corner
- Lensfun name of camera make
- Lensfun name of camera model
- Lensfun name of lens make
- Lensfun name of lens model
All coordinates are pixel coordinates, with the top left of the image the
origin. The corners must be the corners of a perfect rectangle which was
taken a picture of, e.g. a sheet of paper. These are used for the
perspective correction as well as the rotation, so that the edges of the
rectangle are parellel to the image borders.
The program returns the position and the dimensions of the rectangle <b>in
the output image</b> to stdout in JSON format:
\code{.json}
[x₀, y₀, width, height]
\endcode
Here, x₀ and y₀ are the coordinates of the top left corner, and width and
height are the dimensions of the rectangle.
This program does not apply colour corrections such as vignetting
correction, as those are handled by kamscan.py using flat field images.
*/
#include <Python.h>
#include <fstream>
#include <vector>
#include <iterator>
#include <iostream>
#include <string>
#include <algorithm>
#include <cmath>
#include "lensfun/lensfun.h"
/** Class for bitmap data.
In case of 2 bytes per channel, network byte order is assumed. */
class Image {
public:
Image(int width, int height, int channel_size, int channels);
Image() {};
Image(const Image &image);
/** Get the channel intensity at a certian coordinate.
\param x x coordinate
\param y y coordinate
\param channel index of the channel (for greyscale, it is always zero;
for RGB, it is 0, 1, or 2)
\return raw integer value of the intensity of this channel at this
position
*/
int get(int x, int y, int channel);
/** Get the channel intensity at a certian coordinate. The coordinates are
floats and may contain fractions. In this case, the intensity is
calculated using bilinear interpolation between the four pixels around
this coordinate.
\param x x coordinate
\param y y coordinate
\param channel index of the channel (for greyscale, it is always zero;
for RGB, it is 0, 1, or 2)
\return raw integer value of the intensity of this channel at this
position
*/
int get(float x, float y, int channel);
/** Set the channel intensity at a certian coordinate.
\param x x coordinate
\param y y coordinate
\param channel index of the channel (for greyscale, it is always zero;
for RGB, it is 0, 1, or 2)
\param value raw integer value of the intensity of this channel at this
position
*/
void set(int x, int y, int channel, int value);
/** Determine the channel descriptions. This is used by Lensfun internally
and necessary if you want to apply colour corrections, e.g. vignetting
correction.
\return the components of each pixel
*/
int components();
/** Determine the pixel format à la Lensfun. It is derived from
channel_size.
\return the pixel format as it is needed by Lensfun
*/
lfPixelFormat pixel_format();
int width, height; ///< width and height of the image in pixels
int channels; ///< number of channels; may be 1 (greyscale) or 3 (RGB)
/** the raw data (1:1 dump of the PNM content, without header)
*/
std::vector<unsigned char> data;
private:
friend std::istream& operator>>(std::istream &inputStream, Image &other);
friend std::ostream& operator<<(std::ostream &outputStream, const Image &other);
int channel_size; ///< width of one channel in bytes; may be 1 or 2
};
Image::Image(int width, int height, int channel_size, int channels) :
width(width), height(height), channel_size(channel_size), channels(channels)
{
data.resize(width * height * channel_size * channels);
}
Image::Image(const Image &image) :
width(image.width), height(image.height), channel_size(image.channel_size), channels(image.channels) {
data.resize(width * height * channel_size * channels);
}
int Image::get(int x, int y, int channel) {
if (x < 0 || x >= width || y < 0 || y >= height)
return 0;
int position = channel_size * (channels * (y * width + x) + channel);
int result = static_cast<int>(data[position]);
if (channel_size == 2)
result = (result << 8) + static_cast<int>(data[position + 1]);
return result;
}
int Image::get(float x, float y, int channel) {
float dummy;
int x0 = static_cast<int>(x);
int y0 = static_cast<int>(y);
float i0 = static_cast<float>(get(x0, y0, channel));
float i1 = static_cast<float>(get(x0 + 1, y0, channel));
float i2 = static_cast<float>(get(x0, y0 + 1, channel));
float i3 = static_cast<float>(get(x0 + 1, y0 + 1, channel));
float fraction_x = std::modf(x, &dummy);
float i01 = (1 - fraction_x) * i0 + fraction_x * i1;
float i23 = (1 - fraction_x) * i2 + fraction_x * i3;
float fraction_y = std::modf(y, &dummy);
return static_cast<int>(std::round((1 - fraction_y) * i01 + fraction_y * i23));
}
void Image::set(int x, int y, int channel, int value) {
if (x >= 0 && x < width && y >= 0 && y < height) {
int position = channel_size * (channels * (y * width + x) + channel);
if (channel_size == 1)
data[position] = static_cast<unsigned char>(value);
else if (channel_size == 2) {
data[position] = static_cast<unsigned char>(value >> 8);
data[position + 1] = static_cast<unsigned char>(value & 256);
}
}
}
int Image::components() {
switch (channels) {
case 1:
return LF_CR_1(INTENSITY);
case 3:
return LF_CR_3(RED, GREEN, BLUE);
default:
throw std::runtime_error("Invalid value of 'channels'.");
}
}
lfPixelFormat Image::pixel_format() {
switch (channel_size) {
case 1:
return LF_PF_U8;
case 2:
return LF_PF_U16;
default:
throw std::runtime_error("Invalid value of 'channel_size'.");
}
}
std::istream& operator>>(std::istream &inputStream, Image &other)
{
std::string magic_number;
int maximum_color_value;
inputStream >> magic_number;
if (magic_number == "P5")
other.channels = 1;
else if (magic_number == "P6")
other.channels = 3;
else
throw std::runtime_error("Invalid input file. Must start with 'P5' or 'P6'.");
inputStream >> other.width >> other.height >> maximum_color_value;
inputStream.get(); // skip the trailing white space
switch (maximum_color_value) {
case 255:
other.channel_size = 1;
break;
case 65535:
other.channel_size = 2;
break;
default:
throw std::runtime_error("Invalid PPM file: Maximum color value must be 255 or 65535.");
}
size_t size = other.width * other.height * other.channel_size * other.channels;
other.data.resize(size);
inputStream.read(reinterpret_cast<char*>(other.data.data()), size);
return inputStream;
}
std::ostream& operator<<(std::ostream &outputStream, const Image &other)
{
outputStream << (other.channels == 3 ? "P6" : "P5") << "\n"
<< other.width << " "
<< other.height << "\n"
<< (other.channel_size == 1 ? "255" : "65535") << "\n";
outputStream.write(reinterpret_cast<const char*>(other.data.data()), other.data.size());
return outputStream;
}
static PyObject *undistort(PyObject *self, PyObject *args) {
const char *filename, *camera_make, *camera_model, *lens_make, *lens_model;
float x0, y0, x1, y1, x2, y2, x3, y3;
PyArg_ParseTuple(args, "sffffffffssss", &filename, &x0, &y0, &x1, &y1, &x2, &y2, &x3, &y3,
&camera_make, &camera_model, &lens_make, &lens_model);
lfDatabase ldb;
if (ldb.Load() != LF_NO_ERROR) {
PyErr_SetString(PyExc_RuntimeError, "Database could not be loaded");
return NULL;
}
const lfCamera *camera;
const lfCamera **cameras = ldb.FindCamerasExt(camera_make, camera_model);
if (cameras && !cameras[1])
camera = cameras[0];
else {
std::vector<char> buffer(std::snprintf(nullptr, 0, "Cannot find unique camera in database. %i cameras found.",
(int)sizeof(cameras)));
std::snprintf(&buffer[0], buffer.size(), "Cannot find unique camera in database. %i cameras found.",
(int)sizeof(cameras));
// FixMe: Is the second argument really copied on the other side?
// Because buffer.data() will be free'ed after the return.
PyErr_SetString(PyExc_RuntimeError, buffer.data());
lf_free(cameras);
return NULL;
}
lf_free(cameras);
const lfLens *lens;
const lfLens **lenses = ldb.FindLenses(camera, lens_make, lens_model);
if (lenses) {
lens = lenses[0];
if (lenses[1])
PyErr_WarnEx(NULL, "Lens name ambiguous", 1);
} else {
PyErr_SetString(PyExc_RuntimeError, "Cannot find lens in database");
lf_free(lenses);
return NULL;
}
lf_free(lenses);
Image image;
{
std::ifstream file(filename, std::ios::binary);
file >> image;
}
lfModifier modifier(lens, 50, camera->CropFactor, image.width, image.height, image.pixel_format());
lfModifier pc_coord_modifier(lens, 50, camera->CropFactor, image.width, image.height, image.pixel_format(), true);
lfModifier back_modifier(lens, 50, camera->CropFactor, image.width, image.height, image.pixel_format(), true);
if (!modifier.EnableDistortionCorrection() || !back_modifier.EnableDistortionCorrection() ||
!pc_coord_modifier.EnableDistortionCorrection()) {
PyErr_SetString(PyExc_RuntimeError, "Failed to activate undistortion");
return NULL;
}
if (image.channels == 3)
if (!modifier.EnableTCACorrection()) {
PyErr_SetString(PyExc_RuntimeError, "Failed to activate un-TCA");
return NULL;
}
std::vector<float> x, y;
x.push_back(x0);
y.push_back(y0);
x.push_back(x2);
y.push_back(y2);
x.push_back(x1);
y.push_back(y1);
x.push_back(x3);
y.push_back(y3);
x.push_back(x0);
y.push_back(y0);
x.push_back(x1);
y.push_back(y1);
std::vector<float> x_undist, y_undist;
for (int i = 0; i < x.size(); i++) {
float result[2];
pc_coord_modifier.ApplyGeometryDistortion(x[i], y[i], 1, 1, result);
x_undist.push_back(result[0]);
y_undist.push_back(result[1]);
}
if (!modifier.EnablePerspectiveCorrection(x_undist.data(), y_undist.data(), 6, 0) ||
!back_modifier.EnablePerspectiveCorrection(x_undist.data(), y_undist.data(), 6, 0)) {
PyErr_SetString(PyExc_RuntimeError, "Failed to activate perspective correction");
return NULL;
}
std::vector<float> res(image.width * image.height * 2 * image.channels);
if (image.channels == 3)
modifier.ApplySubpixelGeometryDistortion(0, 0, image.width, image.height, res.data());
else
modifier.ApplyGeometryDistortion(0, 0, image.width, image.height, res.data());
Image new_image = image;
for (int x = 0; x < image.width; x++)
for (int y = 0; y < image.height; y++) {
int position = 2 * image.channels * (y * image.width + x);
float source_x_R = res[position];
float source_y_R = res[position + 1];
new_image.set(x, y, 0, image.get(source_x_R, source_y_R, 0));
if (image.channels == 3) {
float source_x_G = res[position + 2];
float source_y_G = res[position + 3];
float source_x_B = res[position + 4];
float source_y_B = res[position + 5];
new_image.set(x, y, 1, image.get(source_x_G, source_y_G, 1));
new_image.set(x, y, 2, image.get(source_x_B, source_y_B, 2));
}
}
std::ofstream file(filename, std::ios::binary);
file << new_image;
for (int i = 0; i < 4; i++) {
float result[2];
back_modifier.ApplyGeometryDistortion(x[i], y[i], 1, 1, result);
x[i] = result[0];
y[i] = result[1];
}
return Py_BuildValue("ffff", std::min(x[0], x[2]), std::min(y[0], y[1]),
std::max(x[1], x[3]) - std::min(x[0], x[2]), std::max(y[2], y[3]) - std::min(y[0], y[1]));
}
static PyMethodDef UndistortMethods[] = {
{"undistort", undistort, METH_VARARGS, "Undistort PNM image data."},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef undistortmodule = {
PyModuleDef_HEAD_INIT, "undistort", NULL, -1, UndistortMethods
};
PyMODINIT_FUNC
PyInit_undistort(void)
{
return PyModule_Create(&undistortmodule);
}