/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "../skcms.h"
#include "GaussNewton.h"
#include "LinearAlgebra.h"
#include "TransferFunction.h"
#include <stdint.h>
#include <string.h>

bool skcms_gauss_newton_step(float (*     t)(float x, const void*),
                             const void* t_ctx,
                             float (*     f)(float x, const void*, const float P[4]),
                             const void* f_ctx,
                             void  (*grad_f)(float x, const void*, const float P[4],
                                             float dfdP[4]),
                             const void* g_ctx,
                             float P[4],
                             float x0, float x1, int N) {
    // We'll sample x from the range [x0,x1] (both inclusive) N times with even spacing.
    //
    // We want to do P' = P + (Jf^T Jf)^-1 Jf^T r(P),
    //   where r(P) is the residual vector t(x) - f(x,P)
    //   and Jf is the Jacobian matrix of f(), ∂f/∂P.
    //
    // Let's review the shape of each of these expressions:
    //   r(P)   is [N x 1], a column vector with one entry per value of x tested
    //   Jf     is [N x 4], a matrix with an entry for each (x,P) pair
    //   Jf^T   is [4 x N], the transpose of Jf
    //
    //   Jf^T Jf   is [4 x N] * [N x 4] == [4 x 4], a 4x4 matrix,
    //                                              and so is its inverse (Jf^T Jf)^-1
    //   Jf^T r(P) is [4 x N] * [N x 1] == [4 x 1], a column vector with the same shape as P
    //
    // Our implementation strategy to get to the final ∆P is
    //   1) evaluate Jf^T Jf,   call that lhs
    //   2) evaluate Jf^T r(P), call that rhs
    //   3) invert lhs
    //   4) multiply inverse lhs by rhs
    //
    // This is a friendly implementation strategy because we don't have to have any
    // buffers that scale with N, and equally nice, we don't have to perform any matrix
    // operations that are variable size.
    //
    // Other implementation strategies could trade this off, e.g. evaluating the
    // pseudoinverse of Jf ( (Jf^T Jf)^-1 Jf^T ) directly, then multiplying that by
    // the residuals.  That would probably require implementing singular value
    // decomposition, and would create a [4 x N] matrix to be multiplied by the
    // [N x 1] residual vector, but on the upside, I think that'd eliminate the
    // possibility of this skcms_gauss_newton_step() function ever failing.

    // 0) start off with lhs and rhs safely zeroed.
    skcms_Matrix4x4 lhs = {{ {0,0,0,0}, {0,0,0,0}, {0,0,0,0}, {0,0,0,0} }};
    skcms_Vector4   rhs = {  {0,0,0,0} };

    // 1,2) evaluate lhs and evaluate rhs
    //   We want to evaluate Jf only once, but both lhs and rhs involve Jf^T,
    //   so we'll have to update lhs and rhs at the same time.
    float dx = (x1-x0)/(N-1);
    for (int i = 0; i < N; i++) {
        float x = x0 + i*dx;

        float resid = t(x,t_ctx) - f(x,f_ctx,P);

        float dfdP[4] = {0,0,0,0};
        grad_f(x,g_ctx,P, dfdP);

        for (int r = 0; r < 4; r++) {
            for (int c = 0; c < 4; c++) {
                lhs.vals[r][c] += dfdP[r] * dfdP[c];
            }
            rhs.vals[r] += dfdP[r] * resid;
        }
    }

    // If any of the 4 P parameters are unused, this matrix will be singular.
    // Detect those cases and fix them up to identity instead, so we can invert.
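    //
    // For example (illustrative): if f() never reads P[3], then grad_f() leaves
    // dfdP[3] == 0 at every sample, so row 3 and column 3 of lhs are entirely zero
    // (and rhs.vals[3] == 0 too).  Setting lhs.vals[3][3] = 1 makes lhs block
    // diagonal and invertible, and the resulting dP.vals[3] works out to
    // rhs.vals[3] == 0, leaving the unused P[3] unchanged.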
    for (int k = 0; k < 4; k++) {
        if (lhs.vals[0][k]==0 && lhs.vals[1][k]==0 && lhs.vals[2][k]==0 && lhs.vals[3][k]==0 &&
            lhs.vals[k][0]==0 && lhs.vals[k][1]==0 && lhs.vals[k][2]==0 && lhs.vals[k][3]==0) {
            lhs.vals[k][k] = 1;
        }
    }

    // 3) invert lhs
    skcms_Matrix4x4 lhs_inv;
    if (!skcms_Matrix4x4_invert(&lhs, &lhs_inv)) {
        return false;
    }

    // 4) multiply inverse lhs by rhs
    skcms_Vector4 dP = skcms_Matrix4x4_Vector4_mul(&lhs_inv, &rhs);
    P[0] += dP.vals[0];
    P[1] += dP.vals[1];
    P[2] += dP.vals[2];
    P[3] += dP.vals[3];
    return true;
}

float skcms_eval_curve(float x, const void* vctx) {
    const skcms_Curve* curve = (const skcms_Curve*)vctx;

    if (curve->table_entries == 0) {
        return skcms_TransferFunction_eval(&curve->parametric, x);
    }

    // TODO: today we should always hit an entry exactly, but if that changes, lerp?
    // (We add half to account for slight int -> float -> int round tripping issues.)
    int ix = (int)( x*(curve->table_entries - 1) + 0.5f );

    if (curve->table_8) {
        return curve->table_8[ix] * (1/255.0f);
    } else {
        uint16_t be;
        memcpy(&be, curve->table_16 + 2*ix, 2);

        uint16_t le = ((be << 8) | (be >> 8)) & 0xffff;
        return le * (1/65535.0f);
    }
}
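
#if 0
// Usage sketch (illustrative only, not compiled into the library): one way to
// drive skcms_gauss_newton_step(), here fitting the toy model
// f(x,P) = P[0]*x + P[1] to the line t(x) = 2x + 1.  The example_* names are
// hypothetical, not part of skcms.  Because the model is linear in P, a single
// step should land on the exact answer; the loop just illustrates the usual
// iterate-until-converged shape.  P[2] and P[3] are unused, so this also
// exercises the singular-matrix fixup above.
#include <stdio.h>

static float example_t(float x, const void* ctx) {
    (void)ctx;
    return 2.0f*x + 1.0f;                      // the curve we want to match
}

static float example_f(float x, const void* ctx, const float P[4]) {
    (void)ctx;
    return P[0]*x + P[1];                      // our model of the curve
}

static void example_grad_f(float x, const void* ctx, const float P[4], float dfdP[4]) {
    (void)ctx; (void)P;
    dfdP[0] = x;                               // ∂f/∂P0
    dfdP[1] = 1.0f;                            // ∂f/∂P1
    dfdP[2] = 0.0f;                            // P[2] and P[3] are unused,
    dfdP[3] = 0.0f;                            // so their partials are always 0
}

int main(void) {
    float P[4] = {0,0,0,0};
    for (int i = 0; i < 3; i++) {
        if (!skcms_gauss_newton_step(example_t,      NULL,
                                     example_f,      NULL,
                                     example_grad_f, NULL,
                                     P, 0.0f, 1.0f, 16)) {
            return 1;
        }
    }
    printf("P = { %g, %g, %g, %g }\n", P[0], P[1], P[2], P[3]);  // ~{2, 1, 0, 0}
    return 0;
}
#endif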