summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile10
-rw-r--r--nnls.c417
-rw-r--r--nnls.h1
-rw-r--r--nnls_solver.c169
4 files changed, 597 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..47c2fcf
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,10 @@
+VERSION=\"v0.0.1\"
+CC = gcc
+NNLS_CFLAGS = -O3 -s -mtune=native -Wall -lm -lz -DVERSION=$(VERSION)
+
+all: nnls_solver
+
+nnls_solver:
+ $(CC) nnls_solver.c nnls.c -o nnls_solver $(NNLS_CFLAGS)
+clean:
+ rm -vf nnls_solver
diff --git a/nnls.c b/nnls.c
new file mode 100644
index 0000000..f45bfec
--- /dev/null
+++ b/nnls.c
@@ -0,0 +1,417 @@
+/*
+ * nnls.c (c) 2003-2009 Turku PET Centre
+ * This file contains the routine NNLS (nonnegative least squares)
+ * and the subroutines required by it, except h12, which is in
+ * file 'lss_h12.c'.
+ *
+ * This routine is based on the text and fortran code in
+ * C.L. Lawson and R.J. Hanson, Solving Least Squares Problems,
+ * Prentice-Hall, Englewood Cliffs, New Jersey, 1974.
+ * Version:
+ * 2002-08-19 Vesa Oikonen
+ * 2003-05-08 Kaisa Sederholm & VO
+ * Included function nnlsWght().
+ * 2003-05-12 KS
+ * Variable a_dim1 excluded
+ *
+ * Usage of the coefficient matrix altered so that it is
+ * given in a[][] instead of a[].
+ * 2003-11-06 VO
+ * If n<2, then itmax is set to n*n instead of previous n*3.
+ * 2004-09-17 VO
+ * Doxygen style comments.
+ * 2006-24-04 Pauli Sundberg
+ * Added some debuging output, and made some comments more precise.
+ * 2007-05-17 VO
+ * 2009-04-16 VO
+ * Corrected a bug in nnls() which may have caused an infinite loop.
+ * 009-04-27 VO
+ * Added function nnlsWghtSquared() for faster pixel-by-pixel calculations.
+ * Checking for exceeding iteration count is corrected in nnls().
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <math.h>
+#define MAX(a,b) ((a) >= (b) ? (a) : (b))
+#define ABS(x) ((x) >= 0 ? (x) : -(x))
+
+int64_t h12( int64_t mode, int64_t lpivot, int64_t l1, int64_t m, double *u, int64_t u_dim1, double *up, double *cm, int64_t ice, int64_t icv, int64_t ncv) {
+ double d1, b, clinv, cl, sm;
+ int64_t k, j;
+
+ /* Check parameters */
+ if (mode!=1 && mode!=2)
+ return(1);
+ if (m<1 || u==NULL || u_dim1<1 || cm==NULL)
+ assert(0);
+ // return(1);
+ if (lpivot<0 || lpivot>=l1 || l1>m)
+ // assert(0);
+ return(1);
+
+ /* Function Body */
+ cl = ABS( u[lpivot*u_dim1] );
+ // cl= (d1 = u[lpivot*u_dim1], fabs(d1));
+
+ if (mode==2)
+ { /* Apply transformation I+U*(U**T)/B to cm[] */
+ if(cl<=0.)
+ // assert(0);
+ return(0);
+ }
+ else
+ { /* Construct the transformation */
+
+
+ /* This is the way provided in the original pseudocode
+ sm = 0;
+ for (j = l1; j < m; j++)
+ {
+ d1 = u[j * u_dim1];
+ sm += d1*d1;
+ }
+ d1 = u[lpivot * u_dim1];
+ sm += d1*d1;
+ sm = sqrt(sm);
+
+ if (u[lpivot*u_dim1] > 0)
+ sm=-sm;
+
+ up[0] = u[lpivot*u_dim1] - sm;
+ u[lpivot*u_dim1]=sm;
+ printf("Got sum: %f\n",sm);
+ */
+
+ /* and this trying to compensate overflow */
+ for (j=l1; j<m; j++)
+ { // Computing MAX
+ cl = MAX( ABS( u[j*u_dim1] ), cl );
+ }
+ // zero vector?
+
+ if (cl<=0.)
+ return(0);
+
+ clinv=1.0/cl;
+
+ // Computing 2nd power
+ d1=u[lpivot*u_dim1]*clinv;
+ sm=d1*d1;
+
+ for (j=l1; j<m; j++)
+ {
+ d1=u[j*u_dim1]*clinv;
+ sm+=d1*d1;
+ }
+ cl *= sqrt(sm);
+
+ if (u[lpivot*u_dim1] > 0.)
+ cl=-cl;
+ up[0] = u[lpivot*u_dim1] - cl;
+ u[lpivot*u_dim1]=cl;
+ }
+
+ // no vectors where to apply? only change pivot vector!
+ b=up[0] * u[lpivot*u_dim1];
+
+ /* b must be nonpositive here; if b>=0., then return */
+ if (b == 0)
+ return(0);
+
+ // ok, for all vectors we want to apply
+ for (j =0; j < ncv; j++) {
+ sm = cm[ lpivot * ice + j * icv ] * (up[0]);
+
+ for (k=l1; k<m; k++)
+ sm += cm[ k * ice + j*icv ] * u[ k*u_dim1 ];
+
+ if (sm != 0.0) {
+ sm *= (1/b);
+ // cm[lpivot, j] = ..
+ cm[ lpivot * ice + j*icv] += sm * (up[0]);
+ for (k= l1; k<m; k++)
+ {
+ cm[ k*ice + j*icv] += u[k * u_dim1]*sm;
+ }
+ }
+ }
+
+ return(0);
+}
+
+
+void g1(double a, double b, double *cterm, double *sterm, double *sig)
+{
+ double d1, xr, yr;
+
+ if( fabs(a) > fabs(b) ) {
+ xr = b / a;
+ d1 = xr;
+ yr = sqrt(d1*d1 + 1.);
+ d1 = 1./yr;
+ *cterm=(a>=0.0 ? fabs(d1) : -fabs(d1));
+ *sterm=(*cterm)*xr;
+ *sig=fabs(a)*yr;
+ } else if( b != 0.) {
+ xr = a / b;
+ d1 = xr;
+ yr = sqrt(d1 * d1 + 1.);
+ d1 = 1. / yr;
+ *sterm=(b>=0.0 ? fabs(d1) : -fabs(d1));
+ *cterm=(*sterm)*xr; *sig=fabs(b)*yr;
+ } else {
+ *sig=0.; *cterm=0.; *sterm=1.;
+ }
+}
+
+
+int64_t nnls_algorithm(double *a, int64_t m,int64_t n, double *b, double *x, double *rnorm) {
+ int64_t pfeas;
+ int ret=0;
+ int64_t iz;
+ int64_t jz;
+ int64_t k, j=0, l, itmax, izmax=0, ii, jj=0, ip;
+ double d1, d2, sm, up, ss;
+ double temp, wmax, t, alpha, asave, dummy, unorm, ztest, cc;
+
+
+ /* Check the parameters and data */
+ if(m <= 0 || n <= 0 || a == NULL || b == NULL || x == NULL)
+ return(2);
+
+ /* Allocate memory for working space, if required */
+ double *w = (double*)calloc(n, sizeof(double));
+ double *zz = (double*)calloc(m, sizeof(double));
+ int64_t *index = (int64_t*)calloc(n, sizeof(int64_t));
+ if(w == NULL || zz == NULL || index == NULL)
+ return(2);
+
+ /* Initialize the arrays INDEX[] and X[] */
+ for(k=0; k<n; k++) {
+ x[k]=0.;
+ index[k]=k;
+ }
+
+ int64_t iz2 = n - 1;
+ int64_t iz1 = 0;
+ int64_t iter=0;
+ int64_t nsetp=0;
+ int64_t npp1=0;
+
+ /* Main loop; quit if all coeffs are already in the solution or */
+ /* if M cols of A have been triangularized */
+ if(n < 3)
+ itmax=n*3;
+ else
+ itmax=n*n;
+
+
+ while(iz1 <= iz2 && nsetp < m) {
+ /* Compute components of the dual (negative gradient) vector W[] */
+ for(iz=iz1; iz<=iz2; iz++) {
+ j=index[iz];
+ sm=0.;
+ for(l=npp1; l<m; l++)
+ sm+=a[j*m + l]*b[l];
+ w[j]=sm;
+ }
+
+ while(1) {
+ /* Find largest positive W[j] */
+ for(wmax=0., iz=iz1; iz<=iz2; iz++) {
+ j=index[iz]; if(w[j]>wmax) {wmax=w[j]; izmax=iz;}}
+
+ /* Terminate if wmax<=0.; */
+ /* it indicates satisfaction of the Kuhn-Tucker conditions */
+ if(wmax<=0.0)
+ break;
+
+ iz=izmax;
+ j=index[iz];
+
+ /* The sign of W[j] is ok for j to be moved to set P. */
+ /* Begin the transformation and check new diagonal element to avoid */
+ /* near linear dependence. */
+ asave=a[j*m + npp1];
+ h12(1, npp1, npp1+1, m, &a[j*m +0], 1, &up, &dummy, 1, 1, 0);
+ unorm=0.;
+ if(nsetp!=0){
+ for(l=0; l<nsetp; l++) {
+ d1=a[j*m + l];
+ unorm+=d1*d1;
+ }
+ }
+
+ unorm=sqrt(unorm);
+ d2=unorm+(d1=a[j*m + npp1], fabs(d1)) * 0.01;
+ if((d2-unorm)>0.) {
+ /* Col j is sufficiently independent. Copy B into ZZ, update ZZ */
+ /* and solve for ztest ( = proposed new value for X[j] ) */
+ for(l=0; l<m; l++) zz[l]=b[l];
+ h12(2, npp1, npp1+1, m, &a[j*m + 0], 1, &up, zz, 1, 1, 1);
+ ztest=zz[npp1]/a[j*m +npp1];
+ /* See if ztest is positive */
+ if(ztest>0.) break;
+ }
+
+ /* Reject j as a candidate to be moved from set Z to set P. Restore */
+ /* A[npp1,j], set W[j]=0., and loop back to test dual coeffs again */
+ a[j*m+ npp1]=asave; w[j]=0.;
+ } /* while(1) */
+
+ if(wmax<=0.0)
+ break;
+
+ /* Index j=INDEX[iz] has been selected to be moved from set Z to set P. */
+ /* Update B and indices, apply householder transformations to cols in */
+ /* new set Z, zero subdiagonal elts in col j, set W[j]=0. */
+ for(l=0; l<m; ++l)
+ b[l]=zz[l];
+
+ index[iz]=index[iz1];
+ index[iz1]=j;
+ iz1++;
+ npp1++;
+ nsetp=npp1;
+
+ if(iz1<=iz2) {
+ for(jz=iz1; jz<=iz2; jz++) {
+ jj=index[jz];
+ h12(2, nsetp-1, npp1, m, &a[j*m +0], 1, &up, &a[jj*m +0], 1, m, 1);
+ }
+ }
+
+ if(nsetp!=m) {
+ for(l=npp1; l<m; l++)
+ a[j*m +l]=0.;
+ }
+
+ w[j]=0.;
+
+ /* Solve the triangular system; store the solution temporarily in Z[] */
+ for(l=0; l<nsetp; l++) {
+ ip=nsetp-(l+1);
+ if(l!=0) for(ii=0; ii<=ip; ii++) zz[ii]-=a[jj*m + ii]*zz[ip+1];
+ jj=index[ip]; zz[ip]/=a[jj*m +ip];
+ }
+
+ /* Secondary loop begins here */
+ while(++iter < itmax) {
+ /* See if all new constrained coeffs are feasible; if not, compute alpha */
+ for(alpha = 2.0, ip = 0; ip < nsetp; ip++) {
+ l=index[ip];
+ if(zz[ip]<=0.) {
+ t = -x[l]/(zz[ip]-x[l]);
+ if(alpha > t) {
+ alpha = t;
+ jj = ip - 1;
+ }
+ }
+ }
+
+ /* If all new constrained coeffs are feasible then still alpha==2. */
+ /* If so, then exit from the secondary loop to main loop */
+ if(alpha==2.0)
+ break;
+
+ /* Use alpha (0.<alpha<1.) to interpolate between old X and new ZZ */
+ for(ip=0; ip<nsetp; ip++) {
+ l = index[ip];
+ x[l] += alpha*(zz[ip]-x[l]);
+ }
+
+ /* Modify A and B and the INDEX arrays to move coefficient i */
+ /* from set P to set Z. */
+ k=index[jj+1]; pfeas=1;
+ do {
+ x[k]=0.;
+ if(jj!=(nsetp-1)) {
+ jj++;
+ for(j=jj+1; j<nsetp; j++) {
+ ii=index[j]; index[j-1]=ii;
+ g1(a[ii*m + (j-1)], a[ii*m + j], &cc, &ss, &a[ii*m + j-1]);
+ for(a[ii*m + j]=0., l=0; l<n; l++) if(l!=ii) {
+ /* Apply procedure G2 (CC,SS,A(J-1,L),A(J,L)) */
+ temp=a[l*m + j-1];
+ a[l*m + j-1]=cc*temp+ss*a[l*m + j];
+ a[l*m + j]=-ss*temp+cc*a[l*m + j];
+ }
+ /* Apply procedure G2 (CC,SS,B(J-1),B(J)) */
+ temp=b[j-1]; b[j-1]=cc*temp+ss*b[j]; b[j]=-ss*temp+cc*b[j];
+ }
+ }
+ npp1=nsetp-1; nsetp--; iz1--; index[iz1]=k;
+
+ /* See if the remaining coeffs in set P are feasible; they should */
+ /* be because of the way alpha was determined. If any are */
+ /* infeasible it is due to round-off error. Any that are */
+ /* nonpositive will be set to zero and moved from set P to set Z */
+ for(jj=0, pfeas=1; jj<nsetp; jj++) {
+ k=index[jj]; if(x[k]<=0.) {pfeas=0; break;}
+ }
+ } while(pfeas==0);
+
+ /* Copy B[] into zz[], then solve again and loop back */
+ for(k=0; k<m; k++)
+ zz[k]=b[k];
+ for(l=0; l<nsetp; l++) {
+ ip=nsetp-(l+1);
+ if(l!=0) for(ii=0; ii<=ip; ii++) zz[ii]-=a[jj*m + ii]*zz[ip+1];
+ jj=index[ip]; zz[ip]/=a[jj*m + ip];
+ }
+ } /* end of secondary loop */
+
+ if(iter>=itmax) {
+ ret = 1;
+ break;
+ }
+
+ for(ip=0; ip<nsetp; ip++) {
+ k=index[ip];
+ x[k]=zz[ip];
+ }
+ } /* end of main loop */
+
+ /* Compute the norm of the final residual vector */
+ sm=0.;
+
+ if (rnorm != NULL) {
+ if (npp1<m)
+ for (k=npp1; k<m; k++)
+ sm+=(b[k] * b[k]);
+ else
+ for (j=0; j<n; j++)
+ w[j]=0.;
+ *rnorm=sqrt(sm);
+ }
+
+ /* Free working space, if it was allocated here */
+ free(w);
+ free(zz);
+ free(index);
+ return(ret);
+}
+/* nnls_ */
+
+
+double *nnls(double *a_matrix, double *b_matrix, int64_t height, int64_t width) {
+
+ double *solution = (double*)calloc(height, sizeof(double));
+
+ if(solution == NULL) {
+ fprintf(stderr, "could not allocate enough memory for nnls\n");
+ exit(EXIT_FAILURE);
+ }
+
+ int ret = nnls_algorithm(a_matrix, width, height, b_matrix, solution, NULL);
+ if(ret == 1) {
+ printf("NNLS has reached the maximum iterations\n");
+ } else if(ret == 2) {
+ fprintf(stderr, "NNLS could not allocate enough memory\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return solution;
+}
diff --git a/nnls.h b/nnls.h
new file mode 100644
index 0000000..ded8d01
--- /dev/null
+++ b/nnls.h
@@ -0,0 +1 @@
+double *nnls(double *a_matrix, double *b_matrix, int height, int width);
diff --git a/nnls_solver.c b/nnls_solver.c
new file mode 100644
index 0000000..a8e6798
--- /dev/null
+++ b/nnls_solver.c
@@ -0,0 +1,169 @@
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "nnls.h"
+
+#define USAGE "."
+
+int load_a_matrix(char *filename) {
+
+ long x = 0;
+ long y = 0;
+ char *line = NULL;
+ size_t len = 0;
+
+ fh = fopen(filename, "r");
+ if(fh == NULL) {
+ fprintf(stderr, "Could not open %s\n", filename);
+ exit(EXIT_FAILURE);
+ }
+
+ if(getline(&line, &len, fp) != -1) {
+
+ }
+
+ while ((read = getline(&line, &len, fp)) != -1) {
+
+ }
+
+
+}
+int main(int argc, char **argv) {
+
+
+ int c;
+
+ char *a_matrix_filename = NULL;
+ char *b_matrix_filename = NULL;
+ char *output_filename = NULL;
+ char *format = NULL;
+
+ long x = 0;
+ long y = 0;
+
+ int verbose = 0;
+
+ long width = 0;
+ long height = 0;
+
+ while (1) {
+ static struct option long_options[] = {
+ {"a", required_argument, 0, 'a'},
+ {"b", required_argument, 0, 'b'},
+ {"output", required_argument, 0, 'o'},
+ {"verbose", no_argument, 0, 'v'},
+ {"format", required_argument, 0, 'f'},
+ {"version", no_argument, 0, 'V'},
+ {0, 0, 0, 0}
+ };
+
+ int option_index = 0;
+
+ c = getopt_long (argc, argv, "i:o:hdvV", long_options, &option_index);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 0:
+ case 'a':
+ a_matrix_filename = optarg;
+ break;
+ case 'b':
+ b_matrix_filename = optarg;
+ break;
+ case 'o':
+ output_filename = optarg;
+ break;
+ case 'f':
+ format = optarg;
+ break;
+ case 'v':
+ verbose = 1;
+ break;
+ case 'V':
+ printf("%s\n", VERSION);
+ exit(EXIT_SUCCESS);
+ break;
+ case 'h':
+ printf("%s\n", USAGE);
+ exit(EXIT_SUCCESS);
+ break;
+ default:
+ break;
+ }
+ }
+
+
+ if(a_matrix_filename == NULL) {
+ fprintf(stderr, "Error: a matrix file (-a) must be specified\n\n");
+ fprintf(stderr, "%s\n", USAGE);
+ exit(EXIT_FAILURE);
+ }
+
+ if(b_matrix_filename == NULL) {
+ fprintf(stderr, "Error: b matrix file (-b) must be specified\n\n");
+ fprintf(stderr, "%s\n", USAGE);
+ exit(EXIT_FAILURE);
+ }
+
+ if(output_filename == NULL) {
+ fprintf(stderr, "Error: output filename (-o) must be specified\n\n");
+ fprintf(stderr, "%s\n", USAGE);
+ exit(EXIT_FAILURE);
+ }
+
+ if(format == NULL) {
+ format = "%10lf";
+ }
+
+ if(verbose) {
+ printf("a matrix: %s\n", a_matrix_filename);
+ printf("b matrix: %s\n", a_matrix_filename);
+ printf("output file: %s\n", output_filename);
+ }
+
+ if(access (a_matrix_filename, F_OK) == -1) {
+ fprintf(stderr, "Error: could not open %s\n", a_matrix_filename);
+ exit(EXIT_FAILURE);
+ }
+
+ if(access (b_matrix_filename, F_OK) == -1) {
+ fprintf(stderr, "Error: could not open %s\n", b_matrix_filename);
+ exit(EXIT_FAILURE);
+ }
+
+ // load our matricies
+ double *a_matrix = load_a_matrix(a_matrix_filename);
+ double *b_matrix = load_b_matrix(b_matrix_filename);
+
+ // run NNLS
+ double *solution = nnls(a_matrix, b_matrix, height, width);
+
+ // print to stdout if the user supplies "-" as the output argument
+ FILE *output_fh = NULL;
+ if(strcmp(output_filename, "-") == 0) {
+ output_fh = stdout;
+ }
+ else {
+ output_fh = fopen(output_filename, "w");
+ }
+
+ if(output_fh == NULL) {
+ fprintf(stderr, "Could not open %s for writing\n", output_filename);
+ exit(EXIT_FAILURE);
+ }
+ for(x = 0; x < height; x++) {
+ fprintf(output_fh, format, solution[x]);
+ }
+ fclose(output_fh);
+
+ return EXIT_SUCCESS;
+}