/*
Copyright (c) 2015 to 2016 by Cornell University and The Regents Of
The University Of California. All Rights Reserved.

Permission to use this Procedural Yarn Fitting and Generation Tool (the "Work")
and its associated copyrights solely for educational, research and non-profit
purposes, without fee is hereby granted, provided that the user agrees as
follows:

Those desiring to incorporate the Work into commercial products or use Work and
its associated copyrights for commercial purposes should contact the Center for
Technology Licensing at Cornell University at

395 Pine Tree Road, Suite 310, Ithaca, NY 14850;
email: ctl-connect@cornell.edu;
Tel: 607-254-4698;
FAX: 607-254-5454

for a commercial license.

IN NO EVENT SHALL CORNELL UNIVERSITY ("CORNELL") OR THE UNIVERSITY OF
CALIFORNIA ("UC") BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF
THE USE OF THE WORK AND ITS ASSOCIATED COPYRIGHTS, EVEN IF CORNELL OR UC MAY
HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

THE WORK PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND NEITHER CORNELL NOR UC HAS
ANY OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
MODIFICATIONS. CORNELL AND UC MAKE NO REPRESENTATIONS AND EXTEND NO WARRANTIES
OF ANY KIND, EITHER IMPLIED OR EXPRESS, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR
THAT THE USE OF WORK AND ITS ASSOCIATED COPYRIGHTS WILL NOT INFRINGE ANY PATENT,
TRADEMARK OR OTHER RIGHTS.
*/

#include "stdafx.h"
#include "MicroCT.h"
#include "Simplex.h"

//#define CV_DEBUG
#define STAT_AVG_N 3
#define FLYAWAY_SIGMA_COUNT 1

using namespace cv;

namespace CT {

	void MitsubaVol::destroy() {
		this->m_data.clear();
	}

	int MitsubaVol::index(int x, int y, int z) {
		int width = this->m_dim[0], height= this->m_dim[1], slicesNum = this->m_dim[2];
		assert(x >= 0 && x < width);
		assert(y >= 0 && y < height);
		assert(z >= 0 && z < slicesNum);
		int id = (z*height+y)*width+x;
		return id;
	}

	/*
	 * Read a volume from a binary "VOL" file.
	 *
	 * File layout, in read order: 3-byte tag "VOL", 1-byte version (must be 3),
	 * int data format (must be 1), three int dimensions (m_dim), int channel
	 * count, six floats of bounding box (m_aabb), then the voxel data one
	 * z-slice at a time.
	 *
	 * filename: path of the file to read.
	 * verbose:  when true, progress and the outcome are printed with _tprintf.
	 *
	 * Returns true on success. On any failure the voxel buffer is emptied via
	 * destroy() and false is returned; header members that were read before the
	 * failure (m_dim, channel, m_aabb) may already have been overwritten.
	 */
	bool MitsubaVol::load(const std::string &filename, bool verbose) {
		bool ret = true;
		if ( verbose ) _tprintf(_T("Loading..."));

		FILE *fin = fopen(filename.c_str(), "rb");
		if ( fin )
		{
			static const char tag[] = "VOL";
			static const unsigned char ver = 0x3;
			const int data_format = 1;

			// in_tag is zero-filled, so the 3 bytes read stay NUL-terminated for strcmp.
			char in_tag[4] = { 0 };
			if ( fread(in_tag, 1, 3, fin) != 3 || strcmp(in_tag, tag) )
				ret = false;

			unsigned char in_ver = 0;
			if ( ret && (fread(&in_ver, 1, 1, fin) != 1 || in_ver != ver) )
				ret = false;

			int in_data_format = 0;
			if ( ret && (fread(&in_data_format, sizeof(int), 1, fin) != 1 || in_data_format != data_format) )
				ret = false;

			if ( ret && fread(&m_dim, sizeof(int), 3, fin) != 3 )
				ret = false;
			if ( ret && (fread(&channel, sizeof(int), 1, fin) != 1) )
				ret = false;
			if ( ret && fread(m_aabb, sizeof(float), 6, fin) != 6 )
				ret = false;

			// Never size the buffer from a header that failed to parse or that
			// carries negative dimensions / a non-positive channel count: a
			// negative value would turn into a huge size_t below.
			if ( ret && (m_dim[0] < 0 || m_dim[1] < 0 || m_dim[2] < 0 || channel <= 0) )
				ret = false;

			if ( ret )
			{
				// Compute sizes in size_t so large volumes do not overflow int.
				const size_t sliceSize = (size_t)m_dim[0] * (size_t)m_dim[1];
				const size_t sliceNum = (size_t)m_dim[2];
				m_data.resize(sliceSize * sliceNum);

				// NOTE(review): each voxel is read as sizeof(float) * channel bytes
				// while the buffer is sized by voxel count. Whether that stays in
				// bounds for channel > 1 depends on m_data's element type, which is
				// declared outside this file -- confirm.
				for ( size_t i = 0; ret && sliceSize > 0 && i < sliceNum; ++i )
					if ( fread(&m_data[i * sliceSize], sizeof(float) * channel, sliceSize, fin) != sliceSize )
						ret = false;
			}

			fclose(fin);
		}
		else
		{
			ret = false;
		}

		// A failed load must not leave a partially filled or stale buffer behind.
		if ( !ret ) destroy();

		if ( verbose )
		{
			if ( ret )
			{
				_tprintf(_T("\r(%u x %u x %u) volume loaded.\n"), m_dim[0], m_dim[1], m_dim[2]);
				_tprintf(_T("AABB: (%.4f, %.4f, %.4f) ~ (%.4f, %.4f, %.4f)\n"), m_aabb[0][0], m_aabb[0][1], m_aabb[0][2], m_aabb[1][0], m_aabb[1][1], m_aabb[1][2]);
			}
			else
				_tprintf(_T("\rFailed.   \n"));
		}
		return ret;
	}

	/*
	 * Write the volume to a binary "VOL" file.
	 *
	 * Written in order: tag "VOL", version byte 3, int data format 1, the three
	 * int dimensions, the int channel count, six floats of m_aabb, then the
	 * voxel data one z-slice at a time.
	 *
	 * filename: path of the file to create or overwrite.
	 * verbose:  when true, progress and the outcome are printed with _tprintf.
	 *
	 * Returns true only if every write and the final fclose succeeded. The
	 * file is not opened at all when the dimensions are negative or when
	 * m_data holds fewer voxels than the dimensions claim.
	 */
	bool MitsubaVol::save(const std::string &filename, bool verbose) {
		bool ret = true;
		if ( verbose ) _tprintf(_T("Saving..."));

		// Validate before touching the file so an inconsistent volume neither
		// clobbers an existing file nor reads past the end of m_data.
		size_t sliceSize = 0, sliceNum = 0;
		if ( m_dim[0] < 0 || m_dim[1] < 0 || m_dim[2] < 0 )
			ret = false;
		else
		{
			sliceSize = (size_t)m_dim[0] * (size_t)m_dim[1];
			sliceNum = (size_t)m_dim[2];
			const size_t voxelNum = sliceSize * sliceNum;
			if ( voxelNum > 0 && (channel <= 0 || m_data.size() < voxelNum) )
				ret = false;
		}

		if ( ret )
		{
			FILE *fout = fopen(filename.c_str(), "wb");
			if ( fout )
			{
				static const char tag[] = "VOL";
				static const unsigned char ver = 0x3;
				const int data_format = 1;

				if ( fwrite(tag, 1, 3, fout) != 3 ) ret = false;
				if ( ret && fwrite(&ver, 1, 1, fout) != 1 ) ret = false;
				if ( ret && fwrite(&data_format, sizeof(int), 1, fout) != 1 ) ret = false;

				if ( ret && fwrite(&m_dim, sizeof(int), 3, fout) != 3 ) ret = false;
				if ( ret && fwrite(&channel, sizeof(int), 1, fout) != 1 ) ret = false;
				if ( ret && fwrite(m_aabb, sizeof(float), 6, fout) != 6 ) ret = false;

				for ( size_t i = 0; ret && sliceSize > 0 && i < sliceNum; ++i )
					if ( fwrite(&m_data[i * sliceSize], sizeof(float) * channel, sliceSize, fout) != sliceSize )
						ret = false;

				// Buffered data is flushed on close; a failure here means the file is incomplete.
				if ( fclose(fout) != 0 ) ret = false;
			}
			else
				ret = false;
		}

		if ( verbose )
		{
			if ( ret )
			{
				_tprintf(_T("\r(%u x %u x %u) volume saved.\n"), m_dim[0], m_dim[1], m_dim[2]);
				_tprintf(_T("AABB: (%.4f, %.4f, %.4f) ~ (%.4f, %.4f, %.4f)\n"), m_aabb[0][0], m_aabb[0][1], m_aabb[0][2], m_aabb[1][0], m_aabb[1][1], m_aabb[1][2]);
			}
			else
				_tprintf(_T("\rFailed.   \n"));
		}
		return ret;
	}

	/*
	 * Read ply-center curves from a whitespace-separated text file.
	 *
	 * Expected content: the number of curves, then for each curve its vertex
	 * count followed by that many "x y z" triples. While reading, the summed
	 * segment length of all curves is accumulated into m_curve_length.
	 *
	 * filename: path of the text file.
	 * verbose:  when true, a summary is printed to std::cout on success.
	 *
	 * Returns false as soon as a read fails or a negative count is found;
	 * curves read up to that point stay in place. A bad curve count resets the
	 * object to zero curves.
	 */
	bool PlyCurves::load(const std::string &filename, bool verbose) {
		std::ifstream fin(filename.c_str());
		fin >> this->m_curve_num;
		if (!fin || this->m_curve_num < 0) {
			// A negative count must never reach resize(), and must not be left
			// behind for later loops that trust m_curve_num.
			this->m_curve_num = 0;
			this->curves.clear();
			return false;
		}
		this->curves.resize(this->m_curve_num);
		this->m_curve_length = 0.f;
		for (int i = 0; i < this->m_curve_num; i++) {
			int vertexNum = 0;
			fin >> vertexNum;
			if (!fin || vertexNum < 0) return false;
			this->curves[i].resize(vertexNum);

			float curve_len = 0.f;
			vec3f prev_vertex;	// only read once j > 0, i.e. after it has been assigned
			for (int j = 0; j < vertexNum; j++) {
				fin >> this->curves[i][j].x
					>> this->curves[i][j].y
					>> this->curves[i][j].z;
				if (!fin) return false;

				if (j > 0) {
					vec3f this_vertex = this->curves[i][j];
					curve_len += nv::length(this_vertex - prev_vertex);
				}
				prev_vertex = this->curves[i][j];
			}
			this->m_curve_length += curve_len;
		}
		fin.close();
		if (verbose) {
			// Avoid 0/0 when the file declares no curves.
			const float avg_length = this->m_curve_num > 0 ? this->m_curve_length / this->m_curve_num : 0.f;
			std::cout << "PlyCurves::load(): curve number = " << this->m_curve_num << " loaded. " << std::endl;
			std::cout << "PlyCurves::load(): total_curve_length = " << this->m_curve_length << " avg_length = " << avg_length << std::endl;
		}
		return true;
	}

	bool PlyCurves::save(const std::string &filename, bool verbose) {
		std::ofstream fout(filename.c_str());
		fout << this->m_curve_num << std::endl;    if (!fout) return false;
		for (int i = 0; i < this->m_curve_num; i++) {
			int vertexNum = this->curves[i].size();
			fout << vertexNum << std::endl;		   if (!fout) return false;
			for (int j = 0; j < vertexNum; j++) {
				fout << this->curves[i][j].x << " "
					<< this->curves[i][j].y << " "
					<< this->curves[i][j].z << " " << std::endl;
				if (!fout) return false;
			}
		}
		fout.close();
		if (verbose) {
			std::cout << "PlyCurves::save(): curve number = " << this->m_curve_num << " saved. " << std::endl;
		}
		return true;
	}

	/*
	 * Read fibers from a whitespace-separated text file.
	 *
	 * Expected content: the number of fibers, then for each fiber its vertex
	 * count followed by that many "x y z" triples. The summed segment length
	 * of all fibers is accumulated into m_fiber_length.
	 *
	 * filename: path of the text file.
	 * verbose:  when true, a summary is printed to std::cout on success.
	 *
	 * Returns false as soon as a read fails or a negative count is found.
	 * Parsing stops at that point instead of continuing on a failed stream,
	 * which previously fed an uninitialized or stale vertex count to resize().
	 * A bad fiber count resets the object to zero fibers.
	 */
	bool Fibers::load(const std::string &filename, bool verbose) {
		std::ifstream fin(filename.c_str());
		this->m_fiber_length = 0.f;

		fin >> this->m_fiber_num;
		if (!fin || this->m_fiber_num < 0) {
			this->m_fiber_num = 0;
			this->fibers.clear();
			return false;
		}
		this->fibers.resize(this->m_fiber_num);

		for (int f = 0; f < this->m_fiber_num; f++) {
			int vertexNum = 0;
			fin >> vertexNum;
			if (!fin || vertexNum < 0) return false;
			this->fibers[f].resize(vertexNum);

			float fiber_len = 0.f;
			vec3f prev_vertex;	// only read once v > 0, i.e. after it has been assigned
			for (int v = 0; v < vertexNum; v++) {
				fin >> this->fibers[f][v].x
					>> this->fibers[f][v].y
					>> this->fibers[f][v].z;
				if (!fin) return false;

				if (v > 0) {
					vec3f this_vertex = this->fibers[f][v];
					fiber_len += nv::length(this_vertex - prev_vertex);
				}
				prev_vertex = this->fibers[f][v];
			}
			this->m_fiber_length += fiber_len;
		}
		fin.close();
		if (verbose) {
			std::cout << "Fibers::load(): filename = " << filename << " fiber number = " << this->m_fiber_num << " loaded. " << std::endl;
			std::cout << "Fibers::load(): total_fiber_length = " << this->m_fiber_length << std::endl;
		}
		return true;
	}

	/*
	 * Write all fibers as text: first the fiber count (fibers.size()), then
	 * for each fiber its vertex count followed by one "x y z" line per vertex.
	 *
	 * filename: path of the file to create or overwrite.
	 * verbose:  when true, a summary line is printed to std::cout on success.
	 *
	 * Returns false at the first failed write, or if closing the file fails.
	 */
	bool Fibers::save(const std::string &filename, bool verbose) {
		std::ofstream fout(filename.c_str());
		const int fiberNum = this->fibers.size();
		fout << fiberNum << std::endl;
		if (!fout) return false;

		for (int f = 0; f < fiberNum; f++) {
			Polyline &fiber = this->fibers[f];
			const int vertexNum = fiber.size();
			fout << vertexNum << std::endl;
			if (!fout) return false;
			for (int v = 0; v < vertexNum; v++) {
				fout << fiber[v].x << " " << fiber[v].y << " " << fiber[v].z << std::endl;
				if (!fout) return false;
			}
		}

		// close() sets failbit when the final flush fails.
		fout.close();
		if (!fout) return false;

		if (verbose) {
			// Report the count that was actually written, not m_fiber_num,
			// which is only set by load() and may be out of date.
			std::cout << "Fibers::save(): filename = " << filename << " fiber number = " << fiberNum << " saved. " << std::endl;
		}
		return true;
	}

	/*
	 * Fill this Config from a text file of "name: value" lines.
	 *
	 * Each line is tokenized with split(line, ' ') (helper defined elsewhere;
	 * presumably a plain delimiter split -- confirm). Lines with fewer than two
	 * tokens and lines with an unknown name are ignored. Which flyaway
	 * parameters are recognized depends on IMPROVED_FLYAWAYS:
	 *   - defined:     use_flyaways, flyaway_* entries ("name: mu sigma" pairs)
	 *   - not defined: flyaway_num, mu, sigma
	 * aabb_min / aabb_max are expected as a single token "[x,y,z]".
	 *
	 * Members whose entry is missing or malformed keep their previous value.
	 * Nothing is reported if the file cannot be opened.
	 */
	void Config::load(const std::string &filename) {
		std::ifstream fin(filename.c_str());
		std::string line;
		while (std::getline(fin, line)) {
			std::vector<std::string> splits = split(line, ' ');
			if (splits.size() < 2)    continue;
			const std::string &p_name = splits[0];
			const char *value = splits[1].c_str();
#ifdef IMPROVED_FLYAWAYS
			// "mu sigma" entries carry a second value; only read it when it is
			// really there instead of indexing past the end of splits.
			const bool has_second = splits.size() > 2;
#endif
			if (p_name == "ply_num:") {
				this->ply_num = (atoi(value));
			} else if (p_name == "fiber_num:") {
				this->fiber_num= atoi(value);
#ifndef IMPROVED_FLYAWAYS
			} else if (p_name == "flyaway_num:") {
				this->flyaway_num = atoi(value);
#endif
			} else if (p_name == "fly_step_size:") {
				this->fly_step_size =  atof(value);
			} else if (p_name == "z_step_size:") {
				this->z_step_size = atof(value);
			} else if (p_name == "z_step_num:") {
				this->z_step_num = atof(value);
			} else if (p_name == "yarn_clock_wise:") {
				this->yarn_clock_wise = atoi(value);
			} else if (p_name == "fiber_clock_wise:") {
				this->fiber_clock_wise = atoi(value);
			} else if (p_name == "use_migration:") {
				this->use_migration = atoi(value);
			} else if (p_name == "yarn_alpha:") {
				this->yarn_alpha = atof(value);
			} else if (p_name == "yarn_radius:") {
				this->yarn_radius = atof(value);
			} else if (p_name == "epsilon:") {
				this->epsilon = atof(value);
			} else if (p_name == "R_max:") {
				this->R_max = atof(value);
			} else if (p_name == "beta:") {
				this->beta = atof(value);
			} else if (p_name == "alpha:") {
				this->alpha = atof(value);
			} else if (p_name == "s_i:") {
				this->s_i = atof(value);
			} else if (p_name == "rho_min:") {
				this->rho_min = atof(value);
			} else if (p_name == "rho_max:") {
				this->rho_max = atof(value);
			} else if (p_name == "ellipse_long:") {
				this->ellipse_long = atof(value);
			} else if (p_name == "ellipse_short:") {
				this->ellipse_short = atof(value);
#ifdef IMPROVED_FLYAWAYS
			} else if (p_name == "use_flyaways:") {
				this->use_flyaways = atoi(value);
			} else if (p_name == "flyaway_hair_density:") {
				this->flyaway_hair_density = atof(value);
			} else if (p_name == "flyaway_hair_ze:") {
				this->flyaway_hair_ze_mu = atof(value);
				if (has_second) this->flyaway_hair_ze_sigma = atof(splits[2].c_str());
			} else if (p_name == "flyaway_hair_r0:") {
				this->flyaway_hair_r0_mu = atof(value);
				if (has_second) this->flyaway_hair_r0_sigma = atof(splits[2].c_str());
			} else if (p_name == "flyaway_hair_re:") {
				this->flyaway_hair_re_mu = atof(value);
				if (has_second) this->flyaway_hair_re_sigma = atof(splits[2].c_str());
			} else if (p_name == "flyaway_hair_pe:") {
				this->flyaway_hair_pe_mu = atof(value);
				if (has_second) this->flyaway_hair_pe_sigma = atof(splits[2].c_str());
			} else if (p_name == "flyaway_loop_density:") {
				this->flyaway_loop_density = atof(value);
			} else if (p_name == "flyaway_loop_r1:") {
				this->flyaway_loop_r1_mu = atof(value);
				if (has_second) this->flyaway_loop_r1_sigma = atof(splits[2].c_str());
#else
			} else if (p_name == "mu:") {
				this->mu = atof(value);
			} else if (p_name == "sigma:") {
				this->sigma = atof(value);
#endif
			} else if (p_name == "aabb_min:") {
				const std::string &min_str = splits[1];
				// Strip the surrounding brackets, then split on commas. The checks
				// are done at run time because assert disappears in release builds.
				std::vector<std::string> min_values;
				if (min_str.size() >= 2)
					min_values = split(min_str.substr(1, min_str.size()-2), ',');
				if (min_values.size() == 3) {
					for (int i = 0; i < 3; i++) {
						this->aabb_min[i] = atof(min_values[i].c_str());
					}
				} else {
					std::cerr << "Config::load(): malformed aabb_min entry: " << line << std::endl;
				}
			} else if (p_name == "aabb_max:") {
				const std::string &max_str = splits[1];
				std::vector<std::string> max_values;
				if (max_str.size() >= 2)
					max_values = split(max_str.substr(1, max_str.size()-2), ',');
				if (max_values.size() == 3) {
					for (int i = 0; i < 3; i++) {
						this->aabb_max[i] = atof(max_values[i].c_str());
					}
				} else {
					std::cerr << "Config::load(): malformed aabb_max entry: " << line << std::endl;
				}
			}

		}
		fin.close();
	}

	/*
	 * Write this Config as "name: value" lines, grouped into blank-line
	 * separated sections. The bounding box is written as "[x,y,z]". The
	 * flyaway section depends on IMPROVED_FLYAWAYS: with it, the flyaway_*
	 * entries are written ("mu sigma" pairs separated by one space); without
	 * it, flyaway_num, mu and sigma are written instead.
	 * No error is reported if the file cannot be opened or written.
	 */
	void Config::save(const std::string &filename) {
		std::ofstream fout(filename.c_str());

		// Counts
		fout << "ply_num: " << this->ply_num << std::endl
			<< "fiber_num: " << this->fiber_num << std::endl;
#ifndef IMPROVED_FLYAWAYS
		fout << "flyaway_num: " << this->flyaway_num << std::endl;
#endif
		fout << std::endl;

		// Bounding box
		fout << "aabb_min: [" << this->aabb_min.x << "," << this->aabb_min.y << "," << this->aabb_min.z << "]" << std::endl
			<< "aabb_max: [" << this->aabb_max.x << "," << this->aabb_max.y << "," << this->aabb_max.z << "]" << std::endl
			<< std::endl;

		// Step sizes
		fout << "z_step_size: " << this->z_step_size << std::endl
			<< "z_step_num: " << this->z_step_num << std::endl
			<< "fly_step_size: " << this->fly_step_size << std::endl
			<< std::endl;

		// Twist
		fout << "yarn_clock_wise: " << this->yarn_clock_wise << std::endl
			<< "fiber_clock_wise: " << this->fiber_clock_wise << std::endl
			<< "yarn_alpha: " << this->yarn_alpha << std::endl
			<< "alpha: " << this->alpha << std::endl
			<< std::endl;

		// Cross-section shape
		fout << "yarn_radius: " << this->yarn_radius << std::endl
			<< "ellipse_long: " << this->ellipse_long << std::endl
			<< "ellipse_short: " << this->ellipse_short << std::endl
			<< std::endl;

		fout << "epsilon: " << this->epsilon << std::endl
			<< "beta: " << this->beta << std::endl
			<< "R_max: " << this->R_max << std::endl
			<< std::endl;

		// Migration
		fout << "use_migration: " << this->use_migration << std::endl
			<< "s_i: " << this->s_i << std::endl
			<< "rho_min: " << this->rho_min << std::endl
			<< "rho_max: " << this->rho_max << std::endl
			<< std::endl;

		// Flyaways
#ifdef IMPROVED_FLYAWAYS
		fout << "use_flyaways: " << this->use_flyaways << std::endl
			<< "flyaway_hair_density: " << this->flyaway_hair_density << std::endl
			<< "flyaway_hair_ze: " << this->flyaway_hair_ze_mu << ' ' << this->flyaway_hair_ze_sigma << std::endl
			<< "flyaway_hair_r0: " << this->flyaway_hair_r0_mu << ' ' << this->flyaway_hair_r0_sigma << std::endl
			<< "flyaway_hair_re: " << this->flyaway_hair_re_mu << ' ' << this->flyaway_hair_re_sigma << std::endl
			<< "flyaway_hair_pe: " << this->flyaway_hair_pe_mu << ' ' << this->flyaway_hair_pe_sigma << std::endl
			<< "flyaway_loop_density: " << this->flyaway_loop_density << std::endl
			<< "flyaway_loop_r1: " << this->flyaway_loop_r1_mu << ' ' << this->flyaway_loop_r1_sigma << std::endl;
#else
		fout << "mu: " << this->mu << std::endl
			<< "sigma: " << this->sigma << std::endl;
#endif

		fout.close();
	}

	/*
	 * Load the CT volume into this->vol with verbose output enabled.
	 * A failure is only reported on std::cerr; nothing is returned or thrown here.
	 */
	void CTAnalyzer::load_ct_volume(const std::string &filename) {
		if (this->vol.load(filename, true))
			return;
		std::cerr << "load_ct_volume(): failed, filename=" << filename << std::endl;
	}

	/*
	 * Save this->vol to `filename` with verbose output enabled.
	 * A failure is only reported on std::cerr; nothing is returned or thrown here.
	 */
	void CTAnalyzer::save_ct_volume(const std::string &filename) {
		bool success = this->vol.save(filename, true);
		if (!success) {
			// The message used to name load_ct_volume(), which pointed at the wrong operation.
			std::cerr << "save_ct_volume(): failed, filename=" << filename << std::endl;
		}
	}

	/*
	 * Load the ply-center curves into this->curve and, on success, build the
	 * smoothed copy via smooth_ply_curves(). A failed load is only reported on
	 * std::cerr and skips the smoothing step.
	 */
	void CTAnalyzer::load_ply_curves(const std::string &filename) {
		if (!this->curve.load(filename)) {
			std::cerr << "load_ply_curves(): failed, filename=" << filename << std::endl;
			return;
		}

		std::cout << "Smoothing curves..." << std::endl;
		this->smooth_ply_curves();
	}

	/*
	 * Build this->smooth_curve from this->curve with a moving average.
	 *
	 * For every vertex i, x and y are replaced by the mean of the x/y values of
	 * the vertices in [i - 30, i + 30], clamped to the curve; z is copied
	 * unchanged. The per-vertex loop runs under OpenMP. The result is written
	 * to WORK_PATH + "output2.txt"; the outcome of that save is not checked.
	 */
	void CTAnalyzer::smooth_ply_curves() {
		const int curveCount = this->curve.m_curve_num;
		this->smooth_curve.m_curve_num = curveCount;
		this->smooth_curve.curves.resize(curveCount);

		const int halfWindow = 30;
		for (int c = 0; c < curveCount; c++) {
			const int N = this->curve.curves[c].size();
			this->smooth_curve.curves[c].resize(N);

#pragma omp parallel for num_threads(num_of_omp_cores)
			for (int i = 0; i < N; i++) {
				// Window bounds, clamped to the valid vertex range.
				const int first = std::max(i-halfWindow, 0);
				const int last = std::min(i+halfWindow, N-1);

				vec2f sumXY = vec2f(0, 0);
				for (int j = first; j <= last; j++) {
					vec2f XY = this->curve.curves[c][j];
					sumXY += XY;
				}
				int samples = last - first + 1;
				vec2f xy = sumXY / samples;

				this->smooth_curve.curves[c][i] = vec3f(xy.x, xy.y, this->curve.curves[c][i].z);
			}
		}

		this->smooth_curve.save(std::string(WORK_PATH) + "output2.txt");
	}

	/*
	 * Load the fiber polylines into this->fibers.
	 * A failure is only reported on std::cerr; nothing is returned or thrown here.
	 */
	void CTAnalyzer::load_ct_fibers(const std::string &fiberFile) {
		if (this->fibers.load(fiberFile))
			return;
		std::cerr << "load_ct_fibers(): failed, filename = " << fiberFile << std::endl;
	}

	/*
	 * Smooth every fiber of `fibers` in place with a moving average.
	 *
	 * Vertex v becomes the mean of the vertices in
	 * [v - filter_size, v + filter_size], clamped to the fiber. Averages are
	 * computed from the unsmoothed vertices into a scratch polyline and copied
	 * back afterwards. Fibers are processed in parallel under OpenMP.
	 */
	void CTAnalyzer::smooth_ct_fibers(Fibers &fibers, int filter_size) {
		const int fiberCount = fibers.fibers.size();
#pragma omp parallel for num_threads(num_of_omp_cores)
		for (int f = 0; f < fiberCount; f++) {
			Polyline &raw = fibers.fibers[f];
			const int vertexCount = raw.size();

			Polyline averaged;
			averaged.resize(vertexCount);
			for (int v = 0; v < vertexCount; v++) {
				const int lo = std::max(v - filter_size, 0);
				const int hi = std::min(v + filter_size, vertexCount-1);

				vec3f acc = vec3f(0.f,0.f,0.f);
				int taps = 0;
				for (int k = lo; k <= hi; k++) {
					acc += raw[k];
					taps++;
				}
				averaged[v] = acc / taps;
			}
			std::copy(averaged.begin(), averaged.end(), raw.begin());
		}
	}

	/*
	 * Smooth a single fiber in place with a moving average.
	 *
	 * Vertex v becomes the mean of the vertices in
	 * [v - filter_size, v + filter_size], clamped to the fiber. Averages are
	 * computed from the unsmoothed vertices into a scratch polyline and copied
	 * back at the end.
	 */
	void CTAnalyzer::smooth_ct_fiber(Polyline &fiber, int filter_size) {
		const int vertexCount = fiber.size();
		Polyline averaged(vertexCount);

		for (int v = 0; v < vertexCount; v++) {
			const int lo = std::max(v - filter_size, 0);
			const int hi = std::min(v + filter_size, vertexCount-1);

			vec3f acc = vec3f(0.f,0.f,0.f);
			int taps = 0;
			for (int k = lo; k <= hi; k++) {
				acc += fiber[k];
				taps++;
			}
			averaged[v] = acc / taps;
		}

		std::copy(averaged.begin(), averaged.end(), fiber.begin());
	}

	/*
	 * Show z-slice `id` of the CT volume in a "MicroCTViewer" window.
	 *
	 * The slice is copied into a temporary single-channel float image, shown,
	 * and the call then waits in cvWaitKey(0) before the window and the image
	 * are released. `id` is range-checked with assert only.
	 */
	void CTAnalyzer::view_slice(int id) {
		cvNamedWindow("MicroCTViewer");

		const int width = this->vol.m_dim[0];
		const int height = this->vol.m_dim[1];
		const int slicesNum = this->vol.m_dim[2];
		assert(id >= 0 && id < slicesNum);

		IplImage *img = cvCreateImage(cvSize(width, height), IPL_DEPTH_32F, 1);
#pragma omp parallel for num_threads(num_of_omp_cores)
		for (int y = 0; y < height; y++) {
			// Rows are addressed as tightly packed floats (row stride == width).
			float *row = ((float*)img->imageData) + y*width;
			for (int x = 0; x < width; x++)
				row[x] = this->vol.lookup(this->vol.index(x, y, id));
		}

		cvShowImage("MicroCTViewer", img);
		cvWaitKey(0);

		cvDestroyWindow("MicroCTViewer");
		cvReleaseImage(&img);
	}

	void CTAnalyzer::view_all_slices(float time_pause, int stride, 
		bool show_curve, bool blob_detect, bool fit_ellipse, bool verbose) {
			cvNamedWindow("MicroCTViewer");
			const int width = this->vol.m_dim[0], height= this->vol.m_dim[1], slicesNum = this->vol.m_dim[2];

			ctImg = cvCreateImage(cvSize(width, height), IPL_DEPTH_32F, 1);
			displayImg = cvCreateImage(cvSize(width, height), IPL_DEPTH_32F, 3);

			if (!show_curve) {
				// gen curve
				int K = this->curve.m_curve_num;
				for (int i = 0; i < K; i++)
					this->curve.curves[i].resize(slicesNum, 0.f);
			}


			int id = 0;
			while( id < slicesNum ) {
				/* Fill in image with density values */
#pragma omp parallel for num_threads(num_of_omp_cores)
				for (int y = 0; y < height; y++) {
					for (int x = 0; x < width; x++) {
						int index = this->vol.index(x, y, id);
						float density = this->vol.lookup(index);
						((float*)ctImg->imageData)[y*width+x] = density;
						((vec3f*)displayImg->imageData)[y*width+x] = vec3f(density,density,density);
					}
				}

				cvShowImage("MicroCTViewer", ctImg);
				cvWaitKey(0);

				std::vector<vec2f> keypoints;
				if (blob_detect) {
					/* Detect keypoints of this CT slice */
					//blob_detection();
					this->BlobDetection(ctImg, keypoints);
					if (!fit_ellipse)
						for (int i = 0; i < keypoints.size(); i++) {
							cvCircle(displayImg, cvPoint(keypoints[i].x, keypoints[i].y), 3, cvScalar(0.f, 1.f, 0.f));
						}
						if (verbose)
							std::cout << "fiber_num=" << keypoints.size() << std::endl;
				}

				if (show_curve) {
					/* Draw ply centers */
					for (int i = 0; i < this->curve.m_curve_num; i++) {
						if (id < this->curve.curves[i].size()) {
							vec2f center = vec2f(this->curve.curves[i][id]);
							cvCircle(displayImg, cvPoint(center.x, center.y), 5, cvScalar(0.f, 0.f, 1.f)); //#define CV_RGB( r, g, b )  cvScalar( (b), (g), (r), 0 )
						}
					}
					if (verbose)
						std::cout << "ply_num=" << this->curve.m_curve_num << std::endl;
				}

				if (blob_detect && fit_ellipse) {			
					/* Fit ellipse given ply curves & blob keypoints */
					int K = this->curve.m_curve_num, N = keypoints.size(), Dim = 2;

					assert(K >= 1 && K <= 3); /* For now we safely assume ply number is between 1 and 3 */

					Mat points(N, Dim, CV_32F), labels, centers(K, Dim, CV_32F);
#pragma omp parallel for num_threads(num_of_omp_cores)
					for (int i = 0; i < N; i++) 
						points.at<float>(i, 0) = keypoints[i].x, 
						points.at<float>(i, 1) = keypoints[i].y;

					/* K-means clustering for ply blobs */
					cv::kmeans(points, K, labels, TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 0.01), 3, KMEANS_PP_CENTERS, centers);

					std::cout << "N = " << N << std::endl;

					std::vector<int> label2curveIdx(K);
					for (int i = 0; i < K; i++) {
						vec2f labelCenter = vec2f(centers.at<float>(i, 0), centers.at<float>(i, 1));
						/* Find curve index for this label center */
						float minDist = std::numeric_limits<float>::max();

						for (int j = 0; j < K; j++) {
							vec2f curveCenter = vec2f(this->curve.curves[j][std::max(id-1,0)]);
							/*if (id == 0) {
							this->curve.curves[j][id] = vec3f(labelCenter, id);
							}*/
							float tmpDist = std::sqrtf(
								(curveCenter.x-labelCenter.x) * (curveCenter.x-labelCenter.x) + 
								(curveCenter.y-labelCenter.y) * (curveCenter.y-labelCenter.y)
								);
							if (tmpDist < minDist) {
								label2curveIdx[i] = j;	minDist = tmpDist;
							}
						}
					}

					/*if (K == 2) {
					vec2f labelCenter0 = vec2f(centers.at<float>(0, 0), centers.at<float>(0, 1));
					vec2f labelCenter1 = vec2f(centers.at<float>(1, 0), centers.at<float>(1, 1));
					vec2f curveCenter0 = vec2f(this->curve.curves[0][std::max(id-1,0)]);
					vec2f curveCenter1 = vec2f(this->curve.curves[1][std::max(id-1,0)]);
					float dist00 = std::sqrtf((curveCenter0.x-labelCenter0.x) * (curveCenter0.x-labelCenter0.x) + 
					(curveCenter0.y-labelCenter0.y) * (curveCenter0.y-labelCenter0.y));
					float dist01 = std::sqrtf((curveCenter1.x-labelCenter0.x) * (curveCenter1.x-labelCenter0.x) + 
					(curveCenter1.y-labelCenter0.y) * (curveCenter1.y-labelCenter0.y));
					float dist10 = std::sqrtf((curveCenter0.x-labelCenter1.x) * (curveCenter0.x-labelCenter1.x) + 
					(curveCenter0.y-labelCenter1.y) * (curveCenter0.y-labelCenter1.y));
					float dist11 = std::sqrtf((curveCenter1.x-labelCenter1.x) * (curveCenter1.x-labelCenter1.x) + 
					(curveCenter1.y-labelCenter1.y) * (curveCenter1.y-labelCenter1.y));

					if (dist00 < dist10) {
					label2curveIdx[0] = 0;
					label2curveIdx[1] = 1;
					this->curve.curves[0][id] = vec3f(labelCenter0, id);
					this->curve.curves[1][id] = vec3f(labelCenter1, id);
					} else {
					label2curveIdx[1] = 0;
					label2curveIdx[0] = 1;
					this->curve.curves[1][id] = vec3f(labelCenter0, id);
					this->curve.curves[0][id] = vec3f(labelCenter1, id);
					}
					}*/


					//if (1 || show_curve) {
					//	/* Draw ply centers */
					//	for (int i = 0; i < this->curve.m_curve_num; i++) {
					//		if (id < this->curve.curves[i].size()) {
					//			vec2f center = vec2f(this->curve.curves[i][id]);
					//			cvCircle(displayImg, cvPoint(center.x, center.y), 5, cvScalar(0.f, 0.f, 1.f)); //#define CV_RGB( r, g, b )  cvScalar( (b), (g), (r), 0 )
					//		}
					//	}
					//	if (verbose)
					//		std::cout << "ply_num=" << this->curve.m_curve_num << std::endl;
					//}




					/* Draw blobs with different color for visualizing clustering results */
					for (int i = 0; i < N; i++) {
						int label = label2curveIdx[labels.at<int>(i, 0)];
						switch (label) {
						case 0: 
							cvCircle(displayImg, cvPoint((int)keypoints[i].x, (int)keypoints[i].y), 2, cvScalar(1.f, 0.f, 0.f)); break;
						case 1:
							cvCircle(displayImg, cvPoint((int)keypoints[i].x, (int)keypoints[i].y), 2, cvScalar(0.f, 1.f, 0.f)); break;
						case 2:
							cvCircle(displayImg, cvPoint((int)keypoints[i].x, (int)keypoints[i].y), 2, cvScalar(1.f, 0.f, 1.f)); break;
						default:
							break;
						}
					}

					/* Fit ellipse shape and draw them */
					if (K == 2) {
						vec3f C1 = vec3f(vec2f(this->curve.curves[0][id]), 0), 
							C2 = vec3f(vec2f(this->curve.curves[1][id]), 0);
						vec3f C_mid = (C1 + C2) / 2.f;

						const float yarn_radius = nv::length(C1-C2);
						if (verbose)
							std::cout << "yarn_radius=" << yarn_radius << std::endl;

						const float s1 = sqrtf((C_mid.x-C1.x)*(C_mid.x-C1.x) + (C_mid.y-C1.y)*(C_mid.y-C1.y)), 
							s2 = sqrtf((C_mid.x-C2.x)*(C_mid.x-C2.x) + (C_mid.y-C2.y)*(C_mid.y-C2.y));
						const float s = (s1 + s2) / 2;
						if (verbose)
							std::cout << "ellipse_s=" << s << std::endl;

						const vec3f short_axis = nv::normalize(C2 - C1), 
							long_axis = nv::normalize(vec3f(short_axis.y, -short_axis.x, 0));
						const float rot_angle = long_axis.y > 0 ? std::acos(long_axis.x) * 180 / pi : std::acos(-long_axis.x) * 180 / pi;

						/* Obtain long_len by looping over and clamp at 90% blobs according to their distances to center */
						typedef struct Blob2CenterStruct {
							float dist;
							vec3f point;
							int label;
							bool operator < (Blob2CenterStruct const &rhs) {
								return this->dist < rhs.dist;
							}
						} Blob2Cen;

						std::vector<Blob2Cen> blob2cens(N);
#pragma omp parallel for num_threads(num_of_omp_cores)
						for (int i = 0; i < N; i++) {
							int label = label2curveIdx[labels.at<int>(i, 0)];
							vec3f blobPoint = vec3f(keypoints[i].x, keypoints[i].y, 0);
							blob2cens[i].point = blobPoint;    blob2cens[i].label = label;
							if (label == 0) {
								float r = sqrtf((C1.x-blobPoint.x)*(C1.x-blobPoint.x) + (C1.y-blobPoint.y)*(C1.y-blobPoint.y));
								blob2cens[i].dist = r; 
							} else if (label == 1) {
								float r = sqrtf((C2.x-blobPoint.x)*(C2.x-blobPoint.x) + (C2.y-blobPoint.y)*(C2.y-blobPoint.y));
								blob2cens[i].dist = r;
							} else {
								std::cerr << "Fit ellipse K = 2, label over range, label = " << label << std::endl;
							}
						}

						std::sort(blob2cens.begin(), blob2cens.end());

						const vec2f range = vec2f(0.75f, 0.9f); 
						float l1 = 0, l2 = 0;	int counter = 0;

						for (int i = std::floor(range.x * N); i <= std::floor(range.y * N); i++) {
							vec3f blob_pos = blob2cens[i].point;
							if (blob2cens[i].label == 0) {
								vec3f blob2center = blob_pos - C1;
								float B = abs(nv::dot(blob2center, short_axis)), 
									A = abs(nv::dot(blob2center, long_axis));
								float divTerm = sqrtf(max(1 - (B*B)/(s*s), 0.f));
								if (divTerm <= 0) continue;
								float thisL1 = A / divTerm;
								if (thisL1 <= s || thisL1 > 3 * s || BAD(thisL1)) continue;

								l1 += thisL1; 
							} else {
								vec3f blob2center = blob_pos - C2;
								float B = abs(nv::dot(blob2center, short_axis)), 
									A = abs(nv::dot(blob2center, long_axis));
								float divTerm = sqrtf(max(1 - (B*B)/(s*s), 0.f));
								if (divTerm <= 0) continue;
								float thisL2 = A / divTerm;
								if (thisL2 <= s || thisL2 > 3 * s || BAD(thisL2)) continue;

								l2 += thisL2;  
							} 
							counter++;
						}

						if (counter > 0) {  
							const float l = (l1 + l2) / counter;

							if (verbose)
								std::cout << "ellipse_l=" << l << std::endl;
							ellipse(Mat(displayImg), Point(C1.x, C1.y), Size(l, s), (double)rot_angle, 0, 360, Scalar(1.f, 0.f, 0.f));
							ellipse(Mat(displayImg), Point(C2.x, C2.y), Size(l, s), (double)rot_angle, 0, 360, Scalar(0.f, 1.f, 0.f));
						}

					} else if (K == 1) {// TODO: Fit 1 ply 
						// NO-OP
					} else if (K == 3) {// TODO: Fit 3 plys with non-uniform transform
						// NO-OP
					}

				}


				cvShowImage("MicroCTViewer", ctImg);
				cvWaitKey(0/*time_pause*/);

				cvShowImage("MicroCTViewer", /*ctImg*/displayImg);
				cvWaitKey(0/*time_pause*/);



				if ( (id += stride) >= slicesNum)	break;
			}

			this->curve.save(std::string(WORK_PATH) + "output3.txt");

			cvDestroyWindow("MicroCTViewer");

			cvReleaseImage(&ctImg);
	}

	/*
	 * Detect blob centers in a single-channel float image.
	 *
	 * Steps, as implemented below:
	 *   1. For scales 1..5: Gaussian blur (sigma = scale, kernel 6*scale+1),
	 *      Laplacian, multiply by scale^2; keep the per-pixel minimum over all
	 *      scales.
	 *   2. Shift and rescale that minimum response by its global min/max.
	 *   3. A pixel is a keypoint when it is strictly smaller than all of its
	 *      in-image 8-neighbors and smaller than 0.9 times the image mean.
	 *
	 * img:       input image; a small circle is also drawn INTO it at every
	 *            keypoint, so the caller's image is modified.
	 * keypoints: detected (x, y) positions are appended; the vector is not
	 *            cleared first.
	 */
	void CTAnalyzer::BlobDetection(IplImage *img, std::vector<vec2f> &keypoints) {
		const int scales[] = {1, 2, 3, 4, 5};
		const int numScale = sizeof(scales) / sizeof(int);
		const int width = img->width, height = img->height;
		cv::Mat mat = cv::Mat(img, true);

		/* Per-pixel minimum of the scale-normalized Laplacian over all scales */
		cv::Mat minMat = cv::Mat(height, width, CV_32FC1, cvScalar(0));
		for (int s = 0; s < numScale; s++) {
			const int scale = scales[s];
			const int kernel = 3*2*scale+1;
			cv::Mat fltMat = cv::Mat(height, width, CV_32FC1, cvScalar(0));
			cv::Mat rstMat = cv::Mat(height, width, CV_32FC1, cvScalar(0));
			cv::GaussianBlur(mat, fltMat, Size(kernel, kernel), scale);
			cv::Laplacian(fltMat, rstMat, CV_32FC1);
			rstMat = rstMat * (scale * scale);
			if (s == 0) {
				minMat = rstMat;
			} else {
				cv::min(minMat, rstMat, minMat);
			}
		}

		/* Rescale the response using its global extrema */
		double minVal, maxVal;
		cv::minMaxLoc(minMat, &minVal, &maxVal);
		minMat = minMat - minVal;
		minMat = minMat * (1.0 / (maxVal - minVal));

		const int localMinNeighborSize = 1;
		const float localMinFromAvgScale = 0.9f;

		/* Mean response, accumulated in row-major order */
		double sumElems = 0;
		for (int y = 0; y < height; y++) {
			for (int x = 0; x < width; x++) {
				sumElems += minMat.at<float>(y, x);
			}
		}
		const float avgElems = sumElems / (width * height);
		const float threshold = avgElems * localMinFromAvgScale;

		for (int y = 0; y < height; y++) {
			// Neighborhood rows, clipped to the image.
			const int yLo = std::max(y - localMinNeighborSize, 0);
			const int yHi = std::min(y + localMinNeighborSize, height - 1);
			for (int x = 0; x < width; x++) {
				const float v = minMat.at<float>(y, x);
				if (!(v < threshold)) continue;

				const int xLo = std::max(x - localMinNeighborSize, 0);
				const int xHi = std::min(x + localMinNeighborSize, width - 1);

				// Strict local minimum: any neighbor that is <= v disqualifies the pixel.
				bool localMin = true;
				for (int ny = yLo; localMin && ny <= yHi; ny++) {
					for (int nx = xLo; localMin && nx <= xHi; nx++) {
						if (nx == x && ny == y) continue;
						if (v >= minMat.at<float>(ny, nx))
							localMin = false;
					}
				}

				if (localMin) {
					keypoints.push_back(vec2f(x, y));
					cvCircle(img, cvPoint(x, y), 2, cvScalar(1));
				}
			}
		}
	}

	/*
	 * Copy the densities of z-slice `id` into the member image ctImg.
	 * ctImg is written as tightly packed floats (row stride == volume width);
	 * it must already exist with matching dimensions. Rows are filled in
	 * parallel under OpenMP.
	 */
	void CTAnalyzer::fillin_ct_slice(int id) {
		const int width = this->vol.m_dim[0];
		const int height = this->vol.m_dim[1];
#pragma omp parallel for num_threads(num_of_omp_cores) 
		for (int y = 0; y < height; y++) {
			float *row = ((float*)ctImg->imageData) + y*width;
			for (int x = 0; x < width; x++)
				row[x] = this->vol.lookup(this->vol.index(x, y, id));
		}
	}

	/*
	 * Copy the densities of z-slice `id` into the caller-supplied image.
	 * `img` is written as tightly packed floats (row stride == volume width);
	 * it must already exist with matching dimensions. Rows are filled in
	 * parallel under OpenMP.
	 */
	void CTAnalyzer::fillin_ct_slice(int id, IplImage *img) {
		const int width = this->vol.m_dim[0];
		const int height = this->vol.m_dim[1];
#pragma omp parallel for num_threads(num_of_omp_cores) 
		for (int y = 0; y < height; y++) {
			float *row = ((float*)img->imageData) + y*width;
			for (int x = 0; x < width; x++)
				row[x] = this->vol.lookup(this->vol.index(x, y, id));
		}
	}

	/*
	 * Returns the sum of all pixel values of `img`.
	 *
	 * The image buffer is read as img->width * img->height consecutive floats
	 * and accumulated in row-major order into a single-precision total.
	 */
	float CTAnalyzer::density_ct_slice(IplImage *img) {
		const float *pixels = (const float*)img->imageData;
		const int pixelCount = img->width * img->height;
		float total = 0.f;
		for (int i = 0; i < pixelCount; i++) {
			total += pixels[i];
		}
		return total;
	}

	/*
	 * Returns the sum of all pixel values of `img` and collects the positions
	 * of the pixels whose value exceeds `eps`.
	 *
	 * `points` is cleared first and then receives one (x, y) entry per pixel
	 * with value > eps, in row-major order. The image buffer is read as
	 * img->width * img->height consecutive floats.
	 */
	float CTAnalyzer::density_ct_slice(IplImage *img, std::vector<vec2f> &points) {
		points.clear();
		const int cols = img->width, rows = img->height;
		const float *rowPtr = (const float*)img->imageData;
		float total = 0.f;
		for (int row = 0; row < rows; row++, rowPtr += cols) {
			for (int col = 0; col < cols; col++) {
				const float value = rowPtr[col];
				total += value;
				if (value > eps) {
					points.push_back(vec2f(col, row));
				}
			}
		}
		return total;
	}

	/*
	 * Runs the fitting pipeline and writes the fitted parameters into `config`.
	 *
	 * step == 1  : runs the first group of stages (ply count through fly-away).
	 * step == 2  : runs the second group (fiber migration and distribution).
	 * step == -1 : runs both groups in order.
	 * Any other value runs no stage at all.
	 *
	 * The stage order matters: later stages read values that earlier stages
	 * store in `config` or in members (e.g. RobustFitZStep reads the bounding
	 * box written by RobustFitAABB and sets `scalingFactor`).
	 * The OpenMP lock shared by the stages is created on entry and destroyed on
	 * exit; the elapsed clock() time is printed in whole seconds.
	 */
	void CTAnalyzer::RobustFit(Config &config, int step) {
		std::cout << "Fit step = " << step << "..." << std::endl;
		const clock_t startTicks = clock();

		this->num_of_omp_cores = omp_get_num_procs();
		omp_init_lock(&omp_lock);

		// Prints the progress line that precedes each stage.
		auto announce = [](const char *stageLine) {
			std::cout << stageLine << std::endl;
		};

		const bool runEverything = (step == -1);

		if (runEverything || step == 1) {
			announce("RobustFitPlyNum...");
			RobustFitPlyNum(config);

			announce("RobustFitAABB...");
			RobustFitAABB(config);

			announce("RobustFitZStep...");
			RobustFitZStep(config);

			announce("RobustFitFiberNum...");
			RobustFitFiberNum(config);

			announce("RobustFitYarnTwist...");
			RobustFitYarnTwist(config);

			announce("RobustFitCrossSectionShape...");
			RobustFitCrossSectionShape(config);

			announce("RobustUntieYarnToPlys...");
			RobustUntieYarnToPlys(config);

			announce("RobustRemovePlyFlyAways...");
			RobustRemovePlyFlyAways(config);

			announce("RobustFitFiberTwist...");
			RobustFitFiberTwist(config);

			announce("RobustFitFlyAway...");
			RobustFitFlyAway(config);
		}

		if (runEverything || step == 2) {
			announce("RobustFitFiberMigration...");
			RobustFitFiberMigration(config);

			announce("RobustFitFiberDistribution...");
			RobustFitFiberDistribution(config);
		}

		omp_destroy_lock(&omp_lock);
		const clock_t endTicks = clock();
		std::cout << "Done. Timeuse: " << (endTicks - startTicks) / CLOCKS_PER_SEC << " s." << std::endl;
	}

	/*
	 * Sets config.ply_num to the number of ply center curves held in `curve`.
	 */
	void CTAnalyzer::RobustFitPlyNum(Config &config) {
		config.ply_num = static_cast<int>(this->curve.m_curve_num);
	}

	/*
	 * Estimates the number of fibers per ply and stores it in config.fiber_num.
	 *
	 * 150 slices are picked with rand_range(0, slicesNum). For each slice:
	 *   - the pixels with density > eps are counted and divided by
	 *     `pixelsPerFiber` (density-based estimate);
	 *   - the blobs found by BlobDetection are counted (blob-based estimate).
	 * Both sums are averaged over the samples and divided by the ply count K.
	 * Only the density-based estimate is written to `config`; the blob-based
	 * one and a third, length-based estimate are printed for comparison.
	 *
	 * The length-based estimate reads `scalingFactor`, so RobustFitZStep must
	 * have run before this function (RobustFit calls them in that order).
	 *
	 * NOTE(review): rand_range() is called from several OpenMP threads at once
	 * and is assumed to return a value strictly below slicesNum - confirm both
	 * against its definition.
	 */
	void CTAnalyzer::RobustFitFiberNum(Config &config) {
		const int K = this->curve.m_curve_num;
		const int width = this->vol.m_dim[0], height = this->vol.m_dim[1], slicesNum = this->vol.m_dim[2];

		// Both accumulators must start at zero; they are only ever updated with +=.
		double fiber_sum = 0, blob_sum = 0;
		const int loopNum = 150;
#pragma omp parallel for num_threads(num_of_omp_cores)
		for (int i = 0; i < loopNum; i++) {
			// Per-iteration scratch image; released below so repeated calls do not leak.
			IplImage *img = cvCreateImage(cvSize(width, height), IPL_DEPTH_32F, 1);

			int sliceId = rand_range(0, slicesNum);

			this->fillin_ct_slice(sliceId, img);

			// Count the occupied pixels before BlobDetection runs on the image.
			std::vector<vec2f> pixel_area_points;
			this->density_ct_slice(img, pixel_area_points);
			const int fiberNumPixels = pixel_area_points.size();

			std::vector<vec2f> k_ps;
			this->BlobDetection(img, k_ps);

			cvReleaseImage(&img);

			const int n = fiberNumPixels / this->pixelsPerFiber;

			// The accumulators are shared between threads; guard the updates.
			omp_set_lock(&this->omp_lock);
			blob_sum += k_ps.size();
			fiber_sum += n;
			omp_unset_lock(&this->omp_lock);
		}
		int blob_n = blob_sum / (K * loopNum), fiber_n = fiber_sum / (K * loopNum);
		std::cout << "blob_n (blob) = " << blob_n << " fiber_n (density pixel) = " << fiber_n << std::endl;
		config.fiber_num = fiber_n;

		// Alternative estimate: total fiber length divided by the length of K plies.
		// It is printed only; config.fiber_num keeps the density-based value.
		const float per_curve_length = this->curve.m_curve_length / this->curve.m_curve_num * scalingFactor;
		const float total_fiber_length = this->fibers.m_fiber_length;
		const int num_fiber = total_fiber_length / (K * per_curve_length);
		std::cout << "num_fiber (length) = " << num_fiber << std::endl;
		//config.fiber_num = num_fiber;

	}

	/*
	 * Copies the volume's axis-aligned bounding box into `config`:
	 * m_aabb[0] becomes config.aabb_min and m_aabb[1] becomes config.aabb_max.
	 */
	void CTAnalyzer::RobustFitAABB(Config &config) {
		const auto &lowCorner = this->vol.m_aabb[0];
		const auto &highCorner = this->vol.m_aabb[1];
		config.aabb_min = vec3f(lowCorner[0], lowCorner[1], lowCorner[2]);
		config.aabb_max = vec3f(highCorner[0], highCorner[1], highCorner[2]);
	}

	/*
	 * Derives the voxel-to-world scale and the z sampling of the yarn.
	 *
	 * `scalingFactor` is set to the mean over x, y and z of
	 * (bounding-box extent / voxel count). config.z_step_size is fixed at 0.01
	 * and config.z_step_num is the number of such steps needed to cover the
	 * bounding box along z (rounded up).
	 *
	 * Reads config.aabb_min / config.aabb_max, so RobustFitAABB must have run
	 * first.
	 */
	void CTAnalyzer::RobustFitZStep(Config &config) {
		vec3f voxelCounts = vec3f(this->vol.m_dim[0], this->vol.m_dim[1], this->vol.m_dim[2]);
		vec3f worldExtent = config.aabb_max - config.aabb_min;
		vec3f voxelSize = worldExtent / voxelCounts;

		scalingFactor = 1.f / 3 * (voxelSize.x + voxelSize.y + voxelSize.z);

		config.z_step_size = 0.01f;
		config.z_step_num = ceil(worldExtent.z / config.z_step_size);
	}

	/*
	 * Estimates the yarn-level twist from the smoothed ply center curves and
	 * stores the result in `config`:
	 *   - config.yarn_clock_wise : 1 if the ply angle mostly decreases with
	 *                              increasing slice index, otherwise 0;
	 *   - config.yarn_radius     : statistic_avg() over the sampled radii;
	 *   - config.yarn_alpha      : z distance per full 2*pi turn, averaged over
	 *                              the plies and scaled by `real_ct_mul`.
	 *
	 * For K == 1 nothing is written. K == 2 and K == 3 follow the same two
	 * passes and differ only in how the yarn center and the radius are derived
	 * from the ply centers (midpoint of two curves vs. centroid of three).
	 *
	 * Reads `scalingFactor`, so RobustFitZStep must have run before.
	 */
	void CTAnalyzer::RobustFitYarnTwist(Config &config) {
		// NOTE(review): width and height are not used in this function.
		int width = this->vol.m_dim[0], height = this->vol.m_dim[1], slicesNum = this->vol.m_dim[2];
		const int K = this->curve.m_curve_num;	
		assert(K >= 1 && K <= 3);
		if (K == 1) {
			// Single ply: no yarn twist to fit, config is left untouched.
		} else if (K == 2) {
			/* ---- Pass 1: random sampling of twist direction and yarn radius ---- */
			const int loopNum = 30000;
			std::vector<float> yarn_radius_vec;
			// NOTE(review): yarn_radius_sum is only referenced by the commented-out code below.
			double yarn_clockwise_sum = 0, yarn_radius_sum = 0;
			int counter = 0;

			// NOTE(review): rand_range()/rand01() are called concurrently here;
			// presumably they are thread-safe - confirm against their definitions.
#pragma omp parallel for num_threads(num_of_omp_cores) 
			for (int i = 0; i < loopNum; i++) {
				// Random slice pair (ID, ID + stride) and random ply thisK.
				const int stride = (int) rand_range(20, 40);
				const int ID = std::floor(rand01() * (slicesNum - stride)), thisK = std::floor(rand01() * K);

				// Ply centers in the slice plane (z forced to 0) at slice ID.
				vec3f thisC1 = vec3f(vec2f(this->smooth_curve.curves[0][ID]), 0), 
					thisC2 = vec3f(vec2f(this->smooth_curve.curves[1][ID]), 0);
				// For two plies the "radius" sample is the center-to-center distance.
				float yarn_radius1 = nv::length(thisC1 - thisC2);
				vec3f thisCMid = (thisC1 + thisC2) / 2.f;

				// Same quantities at slice ID + stride.
				vec3f nextC1 = vec3f(vec2f(this->smooth_curve.curves[0][ID+stride]), 0), 
					nextC2 = vec3f(vec2f(this->smooth_curve.curves[1][ID+stride]), 0);
				float yarn_radius2 = nv::length(nextC1 - nextC2);
				vec3f nextCMid = (nextC1 + nextC2) / 2.f;

				// Unit direction from the yarn center to ply thisK, converted to an
				// angle in [0, 2*pi] (acos covers [0, pi]; the sign of y selects the half).
				vec3f thisFrame = nv::normalize(vec3f(vec2f(this->smooth_curve.curves[thisK][ID]), 0.f) - thisCMid);
				vec3f nextFrame = nv::normalize(vec3f(vec2f(this->smooth_curve.curves[thisK][ID+stride]), 0.f) - nextCMid);
				float thisAngle = thisFrame.y >= 0 ? std::acos(thisFrame.x) : 2*pi - std::acos(thisFrame.x);
				float nextAngle = nextFrame.y >= 0 ? std::acos(nextFrame.x) : 2*pi - std::acos(nextFrame.x);

				// Reject samples with almost no rotation or with more than a quarter
				// turn (the latter also drops pairs that straddle the 0 / 2*pi seam).
				float diffAngle = std::fabs(thisAngle - nextAngle);
				if (diffAngle < 0.01f || diffAngle > pi / 2)    continue;

				// Shared accumulators: one direction vote and one radius sample per accepted pair.
				omp_set_lock(&omp_lock);
				yarn_clockwise_sum += nextAngle < thisAngle ? 1 : 0;
				yarn_radius_vec.push_back(((yarn_radius1 + yarn_radius2) / 2) * scalingFactor);
				counter++;
				omp_unset_lock(&omp_lock);
			}
			// Majority vote over the accepted samples.
			// NOTE(review): counter may be 0 if every sample was rejected; the
			// division then yields NaN and the comparison selects 0.
			config.yarn_clock_wise = (yarn_clockwise_sum / counter > 0.5f ? 1 : 0);
			/*const vec2f range = vec2f(0.25f, 0.75f);    int y_counter = 0;
			std::sort(yarn_radius_vec.begin(), yarn_radius_vec.end());
			for (int i = std::floor(range.x * counter); i <= std::floor(range.y * counter); i++, y_counter++) {
				yarn_radius_sum += yarn_radius_vec[i];
			}
			config.yarn_radius = yarn_radius_sum / y_counter;*/

			config.yarn_radius = statistic_avg(yarn_radius_vec, STAT_AVG_N);


			/* ---- Pass 2: walk every ply in fixed steps and integrate the rotation ---- */
			double yarn_alpha_sum = 0;
			int twist_cycle = 0;
			int stride = 5;
			for (int k = 0; k < K; k++) {
				//std::ofstream debug((std::string("debug")+std::to_string((long long)k)+".txt").c_str());
				float accumulate_theta = 0.f;    float accumulate_z = 0;
				
				for (int id = 0; id < slicesNum-stride; id+=stride) {
					vec3f thisC1 = vec3f(vec2f(this->smooth_curve.curves[0][id]), 0), thisC2 = vec3f(vec2f(this->smooth_curve.curves[1][id]), 0);
					vec3f thisCMid = (thisC1 + thisC2) / 2.f;
					vec3f nextC1 = vec3f(vec2f(this->smooth_curve.curves[0][id+stride]), 0), nextC2 = vec3f(vec2f(this->smooth_curve.curves[1][id+stride]), 0);
					vec3f nextCMid = (nextC1 + nextC2) / 2.f;

					vec3f thisFrame = nv::normalize(vec3f(vec2f(this->smooth_curve.curves[k][id]), 0.f) - thisCMid);
					vec3f nextFrame = nv::normalize(vec3f(vec2f(this->smooth_curve.curves[k][id+stride]), 0.f) - nextCMid);
					float thisAngle = thisFrame.y >= 0 ? std::acos(thisFrame.x) : 2*pi - std::acos(thisFrame.x);
					float nextAngle = nextFrame.y >= 0 ? std::acos(nextFrame.x) : 2*pi - std::acos(nextFrame.x);

					//debug << thisAngle * 180 / pi << std::endl;

					// A jump of more than pi between consecutive steps is counted as
					// one crossing of the 0 / 2*pi seam.
					if (std::fabs(thisAngle - nextAngle) > pi) {
						twist_cycle += 1;
					}

					// Only steps that rotate in the fitted direction contribute to the
					// accumulated angle and z length; all others are reported on stderr
					// and skipped.
					if (config.yarn_clock_wise) {
						if (thisAngle > nextAngle) {
							accumulate_theta += (thisAngle - nextAngle);
							accumulate_z += stride * scalingFactor;
						} else {
							std::cerr << "Warning: this<next clockwise=" << config.yarn_clock_wise <<
								" this=" << thisAngle * 180 /pi << " next=" << nextAngle * 180/pi << std::endl;
						}

					} else {
						if (thisAngle < nextAngle) {
							accumulate_theta += (nextAngle - thisAngle);
							accumulate_z += stride * scalingFactor;
						} else {
							std::cerr << "Warning: this>next clockwise=" << config.yarn_clock_wise <<
								" this=" << thisAngle * 180 /pi << " next=" << nextAngle * 180/pi << std::endl;
						}
					}
				}
				// z length per full turn for this ply.
				// NOTE(review): accumulate_theta is 0 if no step was accepted, which
				// makes this a division by zero.
				yarn_alpha_sum += accumulate_z * 2 * pi / accumulate_theta;
				//debug.close();
			}
			// NOTE(review): twist_cycle / K is an integer division, so round() has
			// nothing left to round - confirm whether a floating-point division was intended.
			int half_twist_cycle =  round(twist_cycle / K) * 2;
			// Empirical correction: shrink alpha by 1.5% per half twist cycle.
			const float real_ct_mul = 1.f - half_twist_cycle * 0.015f;
			std::cout << "real_ct_mul = " << real_ct_mul << std::endl;
			config.yarn_alpha = yarn_alpha_sum / K * real_ct_mul;
		} else if (K == 3) {
			/* ---- Pass 1: random sampling of twist direction and yarn radius ---- */
			const int loopNum = 30000;
			std::vector<float> yarn_radius_vec;
			// NOTE(review): yarn_radius_sum is only referenced by the commented-out code below.
			double yarn_clockwise_sum = 0, yarn_radius_sum = 0;
			int counter = 0;

			// NOTE(review): rand_range()/rand01() are called concurrently here;
			// presumably they are thread-safe - confirm against their definitions.
#pragma omp parallel for num_threads(num_of_omp_cores) 
			for (int i = 0; i < loopNum; i++) {
				// Random slice pair (ID, ID + stride) and random ply thisK.
				const int stride = (int) rand_range(20, 40);
				const int ID = std::floor(rand01() * (slicesNum - stride)), thisK = std::floor(rand01() * K);

				// Ply centers in the slice plane (z forced to 0) and their centroid at slice ID.
				vec3f thisC1 = vec3f(vec2f(this->smooth_curve.curves[0][ID]), 0), 
					  thisC2 = vec3f(vec2f(this->smooth_curve.curves[1][ID]), 0),
					  thisC3 = vec3f(vec2f(this->smooth_curve.curves[2][ID]), 0);
				vec3f thisCMid = (thisC1 + thisC2 + thisC3) / 3.f;
				// Radius sample: twice the mean distance of the three ply centers to the centroid.
				float yarn_radius1 = nv::length(thisC1 - thisCMid) + nv::length(thisC2 - thisCMid) + nv::length(thisC3 - thisCMid);
				yarn_radius1 *= 2.f / 3.f;

				// Same quantities at slice ID + stride.
				vec3f nextC1 = vec3f(vec2f(this->smooth_curve.curves[0][ID+stride]), 0), 
					  nextC2 = vec3f(vec2f(this->smooth_curve.curves[1][ID+stride]), 0),
					  nextC3 = vec3f(vec2f(this->smooth_curve.curves[2][ID+stride]), 0);
				vec3f nextCMid = (nextC1 + nextC2 + nextC3) / 3.f;
				float yarn_radius2 = nv::length(nextC1 - nextCMid) + nv::length(nextC2 - nextCMid) + nv::length(nextC3 - nextCMid);
				yarn_radius2 *= 2.f / 3.f;
				

				// Unit direction from the yarn center to ply thisK, converted to an
				// angle in [0, 2*pi] (acos covers [0, pi]; the sign of y selects the half).
				vec3f thisFrame = nv::normalize(vec3f(vec2f(this->smooth_curve.curves[thisK][ID]), 0.f) - thisCMid);
				vec3f nextFrame = nv::normalize(vec3f(vec2f(this->smooth_curve.curves[thisK][ID+stride]), 0.f) - nextCMid);
				float thisAngle = thisFrame.y >= 0 ? std::acos(thisFrame.x) : 2*pi - std::acos(thisFrame.x);
				float nextAngle = nextFrame.y >= 0 ? std::acos(nextFrame.x) : 2*pi - std::acos(nextFrame.x);

				// Reject samples with almost no rotation or with more than a quarter
				// turn (the latter also drops pairs that straddle the 0 / 2*pi seam).
				float diffAngle = std::fabs(thisAngle - nextAngle);
				if (diffAngle < 0.01f || diffAngle > pi / 2)    continue;

				// Shared accumulators: one direction vote and one radius sample per accepted pair.
				omp_set_lock(&omp_lock);
				yarn_clockwise_sum += nextAngle < thisAngle ? 1 : 0;
				yarn_radius_vec.push_back(((yarn_radius1 + yarn_radius2) / 2) * scalingFactor);
				counter++;
				omp_unset_lock(&omp_lock);
			}
			// Majority vote over the accepted samples.
			// NOTE(review): counter may be 0 if every sample was rejected; the
			// division then yields NaN and the comparison selects 0.
			config.yarn_clock_wise = (yarn_clockwise_sum / counter > 0.5f ? 1 : 0);
			/*const vec2f range = vec2f(0.25f, 0.75f);    int y_counter = 0;
			std::sort(yarn_radius_vec.begin(), yarn_radius_vec.end());
			for (int i = std::floor(range.x * counter); i <= std::floor(range.y * counter); i++, y_counter++) {
				yarn_radius_sum += yarn_radius_vec[i];
			}
			config.yarn_radius = yarn_radius_sum / y_counter;*/

			config.yarn_radius = statistic_avg(yarn_radius_vec, STAT_AVG_N);

			/* ---- Pass 2: walk every ply in fixed steps and integrate the rotation ---- */
			double yarn_alpha_sum = 0;
			int twist_cycle = 0;
			int stride = 5;
			for (int k = 0; k < K; k++) {
				//std::ofstream debug((std::string("debug")+std::to_string((long long)k)+".txt").c_str());
				float accumulate_theta = 0.f;    float accumulate_z = 0;

				for (int id = 0; id < slicesNum-stride; id+=stride) {
					vec3f thisC1 = vec3f(vec2f(this->smooth_curve.curves[0][id]), 0), 
						  thisC2 = vec3f(vec2f(this->smooth_curve.curves[1][id]), 0),
						  thisC3 = vec3f(vec2f(this->smooth_curve.curves[2][id]), 0);
					vec3f thisCMid = (thisC1 + thisC2 + thisC3) / 3.f;
					vec3f nextC1 = vec3f(vec2f(this->smooth_curve.curves[0][id+stride]), 0),
						  nextC2 = vec3f(vec2f(this->smooth_curve.curves[1][id+stride]), 0),
						  nextC3 = vec3f(vec2f(this->smooth_curve.curves[2][id+stride]), 0);
					vec3f nextCMid = (nextC1 + nextC2 + nextC3) / 3.f;

					vec3f thisFrame = nv::normalize(vec3f(vec2f(this->smooth_curve.curves[k][id]), 0.f) - thisCMid);
					vec3f nextFrame = nv::normalize(vec3f(vec2f(this->smooth_curve.curves[k][id+stride]), 0.f) - nextCMid);
					float thisAngle = thisFrame.y >= 0 ? std::acos(thisFrame.x) : 2*pi - std::acos(thisFrame.x);
					float nextAngle = nextFrame.y >= 0 ? std::acos(nextFrame.x) : 2*pi - std::acos(nextFrame.x);

				//	debug << thisAngle * 180 / pi << std::endl;

					// A jump of more than pi between consecutive steps is counted as
					// one crossing of the 0 / 2*pi seam.
					if (std::fabs(thisAngle - nextAngle) > pi) {
						twist_cycle += 1;
					}

					// Only steps that rotate in the fitted direction contribute to the
					// accumulated angle and z length; all others are reported on stderr
					// and skipped.
					if (config.yarn_clock_wise) {
						if (thisAngle > nextAngle) {
							accumulate_theta += (thisAngle - nextAngle);
							accumulate_z += stride * scalingFactor;
						} else {
							std::cerr << "Warning: this<next clockwise=" << config.yarn_clock_wise <<
								" this=" << thisAngle * 180 /pi << " next=" << nextAngle * 180/pi << std::endl;
						}

					} else {
						if (thisAngle < nextAngle) {
							accumulate_theta += (nextAngle - thisAngle);
							accumulate_z += stride * scalingFactor;
						} else {
							std::cerr << "Warning: this>next clockwise=" << config.yarn_clock_wise <<
								" this=" << thisAngle * 180 /pi << " next=" << nextAngle * 180/pi << std::endl;
						}
					}
				}
				// z length per full turn for this ply.
				// NOTE(review): accumulate_theta is 0 if no step was accepted, which
				// makes this a division by zero.
				yarn_alpha_sum += accumulate_z * 2 * pi / accumulate_theta;
				//debug.close();
			}
			// NOTE(review): twist_cycle / K is an integer division, so round() has
			// nothing left to round - confirm whether a floating-point division was intended.
			int half_twist_cycle =  round(twist_cycle / K) * 2;
			// Empirical correction: shrink alpha by 1.5% per half twist cycle.
			const float real_ct_mul = 1.f - half_twist_cycle * 0.015f;
			std::cout << "real_ct_mul = " << real_ct_mul << std::endl;
			config.yarn_alpha = yarn_alpha_sum / K * real_ct_mul;
		} else {
			// Only reachable when the assert above is compiled out (NDEBUG).
			std::cerr << "Warning: K = " << K << " not supported!" << std::endl;
		}
	}

	/*
	 * Fits the elliptical cross section of a ply and the yarn radius, slice by
	 * slice, and stores the result in `config`:
	 *   - config.yarn_radius   : statistic_avg() of the per-slice ply-center spread;
	 *   - config.ellipse_long  : statistic_avg() of the per-slice long half-axis;
	 *   - config.ellipse_short : statistic_avg() of the per-slice short half-axis.
	 * All three are converted with `scalingFactor`, so RobustFitZStep must have
	 * run before. config.yarn_radius set earlier by RobustFitYarnTwist is
	 * overwritten here.
	 *
	 * For K == 1 nothing is written. For K == 2 / K == 3 every slice is processed
	 * as follows:
	 *   1. the CT slice is extracted and BlobDetection yields the keypoints;
	 *   2. each keypoint is assigned to the nearest (slightly contracted) ply center;
	 *   3. its offset from that center is projected onto the ply's short and long axis;
	 *   4. the axis length is estimated from the upper percentiles of those
	 *      projections (see the inline comments).
	 * The per-slice yarn radii are additionally dumped to "debug_yarn_r.txt".
	 *
	 * NOTE(review): a slice with fewer than K keypoints is not handled; cv::kmeans
	 * is expected to reject such input - confirm against the OpenCV version in use.
	 */
	void CTAnalyzer::RobustFitCrossSectionShape(Config &config) {
		int width = this->vol.m_dim[0], height= this->vol.m_dim[1], slicesNum = this->vol.m_dim[2];
		const int K = this->curve.m_curve_num;	
		assert(K >= 1 && K <= 3);

		// Never index a ply curve beyond its last sample.
		for (int i = 0; i < K; i++) {
			slicesNum = std::min(slicesNum, (int)this->curve.curves[i].size());
		}
		std::cout << "Final slicesNum = " << slicesNum << std::endl;

		if (K == 1) {
			// Single ply: nothing is fitted, config is left untouched.
		} else if (K == 2) {
			const int loopNum = slicesNum;
			// One entry per slice; all values are stored in world units.
			std::vector<float> ellipse_s_vec(loopNum), ellipse_l_vec(loopNum), yarn_radius_vec(loopNum);
#ifdef CV_DEBUG
			cvNamedWindow("DEBUG");
#endif
			for (int loopID = 0; loopID < loopNum; loopID++) {
				int id = loopID;//rand_range(0, slicesNum);
				IplImage *thisCTImg = cvCreateImage(cvSize(width, height), IPL_DEPTH_32F, 1);
				IplImage *displayImg = cvCreateImage(cvSize(width, height), IPL_DEPTH_32F, 3);

				fillin_ct_slice(id, thisCTImg);

				// Gray-to-RGB copy of the slice for the debug overlay.
#pragma omp parallel for
				for (int y = 0; y < height; y++) {
					for (int x = 0; x < width; x++) {
						float den = ((float*)thisCTImg->imageData)[y*width + x];
						((vec3f*)displayImg->imageData)[y*width + x] = vec3f(den, den, den);
					}
				}


				std::vector<vec2f> this_keypoints;
				this->BlobDetection(thisCTImg, this_keypoints);
				int N = this_keypoints.size(), Dim = 2;

				// Cluster the keypoints into K groups and match every cluster to its
				// nearest ply curve. The resulting labels / mappings are not read by
				// the rest of this function.
				Mat points(N, Dim, CV_32F), labels, centers(K, Dim, CV_32F);
				for (int i = 0; i < N; i++) 
					points.at<float>(i, 0) = this_keypoints[i].x, 
					points.at<float>(i, 1) = this_keypoints[i].y;
				cv::kmeans(points, K, labels, TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 0.01), 3, KMEANS_PP_CENTERS, centers);

				std::vector<int> label2curveIdx(K), curveIdx2label(K);
				std::vector<vec2f> labelCenters(K);

				for (int i = 0; i < K; i++) {
					vec2f labelCenter = vec2f(centers.at<float>(i, 0), centers.at<float>(i, 1));
					labelCenters[i] = labelCenter;
					/* Find curve index for this label center */
					float minDist = std::numeric_limits<float>::max();
					for (int j = 0; j < K; j++) {
						vec2f curveCenter = vec2f(this->curve.curves[j][id]);
						float tmpDist = std::sqrt((curveCenter.x-labelCenter.x) * (curveCenter.x-labelCenter.x) + (curveCenter.y-labelCenter.y) * (curveCenter.y-labelCenter.y));
						if (tmpDist < minDist) {
							label2curveIdx[i] = j; minDist = tmpDist;
						}
					}
					curveIdx2label[label2curveIdx[i]] = i;
				}

				// Ply centers of this slice, each pulled 10% towards the other one.
				const vec3f C1_o = vec3f(vec2f(this->curve.curves[0][id]), 0);
				const vec3f C2_o = vec3f(vec2f(this->curve.curves[1][id]), 0);
				const float interpolate = 0.9f;//1.f;
				/*this->recover_scale = 1.f / (2 * interpolate - 1.f);*/
				const vec3f C1 = interpolate * C1_o + (1-interpolate) * C2_o, C2 = interpolate * C2_o + (1-interpolate) * C1_o;
				const vec3f C_mid = (C1 + C2) / 2.f;

				cvCircle(displayImg, cvPoint(C1.x, C1.y), 3, cvScalar(1.f, 0.f, 0.f));
				cvCircle(displayImg, cvPoint(C2.x, C2.y), 3, cvScalar(1.f, 0.f, 0.f));

				// For two plies the yarn radius sample is the distance between the adjusted centers.
				float dist_center = nv::length(C1 - C2); //nv::length(C1_o - C2_o);//
				yarn_radius_vec[loopID] = dist_center * scalingFactor;

				// Short axis: along the line joining the plies; long axis: perpendicular to it.
				const vec3f short_axis = nv::normalize(C1 - C2), long_axis = nv::normalize(vec3f(short_axis.y, -short_axis.x, 0));
				// Orientation of the long axis in degrees, used only for drawing.
				const float rot_angle = long_axis.y > 0 ? std::acos(long_axis.x) * 180 / pi : std::acos(-long_axis.x) * 180 / pi;

				float e_l, e_s;

				// Per-keypoint record: absolute projections on both axes, position and ply label.
				typedef struct Blob2CenterStruct {
					float s_value, l_value;
					vec3f point; int label;
				} Blob2Cen;

				struct less_than_s {
					inline bool operator() (const Blob2CenterStruct &lhs, const Blob2CenterStruct &rhs) {
						return lhs.s_value < rhs.s_value;
					}
				};
				struct less_than_l {
					inline bool operator() (const Blob2CenterStruct &lhs, const Blob2CenterStruct &rhs) {
						return lhs.l_value < rhs.l_value;
					}
				};

				std::vector<Blob2Cen> blob2cens(N);

				for (int j = 0; j < N; j++) {
					vec3f blobPoint = vec3f(this_keypoints[j].x, this_keypoints[j].y, 0), cen2blob;
					blob2cens[j].point = blobPoint;    

					// Assign the keypoint to the nearer ply center.
					int label = (nv::length(C1-blobPoint) > nv::length(C2-blobPoint)) ? 1 : 0;

					blob2cens[j].label = label;

					if (label == 0)			cen2blob = blobPoint - C1;
					else /* label == 1*/	cen2blob = blobPoint - C2;

					float s_value = std::fabs(nv::dot(cen2blob, short_axis));
					float l_value = std::fabs(nv::dot(cen2blob, long_axis));
					blob2cens[j].s_value = s_value;    blob2cens[j].l_value = l_value;
				}


				// Percentile bands (as fractions of the sorted keypoints):
				//   range_low  - band that is averaged for the conservative estimate;
				//   range_high - band that is scanned for the outermost plausible keypoint.
				vec2f range_low = vec2f(0.75f, 0.9f), range_high = vec2f(0.9f, 0.9999f); 
				float range_low_avg = (range_low.x + range_low.y) / 2;
				// A keypoint farther out than tolerance * low estimate is treated as an outlier.
				const float tolerance = 1.5f;

				/* short axis */
				std::sort(blob2cens.begin(), blob2cens.end(), less_than_s());
				
				// Low estimate: mean of the range_low band, divided by the band's mid
				// fraction to extrapolate it to the full extent.
				float e_s_sum = 0; int counter = 0;
				for (int j = std::floor(range_low.x * N); j <= std::floor(range_low.y * N); j++, counter++) {
					e_s_sum += blob2cens[j].s_value;
				}
				float e_s_low = e_s_sum / counter / range_low_avg;

				// High estimate: walk inwards from the outermost keypoint and stop at
				// the first one within tolerance; if none qualifies the value at the
				// lower end of range_high is kept.
				float e_s_high = e_s_low;
				for (int j = std::floor(range_high.y * N); j >= std::floor(range_high.x * N); j--) {
					e_s_high = blob2cens[j].s_value;
					if (e_s_high < e_s_low * tolerance) {
						break;
					}
				}
				e_s = (e_s_low + e_s_high) / 2.f /** recover_scale*/;
				ellipse_s_vec[loopID] = e_s * scalingFactor;


				/* long axis */
				// Same procedure as for the short axis, on the long-axis projections.
				std::sort(blob2cens.begin(), blob2cens.end(), less_than_l());
				float e_l_sum = 0; counter = 0;
				for (int j = std::floor(range_low.x * N); j <= std::floor(range_low.y * N); j++, counter++) {
					e_l_sum += blob2cens[j].l_value;
				}
				float e_l_low = e_l_sum / counter / range_low_avg;
				
				float e_l_high = e_l_low;
				for (int j = std::floor(range_high.y * N); j >= std::floor(range_high.x * N); j--) {
					e_l_high = blob2cens[j].l_value;
					if (e_l_high < e_l_low * tolerance) {
						break;
					}
				}
				e_l = (e_l_low + e_l_high) / 2.f /** recover_scale*/;
				ellipse_l_vec[loopID] = e_l * scalingFactor;

				// Debug overlay: the fitted ellipse (in pixels) around both ply centers.
				const float cvEL_low = e_l_low, cvES_low = e_s_low;
				//ellipse(Mat(displayImg), Point(C1.x, C1.y), Size(cvEL_low, cvES_low), (double)rot_angle, 0, 360, Scalar(0.f, 1.f, 1.f));
				//ellipse(Mat(displayImg), Point(C2.x, C2.y), Size(cvEL_low, cvES_low), (double)rot_angle, 0, 360, Scalar(0.f, 1.f, 1.f));

				const float cvEL_high = e_l_high, cvES_high = e_s_high;
				//ellipse(Mat(displayImg), Point(C1.x, C1.y), Size(cvEL_high, cvES_high), (double)rot_angle, 0, 360, Scalar(0.f, 0.f, 1.f));
				//ellipse(Mat(displayImg), Point(C2.x, C2.y), Size(cvEL_high, cvES_high), (double)rot_angle, 0, 360, Scalar(0.f, 0.f, 1.f));

				const float cvEL = (cvEL_low + cvEL_high) / 2.f, cvES = (cvES_low + cvES_high) / 2.f;
				ellipse(Mat(displayImg), Point(C1.x, C1.y), Size(cvEL, cvES), (double)rot_angle, 0, 360, Scalar(1.f, 1.f, 1.f));
				ellipse(Mat(displayImg), Point(C2.x, C2.y), Size(cvEL, cvES), (double)rot_angle, 0, 360, Scalar(1.f, 1.f, 1.f));
#ifdef CV_DEBUG
				cvShowImage("DEBUG", displayImg);
				cvWaitKey(1);
#endif
				cvReleaseImage(&thisCTImg);
				cvReleaseImage(&displayImg);

			}

			// Dump the per-slice yarn radii for offline inspection.
			std::ofstream debug("debug_yarn_r.txt");
			for (int i = 0; i < loopNum; i++) {
				debug << yarn_radius_vec[i] << std::endl;
			}
			debug.close();


			config.yarn_radius = statistic_avg(yarn_radius_vec, STAT_AVG_N);  //std::accumulate(yarn_radius_vec.begin(), yarn_radius_vec.end(), 0.f) / loopNum;
			config.ellipse_long = statistic_avg(ellipse_l_vec, STAT_AVG_N);  //std::accumulate(ellipse_l_vec.begin(), ellipse_l_vec.end(), 0.f) / loopNum;
			config.ellipse_short = statistic_avg(ellipse_s_vec, STAT_AVG_N);  //std::accumulate(ellipse_s_vec.begin(), ellipse_s_vec.end(), 0.f) / loopNum;
			std::cout << "y_r = " << config.yarn_radius << " e_l = " << config.ellipse_long << " e_s = " << config.ellipse_short << std::endl;
		} else if (K == 3) {
			const int loopNum = slicesNum;
			// One entry per slice; all values are stored in world units.
			std::vector<float> ellipse_s_vec(loopNum), ellipse_l_vec(loopNum), yarn_radius_vec(loopNum);
#ifdef CV_DEBUG			
			cvNamedWindow("DEBUG");
#endif
			for (int loopID = 0; loopID < loopNum; loopID++) {
				int id = loopID;//rand_range(0, slicesNum);
				IplImage *thisCTImg = cvCreateImage(cvSize(width, height), IPL_DEPTH_32F, 1);
				IplImage *displayImg = cvCreateImage(cvSize(width, height), IPL_DEPTH_32F, 3);

				fillin_ct_slice(id, thisCTImg);

				// Gray-to-RGB copy of the slice for the debug overlay.
#pragma omp parallel for
				for (int y = 0; y < height; y++) {
					for (int x = 0; x < width; x++) {
						float den = ((float*)thisCTImg->imageData)[y*width + x];
						((vec3f*)displayImg->imageData)[y*width + x] = vec3f(den, den, den);
					}
				}


				std::vector<vec2f> this_keypoints;
				this->BlobDetection(thisCTImg, this_keypoints);
				int N = this_keypoints.size(), Dim = 2;

				// Cluster the keypoints into K groups and match every cluster to its
				// nearest ply curve. The resulting labels / mappings are not read by
				// the rest of this function.
				Mat points(N, Dim, CV_32F), labels, centers(K, Dim, CV_32F);
				for (int i = 0; i < N; i++) 
					points.at<float>(i, 0) = this_keypoints[i].x, 
					points.at<float>(i, 1) = this_keypoints[i].y;
				cv::kmeans(points, K, labels, TermCriteria(CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 0.01), 3, KMEANS_PP_CENTERS, centers);

				std::vector<int> label2curveIdx(K), curveIdx2label(K);
				std::vector<vec2f> labelCenters(K);

				for (int i = 0; i < K; i++) {
					vec2f labelCenter = vec2f(centers.at<float>(i, 0), centers.at<float>(i, 1));
					labelCenters[i] = labelCenter;
					/* Find curve index for this label center */
					float minDist = std::numeric_limits<float>::max();
					for (int j = 0; j < K; j++) {
						vec2f curveCenter = vec2f(this->curve.curves[j][id]);
						float tmpDist = std::sqrt((curveCenter.x-labelCenter.x) * (curveCenter.x-labelCenter.x) + (curveCenter.y-labelCenter.y) * (curveCenter.y-labelCenter.y));
						if (tmpDist < minDist) {
							label2curveIdx[i] = j; minDist = tmpDist;
						}
					}
					curveIdx2label[label2curveIdx[i]] = i;
				}

				// Ply centers of this slice, each pulled 10% towards the mean of the other two.
				const vec3f C1_o = vec3f(vec2f(this->curve.curves[0][id]), 0); 
				const vec3f C2_o = vec3f(vec2f(this->curve.curves[1][id]), 0);
				const vec3f C3_o = vec3f(vec2f(this->curve.curves[2][id]), 0);
				const float interpolate = 0.9f; 
				/*this->recover_scale = 2.f / (3 * interpolate - 1);*/
				const vec3f C1 = interpolate * C1_o + (1-interpolate) * (C2_o + C3_o) / 2, 
					        C2 = interpolate * C2_o + (1-interpolate) * (C1_o + C3_o) / 2,
							C3 = interpolate * C3_o + (1-interpolate) * (C1_o + C2_o) / 2;
				const vec3f C_mid = (C1 + C2 + C3) / 3.f;

				cvCircle(displayImg, cvPoint(C1.x, C1.y), 3, cvScalar(1.f, 0.f, 0.f));
				cvCircle(displayImg, cvPoint(C2.x, C2.y), 3, cvScalar(1.f, 0.f, 0.f));
				cvCircle(displayImg, cvPoint(C3.x, C3.y), 3, cvScalar(1.f, 0.f, 0.f));

				// Yarn radius sample: twice the mean distance of the adjusted centers to their centroid.
				float dist_center = (nv::length(C1 - C_mid) + nv::length(C2 - C_mid) + nv::length(C3 - C_mid)) / 3.f;
					//(nv::length(C1_o - C_mid) + nv::length(C2_o - C_mid) + nv::length(C3_o - C_mid)) / 3.f;
				yarn_radius_vec[loopID] = dist_center * 2 * scalingFactor;
				
				// Per-keypoint record: absolute projections on both axes, position and ply label.
				typedef struct Blob2CenterStruct {
					float s_value, l_value;
					vec3f point;   int label;
				} Blob2Cen;

				struct less_than_s {
					inline bool operator() (const Blob2CenterStruct &lhs, const Blob2CenterStruct &rhs) {
						return lhs.s_value < rhs.s_value;
					}
				};

				struct less_than_l {
					inline bool operator() (const Blob2CenterStruct &lhs, const Blob2CenterStruct &rhs) {
						return lhs.l_value < rhs.l_value;
					}
				};

				std::vector<Blob2Cen> blob2cens(N);

				/*for (int j = 0; j < N; j++) {
					cvCircle(displayImg, cvPoint((int)this_keypoints[j].x, (int)this_keypoints[j].y), 3, cvScalar(1.f, 0.f, 1.f));
				}*/

				for (int j = 0; j < N; j++) {
					/*int label = label2curveIdx[labels.at<int>(j, 0)];*/
					vec3f blobPoint = vec3f(this_keypoints[j].x, this_keypoints[j].y, 0), cen2blob;
					blob2cens[j].point = blobPoint;    
					// Assign the keypoint to the nearest of the three ply centers.
					float dist1 = nv::length(blobPoint - C1), dist2 = nv::length(blobPoint - C2), dist3 = nv::length(blobPoint - C3);
					int label = dist1 > dist2 ? (dist2 > dist3 ? 2 : 1) : (dist1 > dist3 ? 2 : 0);
					blob2cens[j].label = label;

					vec3f plyC;
					if (label == 0)			{ cen2blob = blobPoint - C1; plyC = C1; }
					else if (label == 1)	{ cen2blob = blobPoint - C2; plyC = C2; }
					else /*  label == 2*/   { cen2blob = blobPoint - C3; plyC = C3; }
					
					// Short axis: from the ply center towards the yarn center; long axis: perpendicular to it.
					vec3f short_axis = nv::normalize(C_mid - plyC), long_axis = nv::normalize(vec3f(short_axis.y, -short_axis.x, 0));

					float s_value = std::fabs(nv::dot(cen2blob, short_axis));
					float l_value = std::fabs(nv::dot(cen2blob, long_axis));
					blob2cens[j].s_value = s_value;    blob2cens[j].l_value = l_value;
				}


				// Percentile bands (as fractions of the sorted keypoints):
				//   range_low  - band that is averaged for the conservative estimate;
				//   range_high - band that is scanned for the outermost plausible keypoint.
				vec2f range_low = vec2f(0.75f, 0.9f), range_high = vec2f(0.9f, 0.9999f); 
				float range_low_avg = (range_low.x + range_low.y) / 2;
				// A keypoint farther out than tolerance * low estimate is treated as an outlier.
				const float tolerance = 1.7f;

				float e_l, e_s;
				/* short axis */
				std::sort(blob2cens.begin(), blob2cens.end(), less_than_s());

				// Low estimate: mean of the range_low band, divided by the band's mid
				// fraction to extrapolate it to the full extent.
				float e_s_sum = 0; int counter = 0;
				for (int j = std::floor(range_low.x * N); j <= std::floor(range_low.y * N); j++, counter++) {
					e_s_sum += blob2cens[j].s_value;
				}
				float e_s_low = e_s_sum / counter / range_low_avg;

				// High estimate: walk inwards from the outermost keypoint and stop at
				// the first one within tolerance; if none qualifies the value at the
				// lower end of range_high is kept.
				float e_s_high = e_s_low;
				for (int j = std::floor(range_high.y * N); j >= std::floor(range_high.x * N); j--) {
					e_s_high = blob2cens[j].s_value;
					if (e_s_high < e_s_low * tolerance) {
						break;
					}
				}
				e_s = (e_s_low + e_s_high) / 2.f /** recover_scale*/;
				ellipse_s_vec[loopID] = e_s * scalingFactor;


				/* long axis */
				// Same procedure as for the short axis, on the long-axis projections.
				std::sort(blob2cens.begin(), blob2cens.end(), less_than_l());
				float e_l_sum = 0; counter = 0;
				for (int j = std::floor(range_low.x * N); j <= std::floor(range_low.y * N); j++, counter++) {
					e_l_sum += blob2cens[j].l_value;
				}
				float e_l_low = e_l_sum / counter / range_low_avg;

				float e_l_high = e_l_low;
				for (int j = std::floor(range_high.y * N); j >= std::floor(range_high.x * N); j--) {
					e_l_high = blob2cens[j].l_value;
					if (e_l_high < e_l_low * tolerance) {
						break;
					}
				}
				e_l = (e_l_low + e_l_high) / 2.f /** recover_scale*/;
				ellipse_l_vec[loopID] = e_l * scalingFactor;

				// Debug overlay: orientation (degrees) of each ply's long axis, which is
				// perpendicular to the direction from the yarn center to that ply.
				float rot1, rot2, rot3;
				vec3f y_to_c1 = nv::normalize(C1 - C_mid), y_to_c2 = nv::normalize(C2 - C_mid), y_to_c3 = nv::normalize(C3 - C_mid);
				vec3f long1 = vec3f(-y_to_c1.y, y_to_c1.x, 0), long2 = vec3f(-y_to_c2.y, y_to_c2.x, 0), long3 = vec3f(-y_to_c3.y, y_to_c3.x, 0);
				rot1 = long1.y > 0 ? std::acos(long1.x) * 180 / pi : std::acos(-long1.x) * 180 / pi;
				rot2 = long2.y > 0 ? std::acos(long2.x) * 180 / pi : std::acos(-long2.x) * 180 / pi;
				rot3 = long3.y > 0 ? std::acos(long3.x) * 180 / pi : std::acos(-long3.x) * 180 / pi;


				ellipse(Mat(displayImg), Point(C1.x, C1.y), Size(e_l, e_s), (double)rot1, 0, 360, Scalar(1.f, 1.f, 1.f));
				ellipse(Mat(displayImg), Point(C2.x, C2.y), Size(e_l, e_s), (double)rot2, 0, 360, Scalar(1.f, 1.f, 1.f));
				ellipse(Mat(displayImg), Point(C3.x, C3.y), Size(e_l, e_s), (double)rot3, 0, 360, Scalar(1.f, 1.f, 1.f));
#ifdef CV_DEBUG
				cvShowImage("DEBUG", displayImg);
				cvWaitKey(1);
#endif
				cvReleaseImage(&thisCTImg);
				cvReleaseImage(&displayImg);

			}

			// Dump the per-slice yarn radii for offline inspection.
			std::ofstream debug("debug_yarn_r.txt");
			for (int i = 0; i < loopNum; i++) {
				debug << yarn_radius_vec[i] << std::endl;
			}
			debug.close();

			config.yarn_radius = statistic_avg(yarn_radius_vec, STAT_AVG_N);  //std::accumulate(yarn_radius_vec.begin(), yarn_radius_vec.end(), 0.f) / loopNum;
			config.ellipse_long = statistic_avg(ellipse_l_vec, STAT_AVG_N);  //std::accumulate(ellipse_l_vec.begin(), ellipse_l_vec.end(), 0.f) / loopNum;
			config.ellipse_short = statistic_avg(ellipse_s_vec, STAT_AVG_N);  //std::accumulate(ellipse_s_vec.begin(), ellipse_s_vec.end(), 0.f) / loopNum;
			std::cout << "y_r = " << config.yarn_radius << " e_l = " << config.ellipse_long << " e_s = " << config.ellipse_short << std::endl;
		} else {
			// Only reachable when the assert above is compiled out (NDEBUG).
			std::cerr << "Warning: K = " << K << " not supported!" << std::endl;
		}
	}

	/**
	 * Assigns every fiber in this->fibers to one ply, expresses it relative to that
	 * ply's center with the yarn twist rotated away, and saves one fiber file per ply.
	 *
	 * Per fiber:
	 *   1. Ply vote: vertices are sampled with a random stride; for each sample the
	 *      nearest of the K center curves (at the sample's z slice) is found, and the
	 *      rounded mean of those indices becomes the fiber's ply id.
	 *   2. For K == 2 and K == 3 each vertex offset from the ply center is split into a
	 *      short-axis and a long-axis component, divided by ellipse_short / ellipse_long
	 *      respectively and multiplied by balance_radius = sqrt(ellipse_short * ellipse_long).
	 *      For K == 1 the vertex is used unchanged.
	 *   3. Both the vertex and the ply center are rotated about z by -yarnTheta
	 *      (yarnTheta = +/- 2*pi*z / yarn_alpha, sign chosen by yarn_clock_wise) and the
	 *      rotated center is subtracted from the rotated vertex.
	 *   4. The resulting polyline is ordered so that z increases from front to back.
	 *
	 * Output: fibers are appended to this->untied_fibers[plyId]; each ply i is saved to
	 * "<unrolled_ply_file truncated at its first '.'><i>.txt".
	 *
	 * Fibers without any vertex are skipped (they carry no geometry and would otherwise
	 * produce a 0/0 ply index and front()/back() calls on an empty polyline).
	 *
	 * @param config  read only here: yarn_alpha, yarn_clock_wise, ellipse_long, ellipse_short.
	 */
	void CTAnalyzer::RobustUntieYarnToPlys(Config &config) {
		std::cout << "Untie yarn into individual plys... " << std::endl;

		const int fiberNum = this->fibers.fibers.size();		assert(fiberNum > 0);
		const int K = this->curve.m_curve_num;				assert(K >= 1 && K <= 3);
		const float yarn_alpha = config.yarn_alpha;
		const int yarn_clock_wise = config.yarn_clock_wise;

		const float ellipse_long = config.ellipse_long;
		const float ellipse_short = config.ellipse_short;
		const float balance_radius = std::sqrtf(ellipse_short * ellipse_long);

		// Maps a point given in the volume's bounding-box coordinates to (fractional) voxel indices.
		auto volume2voxel = [](const vec3f &p, const MitsubaVol *vol) -> vec3f {
			return vec3f((vol->m_dim[0]-1) * (p[0] - vol->m_aabb[0][0]) / (vol->m_aabb[1][0] - vol->m_aabb[0][0]),
				(vol->m_dim[1]-1) * (p[1] - vol->m_aabb[0][1]) / (vol->m_aabb[1][1] - vol->m_aabb[0][1]),
				(vol->m_dim[2]-1) * (p[2] - vol->m_aabb[0][2]) / (vol->m_aabb[1][2] - vol->m_aabb[0][2])
				);
		};

		// Inverse of volume2voxel: voxel indices back to bounding-box coordinates.
		auto voxel2volume = [](const vec3f &p, const MitsubaVol *vol) -> vec3f {
			return vec3f(p[0] * (vol->m_aabb[1][0] - vol->m_aabb[0][0]) / (vol->m_dim[0] - 1) + vol->m_aabb[0][0],
				p[1] * (vol->m_aabb[1][1] - vol->m_aabb[0][1]) / (vol->m_dim[1] - 1) + vol->m_aabb[0][1],
				p[2] * (vol->m_aabb[1][2] - vol->m_aabb[0][2]) / (vol->m_dim[2] - 1) + vol->m_aabb[0][2]
			);
		};

		untied_fibers.resize(K);

		for (int f = 0; f < fiberNum; f++) {
			const Polyline &fiber = this->fibers.fibers[f];
			const int vertexNum = fiber.size();
			if (vertexNum == 0) {
				// Nothing to untie; also keeps the ply vote and front()/back() below well defined.
				continue;
			}
			const int stride = rand_range(1, vertexNum);

			/* Ply vote: nearest center curve for a strided subset of the vertices. */
			float plyIndexSum = 0; int plyIndexCounter = 0;
			for (int j = 0; j < vertexNum; j += stride, plyIndexCounter++) {
				int thisPlyID = 0;
				const vec3f &vertex = fiber[j];
				vec3f voxel = volume2voxel(vertex, &this->vol);
				float minDist = std::numeric_limits<float>::max();
				for (int k = 0; k < K; k++) {
					vec3f center = vec3f(vec2f(this->curve.curves[k][(int)voxel.z]), voxel.z);
					float thisDist = nv::length(center - voxel);
					if (thisDist < minDist) {
						thisPlyID = k;	minDist = thisDist;
					}
				}
				plyIndexSum += thisPlyID;
			}
			const int plyId = round(plyIndexSum / plyIndexCounter);

			Polyline reverseFiber;
			for (int v = 0; v < vertexNum; v++) {
				const vec3f &vertex = fiber[v];
				const vec3f voxel = volume2voxel(vertex, &this->vol);
				const float zValue = vertex.z;
				const float yarnTheta = yarn_clock_wise ? -zValue * 2 * pi / yarn_alpha : zValue * 2 * pi / yarn_alpha;

				vec3f circleVertex = vertex;
				// Explicitly zeroed: the K == 1 path never assigns it, so the value must not
				// depend on what vec3f's default constructor happens to do.
				vec3f plyCenter(0.f, 0.f, 0.f);

				if (K == 2) {
					vec3f C1 = voxel2volume(this->curve.curves[0][voxel.z], &this->vol);
					vec3f C2 = voxel2volume(this->curve.curves[1][voxel.z], &this->vol);

					// Short axis points from ply 2 to ply 1; long axis is its in-plane perpendicular.
					vec3f short_axis = nv::normalize(C1 - C2), long_axis = vec3f(short_axis.y, -short_axis.x, 0.f);
					plyCenter = plyId == 0 ? C1 : C2;
					vec3f p = vertex - plyCenter;

					float local_x = nv::dot(p, short_axis), local_y = nv::dot(p, long_axis);
					local_x = local_x / ellipse_short * balance_radius;
					local_y = local_y / ellipse_long * balance_radius;

					circleVertex = local_x * short_axis + local_y * long_axis + plyCenter;

				} else if (K == 1) {
					// NO-OP: the vertex is used as is.
				} else if (K == 3) {
					vec3f C[3];
					for (int c = 0; c < 3; c++)
						C[c] = voxel2volume(this->curve.curves[c][voxel.z], &this->vol);
					vec3f CMid = (C[0] + C[1] + C[2]) / 3;
					// Short axis points from the centroid of the three plies to this ply's center.
					vec3f short_axis = nv::normalize(C[plyId] - CMid), long_axis = vec3f(short_axis.y, -short_axis.x, 0.f);
					plyCenter = C[plyId];
					vec3f p = vertex - plyCenter;

					float local_x = nv::dot(p, short_axis), local_y = nv::dot(p, long_axis);
					local_x = local_x / ellipse_short * balance_radius;
					local_y = local_y / ellipse_long * balance_radius;

					circleVertex = local_x * short_axis + local_y * long_axis + plyCenter;
				}

				/* Rotate vertex and ply center by -yarnTheta about z, then make the vertex ply-relative. */
				const float cosT = std::cosf(-yarnTheta), sinT = std::sinf(-yarnTheta);

				const float vx = circleVertex.x, vy = circleVertex.y;
				float reverseVx = vx * cosT - vy * sinT;
				float reverseVy = vy * cosT + vx * sinT;

				const float plyVx = plyCenter.x, plyVy = plyCenter.y;
				const float plyReverseVx = plyVx * cosT - plyVy * sinT;
				const float plyReverseVy = plyVy * cosT + plyVx * sinT;

				reverseVx -= plyReverseVx;
				reverseVy -= plyReverseVy;

				reverseFiber.push_back(vec3f(reverseVx, reverseVy, zValue));
			}

			// Store every fiber with ascending z.
			if (reverseFiber.front().z > reverseFiber.back().z) {
				std::reverse(reverseFiber.begin(), reverseFiber.end());
			}
			untied_fibers[plyId].fibers.push_back(reverseFiber);
		}

		for (int i = 0; i < K; i++) {
			std::string untieYarnFilename = this->unrolled_ply_file;
			// NOTE(review): the base name is cut at the FIRST '.', so a path such as "../x.txt"
			// yields an empty prefix. Left unchanged because other code may rebuild the same name.
			untieYarnFilename = untieYarnFilename.substr(0, untieYarnFilename.find('.')) + std::to_string((long long)i) + ".txt";
			untied_fibers[i].m_fiber_num = untied_fibers[i].fibers.size();
			untied_fibers[i].save(untieYarnFilename);
		}

		std::cout << "Untie done." << std::endl;
	}

	/**
	 * Classifies the untied fibers of every ply into regular fibers, fly-aways and
	 * garbage, based on their distance from the ply axis (the z axis of the untied frame).
	 *
	 * For each ply k and each fiber the minimum and maximum xy-distance of its vertices
	 * from the origin are computed. Two thresholds are then derived (95th percentile of
	 * the minima / maxima, or mean + FLYAWAY_SIGMA_COUNT * std when USE_SIGMA_THRESHOLDS
	 * is defined):
	 *   - garbage  : min distance >  bound_dist1  -> dropped
	 *   - fly-away : max distance >  bound_dist2  -> untied_flyaway_fibers[k]
	 *   - otherwise                               -> untied_cleaned_fibers[k]
	 * Every non-garbage fiber is stored with x and y divided by `bound` (0.95) and is
	 * additionally appended to untied_fibers_no_garbage[k]. bound_dist2 is recorded in
	 * untied_flyaway_thresholds[k].
	 *
	 * Finally the cleaned and the no-garbage sets are saved as
	 * "<unrolled_ply_file truncated at its first '.'>_cleaned<k>.txt" and "..._no_garbage<k>.txt".
	 *
	 * A ply without fibers gets thresholds of 0 instead of querying a percentile of an
	 * empty list.
	 *
	 * @param config  read only here: ellipse_long, ellipse_short.
	 */
	void CTAnalyzer::RobustRemovePlyFlyAways(Config &config) {
		const int K = this->curve.m_curve_num;
		assert(K >= 1 && K <= 3);

		const float ellipse_long = config.ellipse_long;
		const float ellipse_short = config.ellipse_short;
		const float balance_radius = std::sqrtf(ellipse_short * ellipse_long);

		// Copies the first n vertices of `fiber` into `re_fiber` with x and y divided by
		// `bound`; z is kept. The balance_radius argument is currently not used.
		auto rescale_fiber = [](const Polyline &fiber, Polyline &re_fiber, float balance_radius, const int n, const float bound) {
			re_fiber.resize(n);
			for (int i = 0; i < n; i++) {
				const vec3f &vertex = fiber[i];
				re_fiber[i] = vec3f(vec2f(vertex) / bound, vertex.z);
			}
		};

		this->untied_cleaned_fibers.resize(K);
		this->untied_flyaway_fibers.resize(K);
		this->untied_flyaway_thresholds.resize(K);
		this->untied_fibers_no_garbage.resize(K);
		for (int k = 0; k < K; k++)
		{
			// assumes untied_fibers already holds at least K entries — TODO confirm against callers
			const Fibers &untied_fiber = this->untied_fibers[k];
			Fibers &untied_cleaned_fiber = this->untied_cleaned_fibers[k];
			Fibers &untied_flyaway_fiber = this->untied_flyaway_fibers[k];
			Fibers &untied_fibers_no_garbage = this->untied_fibers_no_garbage[k];
			float &untied_flyaway_thresholds = this->untied_flyaway_thresholds[k];

			const int numFibers = untied_fiber.fibers.size();

			std::vector<float> min_dists(numFibers), max_dists(numFibers);

			/* Per-fiber extreme distances from the ply axis; each iteration writes only its own slot. */
#pragma omp parallel for num_threads(num_of_omp_cores)
			for (int f = 0; f < numFibers; f++) {
				const Polyline &fiber = untied_fiber.fibers[f];
				const int numVertex = fiber.size();

				float min_dist = 1e10f, max_dist = 0.f;
				for (int v = 0; v < numVertex; v++) {
					vec3f vertex = fiber[v];
					float dist = nv::length(vec3f(vec2f(vertex), 0.f));
					min_dist = std::min(min_dist, dist);
					max_dist = std::max(max_dist, dist);
				}
				min_dists[f] = min_dist;
				max_dists[f] = max_dist;
			}

#ifdef USE_SIGMA_THRESHOLDS
			float mean1, std1, mean2, std2;
			meanStd(min_dists, mean1, std1);
			meanStd(max_dists, mean2, std2);

			const float bound = 0.95f;
			const float bound_dist1 = mean1 + FLYAWAY_SIGMA_COUNT*std1;
			const float bound_dist2 = mean2 + FLYAWAY_SIGMA_COUNT*std2;
#else
			const float bound = 0.95f;
			// An empty ply has no percentile; use 0 rather than indexing an empty list.
			const float bound_dist1 = numFibers > 0 ? percentile(min_dists, bound) : 0.f;
			const float bound_dist2 = numFibers > 0 ? percentile(max_dists, bound) : 0.f;
#endif
			printf("    bound_dist1: %f, bound_dist2: %f\n", bound_dist1, bound_dist2);
			untied_flyaway_thresholds = bound_dist2;

			for (int f = 0; f < numFibers; f++) {
				const bool garbage = min_dists[f] > bound_dist1,
					fly_away = max_dists[f] > bound_dist2;

				if (garbage)
					continue;

				const Polyline &fiber = untied_fiber.fibers[f];
				const int numVertex = fiber.size();

				Polyline re_fiber;
				rescale_fiber(fiber, re_fiber, balance_radius, numVertex, bound);
				if (fly_away)
					untied_flyaway_fiber.fibers.push_back(re_fiber);
				else
					untied_cleaned_fiber.fibers.push_back(re_fiber);

				untied_fibers_no_garbage.fibers.push_back(re_fiber);
			}

			// Convert the counts to int so that every argument matches its %d specifier
			// (the previous %lu did not match size_t on all platforms, nor the int remainder).
			const int numCleaned = static_cast<int>(untied_cleaned_fiber.fibers.size());
			const int numFlyaway = static_cast<int>(untied_flyaway_fiber.fibers.size());
			printf("    Ply #%d: %d non-flyaways, %d flyaways, %d garbage\n",
				k, numCleaned, numFlyaway, numFibers - numCleaned - numFlyaway);
		}

		for (int k = 0; k < K; k++) {
			std::cout << "Saving cleaned plies and no-flyaway plies. k = " << k << std::endl;
			std::string untieYarnFilename;
			untieYarnFilename = this->unrolled_ply_file;
			untieYarnFilename = untieYarnFilename.substr(0, untieYarnFilename.find('.')) + "_cleaned" + std::to_string((long long)k) + ".txt";
			untied_cleaned_fibers[k].m_fiber_num = untied_cleaned_fibers[k].fibers.size();
			untied_cleaned_fibers[k].save(untieYarnFilename);

			untieYarnFilename = this->unrolled_ply_file;
			untieYarnFilename = untieYarnFilename.substr(0, untieYarnFilename.find('.')) + "_no_garbage" + std::to_string((long long)k) + ".txt";
			untied_fibers_no_garbage[k].m_fiber_num = untied_fibers_no_garbage[k].fibers.size();
			untied_fibers_no_garbage[k].save(untieYarnFilename);
		}
	}

	/**
	 * Grid-searches the fiber migration parameters (rho_min, s_i) that minimise the
	 * summed per-fiber fitting error over all plies; rho_max is fixed to 1.
	 *
	 * For every candidate (si in [0, 2] step 0.1, rho_min in [0.5, 1] step 0.05) each
	 * fiber of this->unrolled_ply_fibers[k] with at least 70 vertices is converted to
	 * samples (theta, R) with theta = +/- 2*pi*z / alpha and R = xy-distance / balance_radius,
	 * handed to a SimplexMethod and the value returned by fit() is accumulated. The
	 * candidate with the smallest accumulated value wins.
	 *
	 * The si loop is parallelised with OpenMP. Both loops use integer counters so the
	 * upper bounds of the ranges are actually evaluated (a float counter accumulates
	 * rounding error and can step past the bound), and the best-so-far record is both
	 * compared and updated while holding this->omp_lock so a worse candidate can never
	 * overwrite a better one.
	 *
	 * @param config  reads ply_num, ellipse_long, ellipse_short, alpha, fiber_clock_wise;
	 *                writes use_migration (= 1), rho_max, rho_min and s_i.
	 */
	void CTAnalyzer::RobustFitFiberMigration(Config &config) {
		const int K = config.ply_num;
		const float ellipse_long = config.ellipse_long;
		const float ellipse_short = config.ellipse_short;
		const float balance_radius = std::sqrtf(ellipse_short * ellipse_long);
		const float alpha = config.alpha;
		const bool clockwise = config.fiber_clock_wise;


		/* Search ranges, step sizes and the best candidate found so far. */
		struct MigrationParams {
			float final_rho_min, final_rho_max, final_si;
			vec2f rho_min_range, rho_max_range, si_range;
			float current_min_error;
			vec3f step_size;

			MigrationParams() : final_rho_min(0.f), final_rho_max(0.f), final_si(0.f) {
				rho_min_range = vec2f(0.5f, 1.0f);
				rho_max_range = vec2f(1.f, 1.f);        // rho_max = 1.f, R_max = 1.f
				si_range = vec2f(0.0f, 2.0f);
				step_size = vec3f(0.05f, 0.05f, 0.1f); /* rho_min, rho_max, si*/
				current_min_error = std::numeric_limits<float>::max();
			}

			void update(float rho_min, float rho_max, float si) {
				final_rho_min = rho_min;
				final_rho_max = rho_max;
				final_si = si;
			}

		} mp;

		std::cout << "Fit Migration start..." << std::endl;
		clock_t tic = clock();

		// Fibers shorter than this are ignored by the fit.
		const int minimal_vertex_num = 70;

		/* Uniformly sample fiber migration parameter space with step_size */
		const int numSid = (mp.si_range.y - mp.si_range.x) / mp.step_size.z;
		// Small tolerance so that a quotient landing just below an integer still counts the last step.
		const int numRho = static_cast<int>((mp.rho_min_range.y - mp.rho_min_range.x) / mp.step_size.x + 1e-3f);
#pragma omp parallel for  
		for (int sid = 0; sid <= numSid; sid ++) {
			const float si = sid * mp.step_size.z + mp.si_range.x;
			for (int rid = 0; rid <= numRho; rid++) {
				// Derived from the index (not accumulated) and clamped to the declared upper bound.
				const float rho_min = std::min(rid * mp.step_size.x + mp.rho_min_range.x, mp.rho_min_range.y);
				const float rho_max = 1.f;

				/* For each given fiber migration parameter, we fit all fibers to get its best score and compare and update global one */
				float min_error_this_migration = 0.f;
				for (int k = 0; k < K; k++)
				{
					Fibers &untied_cleaned_fiber = this->unrolled_ply_fibers[k]; //this->untied_cleaned_fibers[k];
					const int numFibers = untied_cleaned_fiber.fibers.size();

					for (int f = 0; f < numFibers; f++) {
						Polyline &fiber = untied_cleaned_fiber.fibers[f];
						const int numVertex = fiber.size();

						if (numVertex < minimal_vertex_num)
							continue;

						std::vector<double> x(numVertex, 0.f), y(numVertex, 0.f);

						/* For each fiber, we fit the init_r and init_theta, then accumulate the fitting error for all fibers */
						const int numInitGuesses = 5;
						SimplexMethod simplexMethod(numInitGuesses);

						for (int v = 0; v < numVertex; v++) {
							vec3f &vertex = fiber[v];
							float z_value = vertex.z;
							float theta = clockwise ? -2 * pi * z_value / alpha : 2 * pi * z_value / alpha;
							float migration_R = nv::length(vec3f(vec2f(vertex), 0.f)) / balance_radius;
							x[v] = theta;
							y[v] = migration_R;
						}

						simplexMethod.feed(x, y, rho_min, rho_max, si);
						double migration_error_per_fiber = simplexMethod.fit();

						min_error_this_migration += migration_error_per_fiber;
					}
				}

				// Compare AND update under the lock: testing outside the lock would let two
				// threads both pass the test and the slower (worse) one overwrite the better.
				omp_set_lock(&this->omp_lock);
				if (min_error_this_migration < mp.current_min_error) {
					mp.current_min_error = min_error_this_migration;
					mp.update(rho_min, rho_max, si);
					std::cout << "update: fitting error = " << min_error_this_migration << " rho_min = " << rho_min << " rho_max = " << rho_max << " si = " << si << std::endl;
				}
				omp_unset_lock(&this->omp_lock);
			}
		}

		clock_t toc = clock();

		std::cout << "Fiber Migration Done. Timeuse = " << (toc-tic) / CLOCKS_PER_SEC << " s. " << std::endl;
		std::cout << "Params: rho_min = " << mp.final_rho_min << " rho_max = " << mp.final_rho_max << " si = " << mp.final_si << std::endl;
		std::cout << "Final fitting error = " << mp.current_min_error << std::endl;


		config.use_migration = 1;
		config.rho_max = mp.final_rho_max;
		config.rho_min = mp.final_rho_min;
		config.s_i = mp.final_si;
	}

	/**
	 * Estimates the fiber twist pitch (config.alpha) and the fiber twist direction
	 * (config.fiber_clock_wise) by random sampling of the tracked fibers.
	 *
	 * 50000 times (in parallel) a random fiber with at least 20 vertices is drawn and
	 * walked with a random stride taken from rand_range(2, 25):
	 *   - the fiber is assigned to a ply by a nearest-center vote over the strided vertices;
	 *   - for every pair of consecutive samples (ordered by ascending z) the angle of the
	 *     fiber position around the ply center is measured at both ends; pairs whose
	 *     scaled distance to the center is above ellipse_long or below ellipse_short/3 are skipped;
	 *   - the yarn's own rotation over the pair's z distance (deltaZ * 2*pi / yarn_alpha) is
	 *     added or subtracted (depending on yarn_clock_wise) and the remaining angle is
	 *     taken as the fiber twist; pairs with either angle outside [0.01, pi/2] are skipped;
	 *   - deltaZ * 2*pi / angleFiberTwisted is accumulated as a pitch estimate together
	 *     with a 0/1 direction vote.
	 * Per-fiber averages are collected under omp_lock. The direction is the majority of
	 * the votes, the pitch is statistic_avg(..., STAT_AVG_N) of the per-fiber estimates.
	 *
	 * If no fiber has enough vertices the function prints a warning and returns without
	 * touching config (the sampling loop could otherwise never terminate).
	 *
	 * @param config  reads yarn_alpha, yarn_clock_wise, ellipse_long, ellipse_short;
	 *                writes fiber_clock_wise and alpha.
	 */
	void CTAnalyzer::RobustFitFiberTwist(Config &config) {
		const int fiberNum = this->fibers.fibers.size();		assert(fiberNum > 0);
		const int K = this->curve.m_curve_num;				assert(K >= 1 && K <= 3);
		const float yarn_alpha = config.yarn_alpha;
		const int yarn_clock_wise = config.yarn_clock_wise;

		// Maps a point given in the volume's bounding-box coordinates to (fractional) voxel indices.
		auto volume2voxel = [](const vec3f &p, const MitsubaVol *vol) -> vec3f {
			return vec3f((vol->m_dim[0]-1) * (p[0] - vol->m_aabb[0][0]) / (vol->m_aabb[1][0] - vol->m_aabb[0][0]),
				(vol->m_dim[1]-1) * (p[1] - vol->m_aabb[0][1]) / (vol->m_aabb[1][1] - vol->m_aabb[0][1]),
				(vol->m_dim[2]-1) * (p[2] - vol->m_aabb[0][2]) / (vol->m_aabb[1][2] - vol->m_aabb[0][2])
				);
		};

		const int loopNum = 50000, minimumVerticesPerFiber = 20;

		// The rejection sampling below only terminates if at least one fiber is long enough.
		bool hasUsableFiber = false;
		for (int f = 0; f < fiberNum && !hasUsableFiber; f++)
			hasUsableFiber = static_cast<int>(this->fibers.fibers[f].size()) >= minimumVerticesPerFiber;
		if (!hasUsableFiber) {
			std::cerr << "Warning: no fiber has at least " << minimumVerticesPerFiber
				<< " vertices, fiber twist is not fitted." << std::endl;
			return;
		}

		std::vector<float> fiber_alpha_vec; double fiber_clockwise_sum = 0; int loopCounter = 0;
#pragma omp parallel for num_threads(num_of_omp_cores)
		for (int i = 0; i < loopNum; i++) {
			/* Draw random fibers until one with enough vertices is found. */
			int fiberID = 0;
			do {
				fiberID = std::floor(rand_range(0, fiberNum));
				if (this->fibers.fibers[fiberID].size() >= minimumVerticesPerFiber)
					break;
			} while (true);

			const Polyline &thisFiber = this->fibers.fibers[fiberID];
			const int verticesNum = thisFiber.size();		assert(verticesNum >= minimumVerticesPerFiber);
			const int stride = (int) rand_range(2, 25);


			/* Ply vote: nearest center curve for a strided subset of the vertices. */
			float plyIndexSum = 0; int plyIndexCounter = 0;
			for (int j = 0; j < verticesNum; j += stride, plyIndexCounter++) {
				int thisPlyID = 0;
				const vec3f thisVertex = thisFiber[j];
				const vec3f thisVoxel = volume2voxel(thisVertex, &vol);
				float minDist = std::numeric_limits<float>::max();
				for (int k = 0; k < K; k++) {
					vec3f thisCurveCenter = vec3f(vec2f(this->curve.curves[k][(int)thisVoxel.z]), thisVoxel.z);
					float thisDist = nv::length(thisCurveCenter - thisVoxel);
					if (thisDist < minDist) {
						thisPlyID = k;	minDist = thisDist;
					}
				}
				plyIndexSum += thisPlyID;
			}
			int plyID = round(plyIndexSum / plyIndexCounter);


			float this_fiber_alpha = 0, this_clock_wise = 0;
			int this_counter = 0;

			for (int j = 0; j < verticesNum - stride; j += stride) {
				const vec3f thisV = thisFiber[j], nextV = thisFiber[j+stride];
				// Order the pair so that "this" is the end with the smaller z.
				const bool goingUp = nextV.z > thisV.z;
				const vec3f &thisVertex = goingUp ? thisV : nextV,
					&nextVertex = goingUp ? nextV : thisV;

				const vec3f thisVoxel = volume2voxel(thisVertex, &this->vol),
					nextVoxel = volume2voxel(nextVertex, &this->vol);
				const vec3f thisCurveCenter = vec3f(vec2f(this->curve.curves[plyID][(int)thisVoxel.z]), thisVoxel.z),
					nextCurveCenter = vec3f(vec2f(this->curve.curves[plyID][(int)nextVoxel.z]), nextVoxel.z);

				float deltaZ = std::fabs(thisVertex.z - nextVertex.z);
				vec3f thisCurveCen2FiberNotNormalized = thisVoxel - thisCurveCenter,
					nextCurveCen2FiberNotNormalized = nextVoxel - nextCurveCenter;
				float thisCurveCen2FiberLen = nv::length(thisCurveCen2FiberNotNormalized) * scalingFactor,
					nextCurveCen2FiberLen = nv::length(nextCurveCen2FiberNotNormalized) * scalingFactor;
				// Discard samples too far from, or too close to, the ply center.
				if (thisCurveCen2FiberLen > config.ellipse_long ||
					nextCurveCen2FiberLen > config.ellipse_long ||
					thisCurveCen2FiberLen < config.ellipse_short/3 ||
					nextCurveCen2FiberLen < config.ellipse_short/3
					)	{	continue;  }

				vec3f thisCurveCen2Fiber = nv::normalize(thisCurveCen2FiberNotNormalized),
					nextCurveCen2Fiber = nv::normalize(nextCurveCen2FiberNotNormalized);

				// Angle of the center->fiber direction in [0, 2*pi), taken from its x component.
				float thisAngleCurveCen2Fiber = thisCurveCen2Fiber.y > 0 ? std::acos(thisCurveCen2Fiber.x) : 2*pi - std::acos(thisCurveCen2Fiber.x),
					nextAngleCurveCen2Fiber = nextCurveCen2Fiber.y > 0 ? std::acos(nextCurveCen2Fiber.x) : 2*pi - std::acos(nextCurveCen2Fiber.x);

				// Rotation contributed by the yarn twist alone over this z distance.
				float angleYarnTwisted = deltaZ * 2 * pi / yarn_alpha;
				if (angleYarnTwisted < 0.01f || angleYarnTwisted > pi/2) {	continue;  }

				float angleFiberTwisted = 0.f;
				if (yarn_clock_wise) {
					angleFiberTwisted = std::fabs(nextAngleCurveCen2Fiber - (thisAngleCurveCen2Fiber - angleYarnTwisted));
					if (angleFiberTwisted < 0.01f || angleFiberTwisted > pi/2)	{	continue;  }
					this_clock_wise += thisAngleCurveCen2Fiber - angleYarnTwisted < nextAngleCurveCen2Fiber ? 0 : 1;
				} else {
					angleFiberTwisted = std::fabs(nextAngleCurveCen2Fiber - (thisAngleCurveCen2Fiber + angleYarnTwisted));
					if (angleFiberTwisted < 0.01f || angleFiberTwisted > pi/2)	{	continue;  }
					this_clock_wise += thisAngleCurveCen2Fiber + angleYarnTwisted < nextAngleCurveCen2Fiber ? 0 : 1;
				}
				this_fiber_alpha += (deltaZ * 2 * pi / angleFiberTwisted);
				this_counter ++;
			}

			// Shared accumulators are only touched while holding the lock.
			if (this_counter > 0) {
				omp_set_lock(&omp_lock);
				fiber_alpha_vec.push_back(this_fiber_alpha / this_counter);
				fiber_clockwise_sum += this_clock_wise / this_counter;
				loopCounter++;
				omp_unset_lock(&omp_lock);
			}

		}

		config.fiber_clock_wise = (fiber_clockwise_sum / loopCounter > 0.5f ? 1 : 0);

		config.alpha = statistic_avg(fiber_alpha_vec, STAT_AVG_N);
	}

	/**
	 * Fits the cross-sectional fiber distribution parameters (config.beta, config.epsilon)
	 * with the help of an external MATLAB script.
	 *
	 * Steps:
	 *   1. Any result file left over from an earlier run is deleted, so that the polling
	 *      in step 4 can only ever pick up the output of THIS run.
	 *   2. For every fiber of this->unrolled_ply_fibers[k] the samples (theta, R), with
	 *      theta = +/- 2*pi*z / alpha and R = xy-distance / balance_radius, are fed to a
	 *      SimplexMethod together with the already fitted rho_min, rho_max and s_i.
	 *      After fit(), simplexMethod.init_r is written to
	 *      matlab\fiber_distribution\cross_section_r.txt if it lies strictly inside (eps, 1 - eps).
	 *   3. "matlab.exe -r ..." is started on cross_section_r.m.
	 *   4. The result file is polled every 3 seconds until it exists and two values can be
	 *      parsed from it; they are stored as config.beta and config.epsilon.
	 *
	 * The file names and the "del" command are Windows specific.
	 *
	 * @param config  reads ply_num, ellipse_long, ellipse_short, alpha, fiber_clock_wise,
	 *                rho_min, rho_max, s_i; writes R_max (= 1.0), beta and epsilon.
	 */
	void CTAnalyzer::RobustFitFiberDistribution(Config &config) {
		const int K = config.ply_num;				assert(K >= 1 && K <= 3);
		const float ellipse_long = config.ellipse_long;
		const float ellipse_short = config.ellipse_short;
		const float balance_radius = std::sqrtf(ellipse_short * ellipse_long);
		const float alpha = config.alpha;
		const bool clockwise = config.fiber_clock_wise;

		const float rho_min = config.rho_min, rho_max = config.rho_max, si = config.s_i;

		const std::string r_file = "matlab\\fiber_distribution\\cross_section_r.txt";
		const std::string results_filename = "matlab\\fiber_distribution\\cross_section_r_result.txt";

		// Remove a stale result BEFORE MATLAB is launched. Deleting it afterwards races with
		// MATLAB writing the fresh result (and would delete it outright if the launch blocks).
		const std::string del_old_result_command = "del " + results_filename;
		system(del_old_result_command.c_str());

		std::ofstream cross_section_writer(r_file.c_str());

		config.R_max = 1.0;

		for (int k = 0; k < K; k++) {
			Fibers &untied_cleaned_fiber = this->unrolled_ply_fibers[k];//this->untied_cleaned_fibers[k];
			const int numFibers = untied_cleaned_fiber.fibers.size();
#pragma omp parallel for num_threads(num_of_omp_cores)
			for (int f = 0; f < numFibers; f++) {
				Polyline &fiber = untied_cleaned_fiber.fibers[f];
				const int numVertex = fiber.size();
				std::vector<double> x(numVertex, 0.f), y(numVertex, 0.f);

				/* For each fiber, we fit the init_r and init_theta, then accumulate the fitting error for all fibers */
				const int numInitGuesses = 5;
				SimplexMethod simplexMethod(numInitGuesses);

				for (int v = 0; v < numVertex; v++) {
					vec3f &vertex = fiber[v];
					float z_value = vertex.z;
					float theta = clockwise ? -2 * pi * z_value / alpha : 2 * pi * z_value / alpha;
					float migration_R = nv::length(vec3f(vec2f(vertex), 0.f)) / balance_radius;
					x[v] = theta;
					y[v] = migration_R;
				}

				simplexMethod.feed(x, y, rho_min, rho_max, si);
				simplexMethod.fit();	// the returned error is not needed here, only init_r

				// simplexMethod is local to this iteration; only the shared stream needs the lock.
				const float init_r = simplexMethod.init_r;
				if (init_r > eps && init_r < 1.f - eps) {
					omp_set_lock(&this->omp_lock);
					cross_section_writer <<  init_r  << std::endl;
					omp_unset_lock(&this->omp_lock);
				}
			}
		}
		cross_section_writer.close();

		std::string matlab_fit_command = "matlab.exe -r \"run matlab/fiber_distribution/cross_section_r.m\"";
		std::cout << "matlab command:\n" << matlab_fit_command << std::endl;
		system(matlab_fit_command.c_str());

		/* Poll until the result file exists AND both values can be read from it. */
		std::ifstream fiber_dist_loader;
		while (true) {
			fiber_dist_loader.open(results_filename.c_str());
			if (fiber_dist_loader.is_open()) {
				if (fiber_dist_loader >> config.beta >> config.epsilon)
					break;
				// The file exists but is not complete yet (e.g. still being written): retry.
				std::cerr << "Warning: could not parse " << results_filename << " yet, retrying." << std::endl;
				fiber_dist_loader.close();
			}
			fiber_dist_loader.clear();	// reset fail bits before the next attempt
			wait(3);
		}
		fiber_dist_loader.close();

		std::cout << "Fiber distribution loaded, beta = " << config.beta << " eps = " << config.epsilon << std::endl;
	}

	/**
	 * Splits the fly-away fibers of every ply into "hair" and "loop" pieces and derives
	 * the fly-away statistics stored in config.
	 *
	 * Splitting (per fiber of untied_flyaway_fibers[k], threshold = 1.05 * untied_flyaway_thresholds[k],
	 * r = xy-distance of a vertex from the origin):
	 *   - If the first vertex lies outside the threshold, the leading run is cut off as a
	 *     hair: it extends while r is above the threshold or still decreasing.
	 *   - The same is done from the last vertex backwards.
	 *   - Whatever remains with more than one vertex is stored as a loop.
	 * Both sets are written to "debug_hair.txt" and "debug_loop.txt".
	 *
	 * Hair statistics: in (r, z) space the two vertices that are farthest apart are
	 * located; ordered by r they give r0 (inner radius), re (radial extent) and ze
	 * (z extent). pe is the spread of atan2(y, x) over the hair, folded to at most pi.
	 * Hairs for which no such pair exists (fewer than two vertices, or all vertices
	 * coincide in (r, z)) are left out of the statistics instead of being indexed
	 * with -1; they still count towards the hair density.
	 *
	 * Loop statistics: r1 is the largest r of each loop.
	 *
	 * @param config  reads aabb_min.z / aabb_max.z; writes use_flyaways (= 1),
	 *                flyaway_hair_density, flyaway_hair_{r0,re,ze,pe}_{mu,sigma},
	 *                flyaway_loop_density and flyaway_loop_r1_{mu,sigma}.
	 *                Densities are counts divided by (K * z extent).
	 */
	void CTAnalyzer::RobustFitFlyAway(Config &config) {
		const int K = this->curve.m_curve_num;
		const float zextent = config.aabb_max.z - config.aabb_min.z;
		const float thresScale = 1.05f;

		Fibers hair, loop;
		for ( int k = 0; k < K; ++k ) {
			const auto &fibers = this->untied_flyaway_fibers[k].fibers;
			const float thres = thresScale*this->untied_flyaway_thresholds[k];

			for ( auto it = fibers.begin(); it != fibers.end(); ++it ) {
				int m = static_cast<int>(it->size());
				if ( m == 0 )
					continue;	// nothing to split; front()/back() below need at least one vertex

				Polyline cur(m);
				std::vector<float> rVals(m);
				for ( int i = 0; i < m; ++i ) {
					cur[i] = (*it)[i];
					rVals[i] = nv::length(vec3f(vec2f(cur[i]), 0.f));
				}

				/* Hair at the front: starts outside the threshold. */
				if ( rVals.front() > thres ) {
					int j = 1;
					while ( j < m && (rVals[j] > thres || rVals[j - 1] > rVals[j]) )
						++j;

					if ( j < m ) {
						// Vertex j ends the hair and also stays as first vertex of the remainder.
						hair.fibers.push_back(Polyline(cur.begin(), cur.begin() + j + 1));
						cur.erase(cur.begin(), cur.begin() + j);
						rVals.erase(rVals.begin(), rVals.begin() + j);
					}
					else {
						// The whole fiber is one hair.
						hair.fibers.push_back(cur);
						cur.clear();
						rVals.clear();
					}
					m = static_cast<int>(cur.size());
				}

				/* Hair at the back: ends outside the threshold. */
				if ( m > 1 && rVals.back() > thres ) {
					int j = m - 2;
					while ( j >= 0 && (rVals[j] > thres || rVals[j + 1] > rVals[j]) )
						--j;

					if ( j >= 0 ) {
						// Vertex j starts the hair and also stays as last vertex of the remainder.
						hair.fibers.push_back(Polyline(cur.begin() + j, cur.end()));
						cur.erase(cur.begin() + j + 1, cur.end());
						rVals.erase(rVals.begin() + j + 1, rVals.end());
					}
					else {
						hair.fibers.push_back(cur);
						cur.clear();
						rVals.clear();
					}
					m = static_cast<int>(cur.size());
				}

				/* The remainder is a loop. */
				if ( m > 1 ) {
#if 0
					bool dead = true;
					for ( auto it2 = rVals.begin(); it2 != rVals.end(); ++it2 )
						if ( *it2 > thres ) {
							dead = false; break;
						}
					if ( !dead ) loop.fibers.push_back(cur);
#else
					loop.fibers.push_back(cur);
#endif
				}
			}
		}

		hair.m_fiber_num = static_cast<int>(hair.fibers.size());
		hair.save("debug_hair.txt");
		loop.m_fiber_num = static_cast<int>(loop.fibers.size());
		loop.save("debug_loop.txt");

		std::vector<float> r0, re, ze, pe;
		for ( auto it = hair.fibers.begin(); it != hair.fibers.end(); ++it ) {
			const int m = static_cast<int>(it->size());
			std::vector<float> rVal(m), zVal(m);
			for ( int i = 0; i < m; ++i ) {
				rVal[i] = nv::length(vec3f(vec2f((*it)[i]), 0.0f));
				zVal[i] = (*it)[i][2];
			}

			/* Farthest pair of vertices in (r, z) space. */
			float best = 0.0f;
			int inner = -1, outer = -1;
			for ( int i = 0; i < m; ++i )
				for ( int j = i + 1; j < m; ++j ) {
					float val = nv::length(vec3f(rVal[i] - rVal[j], zVal[i] - zVal[j], 0.0f));
					if ( val > best ) {
						best = val;
						inner = i; outer = j;
					}
				}

			// No pair with a positive distance: the hair has no measurable extent.
			if ( inner < 0 || outer < 0 )
				continue;

			// "inner" is the end with the smaller radius.
			if ( rVal[inner] > rVal[outer] ) std::swap(inner, outer);
			r0.push_back(rVal[inner]);
			re.push_back(rVal[outer] - rVal[inner]);
			ze.push_back(zVal[outer] - zVal[inner]);

			/* Angular spread of the hair around the ply axis. */
			float pMin = 1e10f, pMax = -1e10f;
			for ( int i = 0; i < m; ++i ) {
				float val = std::atan2((*it)[i][1], (*it)[i][0]);
				pMin = std::min(pMin, val);
				pMax = std::max(pMax, val);
			}
			float pExtent = pMax - pMin;
			if ( pExtent > pi ) pExtent = 2.0f*pi - pExtent;
			pe.push_back(pExtent);
		}

		config.use_flyaways = 1;
		config.flyaway_hair_density = static_cast<float>(hair.fibers.size())/(static_cast<float>(K)*zextent);
		meanStd(r0, config.flyaway_hair_r0_mu, config.flyaway_hair_r0_sigma);
		meanStd(re, config.flyaway_hair_re_mu, config.flyaway_hair_re_sigma);
		meanStd(ze, config.flyaway_hair_ze_mu, config.flyaway_hair_ze_sigma);
		meanStd(pe, config.flyaway_hair_pe_mu, config.flyaway_hair_pe_sigma);

		std::vector<float> r1;
		for ( auto it = loop.fibers.begin(); it != loop.fibers.end(); ++it ) {
			float rMax = 0.0f;
			for ( auto it2 = it->begin(); it2 != it->end(); ++it2 ) {
				float val = nv::length(vec3f(vec2f(*it2), 0.0f));
				rMax = std::max(rMax, val);
			}
			r1.push_back(rMax);
		}

		config.flyaway_loop_density = static_cast<float>(loop.fibers.size())/(static_cast<float>(K)*zextent);
		meanStd(r1, config.flyaway_loop_r1_mu, config.flyaway_loop_r1_sigma);
	}

    /**
     * Returns the element found at the nearest-rank position for fraction p in the
     * ascending order of `data`.
     *
     * The position is floor(size * p + 0.5), clamped to [0, size - 1]. The input is
     * not modified; a copy is partially ordered to find the element.
     *
     * @param data  values to examine (any order).
     * @param p     requested fraction, e.g. 0.95f.
     * @return the selected value, or 0 when `data` is empty.
     */
    float CTAnalyzer::percentile(const std::vector<float> &data, float p) {
        // Without this guard the clamp below would produce index -1 for an empty input.
        if ( data.empty() )
            return 0.0f;

        const int count = static_cast<int>(data.size());
        int rank = static_cast<int>(std::floor(static_cast<float>(data.size())*p + 0.5f));
        if ( rank < 0 )
            rank = 0;
        else if ( rank >= count )
            rank = count - 1;

        // Only the element at `rank` is needed, so a full sort is unnecessary.
        std::vector<float> data0 = data;
        std::nth_element(data0.begin(), data0.begin() + rank, data0.end());
        return data0[rank];
    }

    /**
     * Computes the arithmetic mean and the sample standard deviation (divisor n - 1)
     * of `data`.
     *
     * @param data   input values.
     * @param mean_  receives the mean; 0 for empty input.
     * @param std_   receives the standard deviation; 0 when fewer than two values are given.
     */
    void CTAnalyzer::meanStd(const std::vector<float> &data, float &mean_, float &std_)
    {
        const size_t count = data.size();

        // No samples at all: both outputs are zero.
        if ( count == 0 ) {
            std_ = 0.0f;
            mean_ = 0.0f;
            return;
        }

        // A single sample has no spread.
        if ( count == 1 ) {
            mean_ = data[0];
            std_ = 0.0f;
            return;
        }

        // Mean: running sum kept directly in the output, then divided by the count.
        mean_ = 0.0f;
        for ( size_t idx = 0; idx < count; ++idx )
            mean_ += data[idx];
        mean_ /= static_cast<float>(count);

        // Sum of squared deviations, then the square root of the n - 1 normalised sum.
        std_ = 0.0f;
        for ( size_t idx = 0; idx < count; ++idx )
            std_ += std::pow(data[idx] - mean_, 2.0f);
        std_ = std::sqrt(std_/static_cast<float>(count - 1));
    }
}
