function gradientCheck()
	% GRADIENTCHECK  Compare the gradient returned by cnnCostCPU with a
	% numerical estimate on randomly generated images.
	%
	% A random subset of the parameters in theta is selected (at most about
	% maxParamsToCheck of them in expectation). For each selected parameter
	% the cost is re-evaluated with that single parameter perturbed, and the
	% resulting numerical derivative is compared with the third output of
	% cnnCostCPU. A parameter fails when the absolute error exceeds
	% error_threshold or the relative error exceeds 5%; non-finite (NaN)
	% errors are counted as failures. Progress and a final summary are
	% printed to the console; nothing is returned.
	%
	% Relies on initialize, computeLayerSizes and cnnCostCPU being on the
	% path (the common/matlab directory is added below).
	format long
	more off
	warning('off', 'Octave:broadcast');

	isOctave = exist('OCTAVE_VERSION', 'builtin') ~= 0;
	%parpool;
	%
	%
	% Remember to turn off drop-out in the objective
	%
	%

	% ---- check configuration ----
	numImages = 2;
	error_threshold = 1e-4;           % maximum tolerated absolute error
	maxParamsToCheck = 100000;        % expected upper bound on checked parameters
	useComplexDifferentials = false;  % doesn't work for this objective

	% ---- network configuration passed to initialize / cnnCostCPU ----
	lambda = 0.01;
	imageDim = 28;
	numClasses = 1;
	filterDim1 = 9;
	numFilters1 = 20;
	filterDim2 = 5;
	numFilters2 = 10;
	poolDim1 = 2;
	poolDim2 = 2;

	% The helper directory is addressed relative to a different working
	% directory under Octave than under MATLAB.
	if isOctave
		addpath('./common/matlab/');
	else
		addpath('../common/matlab/');
	end

	if useComplexDifferentials
		method = 'complex differentials';
	else
		method = 'central-differencing';
	end

	theta = initialize(imageDim,filterDim1,numFilters1,...
	                   filterDim2,numFilters2,poolDim1,poolDim2,numClasses);
	layerSizes = computeLayerSizes(imageDim,filterDim1,numFilters1,filterDim2,numFilters2,poolDim1,...
	                               poolDim2,numClasses);

	% Random input images, one imageDim x imageDim slice per image.
	images = rand(imageDim, imageDim, numImages);

	% NOTE(review): '+' binds tighter than '<', so the original expression
	% "rand(numImages, 1) < 0.5 + 1" compares against 1.5 and every label is
	% logical 1. That behaviour is kept here (parentheses only make it
	% explicit). If labels in {1,2} were intended, use
	% "(rand(numImages, 1) < 0.5) + 1" -- confirm against numClasses and
	% what cnnCostCPU expects.
	labels = rand(numImages, 1) < (0.5 + 1);

	% Subtract each image's own mean pixel value.
	images = bsxfun(@minus, images, sum(sum(images))/imageDim/imageDim);

	% One weight per class. The original "ones(size(numClasses, 1))" took
	% the size of a scalar and therefore always produced the scalar 1; the
	% two agree for numClasses == 1.
	% NOTE(review): assumes cnnCostCPU wants a numClasses x 1 vector -- confirm.
	costWeights = ones(numClasses, 1);

	fprintf('Testing gradient using %s ', method);

	% Reference evaluation: the third output is used as the analytic
	% gradient. The 11th argument is false here and true in the perturbed
	% evaluations below (presumably a cost-only switch -- confirm in cnnCostCPU).
	[~,~,g,~,~] = cnnCostCPU(theta,images,labels,numClasses,...
	                filterDim1, numFilters1, filterDim2, numFilters2,...
	                poolDim1, poolDim2, false, lambda, costWeights);

	p = length(theta);
	fractionOfParamsToCheck = maxParamsToCheck/p;
	if useComplexDifferentials
		mu = 1e-150;
	else
		mu = 1e-8 * (1+norm(theta));%1*sqrt(1e-12)*(1+norm(theta));
	end

	% Bernoulli mask selecting which parameters are checked.
	e_j_c = rand(p,1) < fractionOfParamsToCheck;
	numToCheck = sum(e_j_c);
	fprintf('on %d out of %d parameters (%0.2f%%).\n', numToCheck, p, numToCheck/p*100);

	numErrors = 0;
	tested = 0;
	for idx = find(e_j_c)'
		% Numerical derivative with respect to parameter idx; only that one
		% entry of theta is perturbed.
		if useComplexDifferentials
			thetaStep = theta;
			thetaStep(idx) = theta(idx) + mu*1i;
			[costStep,~,~,~,~] = cnnCostCPU(thetaStep,images,labels,numClasses,...
			                filterDim1, numFilters1, filterDim2, numFilters2,...
			                poolDim1, poolDim2, true, lambda, costWeights);
			g2 = imag(costStep)/mu;
		else
			thetaPlus = theta;
			thetaPlus(idx) = theta(idx) + mu;
			thetaMinus = theta;
			thetaMinus(idx) = theta(idx) - mu;
			[costPlus,~,~,~,~] = cnnCostCPU(thetaPlus,images,labels,numClasses,...
			                filterDim1, numFilters1, filterDim2, numFilters2,...
			                poolDim1, poolDim2, true, lambda, costWeights);
			[costMinus,~,~,~,~] = cnnCostCPU(thetaMinus,images,labels,numClasses,...
			                filterDim1, numFilters1, filterDim2, numFilters2,...
			                poolDim1, poolDim2, true, lambda, costWeights);
			g2 = (costPlus - costMinus)/(2*mu);
		end

		userGrad = g(idx, 1);
		err = abs(g2 - userGrad);
		if userGrad ~= 0
			errp = abs(err/userGrad)*100;
		else
			% Relative error is undefined for an exactly-zero analytic
			% gradient; rely on the absolute threshold alone instead of
			% producing Inf/NaN.
			errp = 0;
		end
		tested = tested + 1;
		layer = computeLayer(idx, layerSizes);

		% Written as a negated "within tolerance" test so that NaN errors
		% (every comparison with NaN is false) count as failures.
		failed = ~(err <= error_threshold && errp <= 5);
		if failed
			result = 'FAILED!';
		else
			result = 'PASS';
		end

		fprintf('\r %d%% completed[%d | %d/%d | L%d], err:%1.2E (%0.2f%%), user-grad:%1.2E, diff-grad:%1.2E, param:%1.2E %s %10s',...
			tested/numToCheck*100, idx, tested, numToCheck, layer, err, errp, userGrad, g2, theta(idx, 1), result, ' ');
		if failed
			fprintf('\nMax difference between user and numerical gradient exceeded threshold: %e\n', err);
			numErrors = numErrors + 1;
		end
	end

	if numErrors==0
		fprintf('\rwell done! %120s\n', ' ');
	else
		fprintf('\rtotal errors: %d/%d  %120s\n', numErrors, numToCheck, ' ');
	end
end

function layer = computeLayer(index, layerSizes)
	% COMPUTELAYER  Map a flat parameter index to the layer that owns it.
	%
	%   index      - 1-based position in the flattened parameter vector.
	%   layerSizes - number of parameters per layer, in order. A row or
	%                column vector is walked element by element; for a
	%                matrix the first size(layerSizes, 1) linear elements
	%                are used, as before.
	%
	% Returns the first layer whose cumulative parameter count is >= index.
	% If index exceeds the total, the last layer is returned; if layerSizes
	% is empty, 0 is returned.
	if isvector(layerSizes)
		numLayers = numel(layerSizes);
	else
		numLayers = size(layerSizes, 1);
	end

	layer = 0;
	cumulative = 0;
	for k = 1:numLayers
		layer = k;
		cumulative = cumulative + layerSizes(k);
		% "<=": an index equal to the running total is the last parameter
		% of this layer, not the first of the next one.
		if index <= cumulative
			break;
		end
	end
end