function theta = initialize(imageDim,filterDim1,numFilters1,...
                                filterDim2,numFilters2,poolDim1,poolDim2,numClasses)
% INITIALIZE  Build the initial parameter vector for a network with two
% convolution+pooling stages followed by three dense layers.
%
% Parameters:
%   imageDim    - side length of the (square) input image
%   filterDim1  - side length of the first-stage convolution filters
%   numFilters1 - number of first-stage filters
%   filterDim2  - side length of the second-stage convolution filters
%   numFilters2 - number of second-stage filters
%   poolDim1    - pooling size after the first convolution
%   poolDim2    - pooling size after the second convolution
%   numClasses  - number of rows of the last dense layer (output size)
%
% Returns:
%   theta - column vector holding, in this order:
%           Wc1(:), Wc2(:), Wd1(:), Wd2(:), Wd3(:), bc1, bc2, bd1, bd2, bd3
%
% Errors:
%   Raises an assertion error when a convolution output size is not
%   positive or is not an exact multiple of the matching pooling size.

% ---- Layer geometry (validated before any random numbers are drawn) ----
convDim1 = imageDim - filterDim1 + 1;
assert(convDim1 >= 1,...
       'filterDim1 must not exceed imageDim');
assert(mod(convDim1,poolDim1)==0,...
       'poolDim1 must divide imageDim - filterDim1 + 1');
inputDim1 = convDim1/poolDim1;

convDim2 = inputDim1 - filterDim2 + 1;
assert(convDim2 >= 1,...
       'filterDim2 must not exceed the pooled output size of layer 1');
assert(mod(convDim2,poolDim2)==0,...
       'poolDim2 must divide inputDim1 - filterDim2 + 1');
inputDim2 = convDim2/poolDim2;

% Flattened size fed to the first dense layer. It scales with
% numFilters1*numFilters2 -- presumably every second-stage filter is applied
% to every first-stage map; confirm against the forward pass.
inputSize2 = inputDim2 ^ 2 * numFilters2 * numFilters1;

% Both hidden layers are as wide as the flattened convolution output.
hiddenSize1 = inputSize2;
hiddenSize2 = inputSize2;

% ---- Convolution weights: zero-mean Gaussian scaled by 0.1 ----
% Draw order (Wc1, Wc2, Wd1, Wd2, Wd3) is kept fixed so that a seeded
% generator reproduces the same theta.
Wc1 = 1e-1*randn(filterDim1,filterDim1,numFilters1);
Wc2 = 1e-1*randn(filterDim2,filterDim2,numFilters2);

% ---- Dense weights: uniform on [-r, r] ----
% r is a fixed constant here; a size-dependent alternative would be
% sqrt(6) / sqrt(fanIn + fanOut + 1).
r   = 0.1;
Wd1 = rand(hiddenSize1, inputSize2) * 2 * r - r;
Wd2 = rand(hiddenSize2, hiddenSize1) * 2 * r - r;
Wd3 = rand(numClasses, hiddenSize2) * 2 * r - r;

% ---- Biases start at zero ----
bc1 = zeros(numFilters1, 1);
bc2 = zeros(numFilters2, 1);
bd1 = zeros(hiddenSize1, 1);
bd2 = zeros(hiddenSize2, 1);
bd3 = zeros(numClasses, 1);

% Unroll everything into one column vector (weights first, then biases).
theta = [Wc1(:) ; Wc2(:) ; Wd1(:) ; Wd2(:) ; Wd3(:) ; bc1(:) ; bc2(:) ; bd1(:) ; bd2(:) ; bd3(:)];

end