function [ betas ] = multinomialLogisticRegressionL1( trainData, trainLabels, lambda, alpha, verbose)
%MULTINOMIALLOGISTICREGRESSIONL1 Elastic-net-penalized multinomial logistic regression.
%   betas = multinomialLogisticRegressionL1(trainData, trainLabels, lambda, alpha, verbose)
%   fits a multinomial logistic regression model by proximal gradient descent.
%   trainData is n-by-nfeat; trainLabels is n-by-1 with class labels in
%   1..nclass. lambda scales the elastic-net penalty and alpha in [0,1]
%   mixes its L1 (alpha = 1) and ridge (alpha = 0) parts. The last class is
%   the reference, so betas is (nclass-1)-by-nfeat. Set verbose > 0 to
%   print per-iteration progress.
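%
%   Example (hypothetical data, for illustration only):
%       X = randn(100, 5);                  % 100 samples, 5 features
%       y = randi(3, 100, 1);               % labels in 1..3
%       B = multinomialLogisticRegressionL1(X, y, 0.1, 0.5, 1);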
if nargin < 5
    verbose = 0;
end

nclass = max(trainLabels(:));       % number of classes
nfeat = size(trainData, 2);         % number of features
nParam = (nclass-1)*nfeat;          % free parameters (last class is the reference)
n = size(trainData, 1);             % number of training samples

X = trainData;
Y = trainLabels;

betaCurr = zeros(nclass-1, nfeat);  % current coefficients
funLast = 1e+9;                     % objective value from the previous iteration
resid = funLast;                    % change in objective, used as the stopping test

iterNum = 1;
maxIter = 10000;
residConvThr = 1e-3;                % stop once the objective changes by less than this
stepLen = 0.3;                      % fixed step size (no line search)

% Indicator matrix: Ind(i,j) = 1 iff sample j belongs to class i.
Ind = zeros(nclass-1, n);
for i = 1:nclass-1
    Ind(i,:) = (Y == i);
end

% regBalanceCoeff rescales the penalty against the data term; n/nParam is
% one alternative that balances it by problem size.
regBalanceCoeff = 1.0;

% The L1 part of the elastic-net penalty is handled by soft-thresholding
% (the proximal operator); the ridge part, with weight rho, stays in the
% smooth objective.
proxOp = @(x) softThreshold(x, stepLen*lambda*alpha);
rho = lambda*(1-alpha);
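
% Each iteration below takes one proximal gradient step,
%   beta <- proxOp(beta - stepLen * grad g(beta)),
% where g(beta) = NLL(beta) + regBalanceCoeff*(rho/2)*||beta||_2^2 is the
% smooth part of the objective. With a fixed stepLen, convergence assumes
% the step is small relative to the gradient's Lipschitz constant.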

while iterNum < maxIter && resid > residConvThr

    % Class-membership probabilities under the current coefficients.
    probs = calculateLogisticRegressionProbs(X, betaCurr);

    % Negative log-likelihood of the training labels.
    Q0 = 0.0;
    for j = 1:n
        Q0 = Q0 - log(probs(Y(j),j));
    end

    % Smooth part of the objective: NLL plus the ridge term.
    betaFlat = reshape(betaCurr, [1, nParam]);
    regTerm = (rho/2)*(betaFlat*betaFlat');
    gBeta = Q0 + regBalanceCoeff*regTerm;

    % Nonsmooth part: element-wise L1 penalty. Note that norm(betaCurr,1)
    % would give the induced matrix 1-norm, so sum over entries instead.
    hBeta = lambda*alpha*sum(abs(betaCurr(:)));

    Q1 = gBeta + hBeta;             % full elastic-net objective

    if verbose > 0
        fprintf('Iteration %d, obj. func. %f, neg.log.lik. %f\n', iterNum, Q1, Q0);
    end

    % Gradient of the smooth part: the usual multinomial logistic term
    % (probabilities minus indicators, times X) plus the ridge gradient.
    gradReg = rho*betaCurr;
    gradQ0 = (probs(1:nclass-1,:) - Ind)*X;
    grad = gradQ0 + regBalanceCoeff*gradReg;

    % Gradient step on the smooth part, then soft-threshold (proximal step).
    betaCurr = proxOp(betaCurr - stepLen * grad);

    iterNum = iterNum + 1;
    resid = abs(funLast - Q1);      % track the full objective, not just gBeta
    funLast = Q1;
end

betas = betaCurr;

% Training accuracy under the final coefficients (probs from the loop is
% one update behind betas, so recompute).
probs = calculateLogisticRegressionProbs(X, betas);
numCorrect = 0;
for i = 1:n
    [~, ind] = max(probs(:,i));
    if ind == Y(i)
        numCorrect = numCorrect + 1;
    end
end

if verbose > 0
    fprintf('Overall classification accuracy (train): %d/%d=%f\n', numCorrect, n, numCorrect/n);
end
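
% The two helpers used above are not shown in this file. The definitions
% below are minimal sketches of their assumed behavior, included so the
% file is self-contained; remove them if the repository already provides
% implementations.

% Assumed: element-wise soft-thresholding, the proximal operator of the
% scaled L1 norm t*||.||_1.
function y = softThreshold(x, t)
y = sign(x) .* max(abs(x) - t, 0);

% Assumed: softmax class probabilities with the last class as the
% zero-score reference, matching the (nclass-1)-by-nfeat coefficient layout.
function probs = calculateLogisticRegressionProbs(X, beta)
scores = [beta * X'; zeros(1, size(X, 1))];            % nclass-by-n scores
scores = bsxfun(@minus, scores, max(scores, [], 1));   % stabilize exp
expScores = exp(scores);
probs = bsxfun(@rdivide, expScores, sum(expScores, 1));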