
Commit f280879

committed Jul 4, 2016
Add my MLR from scratch and the other MLRs to this repository
1 parent ed9ac0c commit f280879

15 files changed: +494 -0 lines changed
 

‎mlr/mlr_from_scratch/multilogit.m

+236
@@ -0,0 +1,236 @@
function results = multilogit(y,x,beta0,maxit,tol);
% PURPOSE: implements multinomial logistic regression
%          Pr(y_i=j) = exp(x_i'beta_j)/sum_l[exp(x_i'beta_l)]
% where:   i   = 1,2,...,nobs
%          j,l = 0,1,2,...,ncat
%-------------------------------------------------------------------------%
% USAGE: results = multilogit(y,x,beta0,maxit,tol)
% where: y = response variable vector (nobs x 1)
%            the response variable should be coded sequentially from 0 to
%            ncat, i.e., y in {0,1,2,...,ncat}
%        x = matrix of covariates (nobs x nvar)
%            NOTE: to include a constant term in each beta_j,
%            include a column of ones in x
%        beta0 = optional starting values for beta (nvar x ncat+1) (default=0)
%        maxit = optional maximum number of iterations (default=100)
%        tol   = optional convergence tolerance (default=1e-6)
%-------------------------------------------------------------------------%
% RETURNS: a structure
%        results.meth      = 'multilogit'
%        results.beta_mat  = (nvar x ncat) matrix of beta coefficients:
%                            [beta_1 beta_2 ... beta_ncat] under the
%                            normalization beta_0 = 0
%        results.beta_vec  = (nvar*ncat x 1) vector of beta coefficients:
%                            [beta_1 ; beta_2 ; ... ; beta_ncat] under the
%                            normalization beta_0 = 0
%        results.covb      = (nvar*ncat x nvar*ncat) covariance matrix
%                            of results.beta_vec
%        results.tstat_mat = matrix of t-statistics conformable to
%                            results.beta_mat
%        results.tstat_vec = vector of t-statistics conformable to
%                            results.beta_vec
%        results.yfit      = (nobs x ncat+1) matrix of fitted
%                            probabilities: [P_0 P_1 ... P_ncat]
%                            where P_j = [P_1j ; P_2j ; ... ; P_nobsj]
%        results.lik       = unrestricted log likelihood
%        results.cnvg      = convergence criterion
%        results.iter      = number of iterations
%        results.nobs      = number of observations
%        results.nvar      = number of variables
%        results.ncat      = number of categories of the dependent variable
%                            (excluding the reference category j = 0)
%        results.count     = vector of counts of each value taken by y, i.e.,
%                            count = [#y=0 #y=1 ... #y=ncat]
%        results.y         = y vector
%        results.lratio    = LR test statistic against the intercept-only model
%                            (all slope coefficients = 0), distributed chi-squared
%                            with (nvar-1)*ncat degrees of freedom
%        results.rsqr      = McFadden pseudo-R^2
%
%-------------------------------------------------------------------------%
% A NOTE: Since users might prefer results (coefficients and tstats) in
% either a vector or matrix format, and since there is no single natural
% representation for these in the multinomial logit model, the results
% structure returns both. Note that the input argument beta0 supplies
% (optional) starting values in matrix (nvar x ncat+1) format.
%
%-------------------------------------------------------------------------%
% SEE ALSO: prt_multilogit, multilogit_lik
%-------------------------------------------------------------------------%
% References: Greene (1997), p.914

% written by:
% Simon D. Woodcock
% CISER / Economics
% Cornell University
% Ithaca, NY
% sdw9@cornell.edu

%---------------------------------------------------------%
%     ERROR CHECKING AND PRELIMINARY CALCULATIONS          %
%---------------------------------------------------------%

if nargin < 2, error('multilogit: wrong # of input arguments'); end;
y = round(y(:)); [nobs cy] = size(y); [rx nvar] = size(x);

if (rx~=nobs), error('multilogit: row dimensions of x and y must agree'); end;

% initial calculations
xstd = [1 std(x(:,2:nvar))];
x = x ./ ( ones(nobs,1)*xstd );     % standardize x
ymin = min(y);
ymax = max(y);
ncat = ymax - ymin;
d0 = ( y*ones(1,ncat+1) ) == ( ones(nobs,1)*(ymin:ymax) ); % put y in dummy format
d = d0(:,2:ncat+1);                 % normalize beta_0 = 0

% starting values
if nargin < 3
    beta0 = zeros(nvar,ncat+1);
else
    [a b] = size(beta0);
    if a == 0
        beta0 = zeros(nvar,ncat+1);
    else
        for j = 1:ncat+1;           % scale all ncat+1 columns of beta0
            beta0(:,j) = beta0(:,j) .* xstd';
        end;
    end;
end;

beta = beta0(:,2:ncat+1);

% default max iterations and tolerance
if nargin < 4 , maxit = 100; tol = 1e-6; end;
if nargin < 5 , tol = 1e-6; end;
if nargin > 5 , error('multilogit: wrong # of arguments'); end;

% check nvar and ncat are consistently defined
[rbeta cbeta] = size(beta);
if nvar ~= rbeta
    error('multilogit: rows of beta and columns of x do not agree')
end;
if ncat ~= cbeta
    error(['multilogit: number of columns in beta and categories in y do not agree. ' ...
           'check that y is numbered continuously, i.e., y takes values in {0,1,2,3,4,5}' ...
           ' is ok, y takes values in {0,1,2,3,4,99} is not.'])
end;

%----------------------------------------------------%
% MAXIMUM LIKELIHOOD ESTIMATION OF MULTINOMIAL LOGIT %
%----------------------------------------------------%

% likelihood and derivatives at starting values
[P,lnL] = multilogit_lik(y,x,beta,d);
[g H] = multilogit_deriv(x,d,P,nvar,ncat,nobs);

iter = 0;

for j = 1:ncat                      % vectorize beta and gradient for newton-raphson update
    f = (j-1)*nvar + 1;
    l = j*nvar;
    vb(f:l,1) = beta(:,j);
    vg(f:l,1) = g(:,j);
end;

% newton-raphson update
while (abs(vg'*(H\vg)/length(vg)) > tol) & (iter < maxit)
    iter = iter + 1;
    betaold = beta;
    vbold = vb;
    vb = vbold - H\vg;
    for j = 1:ncat                  % de-vectorize updated beta for pass to multilogit_lik
        f = (j-1)*nvar + 1;
        l = j*nvar;
        beta(:,j) = vb(f:l,1);
    end;
    [P,lnL] = multilogit_lik(y,x,beta,d);            % update P, lnL
    [g H] = multilogit_deriv(x,d,P,nvar,ncat,nobs);  % update g, H
    for j = 1:ncat;                 % vectorize updated g for next N-R update
        f = (j-1)*nvar + 1;
        l = j*nvar;
        vg(f:l,1) = g(:,j);
    end;
    disp(['iteration: ' num2str(iter)]);
    disp(['log-likelihood: ' num2str(lnL)]);
end;

%---------------------------------------------------------%
%     GENERATE RESULTS STRUCTURE                           %
%---------------------------------------------------------%

results.meth = 'multilogit';
for j = 1:ncat
    results.beta_mat(:,j) = beta(:,j) ./ xstd';      % restore original scale
end;
for j = 1:ncat
    f = (j-1)*nvar + 1;
    l = j*nvar;
    results.beta_vec(f:l,1) = results.beta_mat(:,j);
end;
results.covb = -inv(H) ./ kron(ones(ncat),(xstd'*xstd)); % restore original scale
stdb = sqrt(diag(results.covb));
results.tstat_vec = results.beta_vec ./ stdb;
for j = 1:ncat
    f = (j-1)*nvar + 1;
    l = j*nvar;
    results.tstat_mat(:,j) = results.tstat_vec(f:l,1);
end;
P_0 = ones(nobs,1) - sum(P')';
results.yfit = [P_0 P];
results.lik = lnL;
results.cnvg = tol;
results.iter = iter;
results.nobs = nobs;
results.nvar = nvar;
results.ncat = ncat;
results.count = [nobs-sum(sum(d)') sum(d)];
results.y = y;

% basic specification testing
p = results.count / nobs;
lnLr = nobs*sum((p.*log(p))');      % restricted log-likelihood: intercepts only
results.lratio = -2*(lnLr - results.lik);
results.rsqr = 1 - (results.lik / lnLr);             % McFadden pseudo-R^2

%---------------------------------------------------------%
%   SUPPLEMENTARY FUNCTION FOR COMPUTING DERIVATIVES       %
%---------------------------------------------------------%

function [g,H] = multilogit_deriv(x,d,P,nvar,ncat,nobs);
% PURPOSE: computes the gradient and Hessian of the multinomial logit model
% ---------------------------------------------------------
% References: Greene (1997), p.914

% written by:
% Simon D. Woodcock
% CISER / Economics
% 201 Caldwell Hall
% Cornell University
% Ithaca, NY 14850
% sdw9@cornell.edu

% compute gradient matrix (nvar x ncat)
tmp = d - P;
g = x'*tmp;

% compute Hessian, which has (ncat)^2 blocks of size (nvar x nvar);
% this algorithm builds each block individually, m & n are block indices
H = zeros(nvar*ncat);
for m = 1:ncat;
    for n = 1:ncat;
        fr = (m-1)*nvar + 1;
        lr = m*nvar;
        fc = (n-1)*nvar + 1;
        lc = n*nvar;
        index = (n==m);
        index = repmat(index,nobs,1);
        H(fr:lr,fc:lc) = -( ( x.*( P(:,m)*ones(1,nvar) ) )' * ( x.*( (index-P(:,n))*ones(1,nvar) ) ) );
    end;
end;
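For reference, multilogit_deriv assembles the standard multinomial-logit derivatives (Greene 1997, p.914). In the notation of the header comments, with d_ij the category dummy and P_ij the fitted probability, the code's g = x'*(d - P) and the Hessian block loop compute

$$ g_j = \sum_{i=1}^{nobs} (d_{ij} - P_{ij})\,x_i, \qquad H_{mn} = -\sum_{i=1}^{nobs} P_{im}\,(\mathbf{1}[m=n] - P_{in})\,x_i x_i' . $$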
+59
@@ -0,0 +1,59 @@
% http://www.spatial-econometrics.com/regress/
% http://www.spatial-econometrics.com/regress/contents.html
% PURPOSE: demonstrates the use of multilogit.m
% author: simon d. woodcock
% 9/13/2002

clear; clc;

% be sure to add the econometrics toolbox to your search path

%---- CREATE SOME DEMO DATA ----%

% specify the size of the demo
nobs = 1000;    % number of observations
nvar = 15;      % number of covariates
numcat = 5;     % number of categories

% specify the parameter vector
% note the beta vector associated with category 0 is normalized to zero
beta = [zeros(nvar,1),ones(nvar,numcat-1)];

% specify the covariates: x must include a column of ones if there
% is a constant term
xmat = randn(nobs,nvar-1);
x = [ones(nobs,1),xmat];

% generate the response variable y
xbeta = x*beta;
e = 0.1*randn(nobs,numcat);
xb = xbeta + e;
exp_xb = exp(xb);
sum_exp_xb = sum(exp_xb');
for j = 1:numcat;
    P(:,j) = exp_xb(:,j) ./ sum_exp_xb';
end;
cum_P = [cumsum(P')]';
u = rand(nobs,1);
yt = ones(nobs,1)*99;
for i = 1:nobs;
    for j = 1:numcat;
        if ((u(i,1) <= cum_P(i,j)) & (yt(i,1) == 99))
            yt(i,1) = j;
        end;
    end;
end;
y = yt - ones(nobs,1);  % y takes values in {0,1,2,...,numcat-1}

%---- CALL MULTILOGIT.M AND PRINT RESULTS ----%

% call multilogit using default starting values, convergence criterion, and
% maximum iterations
results = multilogit(y,x);

% assign variable and category names to arrays
vnames = strvcat('y','constant','x1','x2','x3','x4','x5','x6','x7','x8','x9','x10','x11','x12','x13','x14');
cnames = strvcat('j=0','j=1','j=2','j=3','j=4');

% print results
prt_multilogit(results,vnames,cnames)
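The double loop that draws y above is inverse-CDF sampling from each row of cum_P; a vectorized equivalent (a sketch; y_alt is an illustrative name):

% count how many cumulative thresholds each u(i) exceeds
y_alt = sum( (u*ones(1,numcat)) > cum_P , 2 );  % values in {0,1,...,numcat-1}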
‎calculateLogisticRegressionProbs.m
@@ -0,0 +1,22 @@
function [ probs ] = calculateLogisticRegressionProbs( X, beta )
%CALCULATELOGISTICREGRESSIONPROBS Class probabilities for multinomial
% logistic regression, with the last class as the reference (beta_nclass = 0).
% X     : (n x nfeat) data matrix
% beta  : (nclass-1 x nfeat) coefficient matrix
% probs : (nclass x n) matrix of fitted class probabilities
n = size(X,1);
nclass = size(beta,1)+1;
probs = zeros(nclass,n);
for j=1:n
    % unnormalized scores for the nclass-1 non-reference classes
    ps = zeros(nclass-1,1);
    for i=1:nclass-1
        ps(i) = exp(beta(i,:)*X(j,:)');
    end

    % the reference class contributes exp(0) = 1 to the normalizer
    sumP = sum(ps(:))+1;

    for i=1:nclass-1
        probs(i,j) = ps(i) / sumP;
    end

    probs(nclass,j) = 1-sum(probs(1:nclass-1,j));
end

end
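A minimal sanity-check sketch for this function (the data values are illustrative); each column of probs should sum to one:

X = [1 0.5 -0.2 ; 1 -1.0 0.3];          % two observations, three features
beta = [0.4 -0.1 0.2 ; -0.3 0.6 0.0];   % nclass-1 = 2 rows, so 3 classes
probs = calculateLogisticRegressionProbs(X, beta);
disp(sum(probs,1));                     % both entries should equal 1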
‎evaluateLogisticRegressionModel.m
@@ -0,0 +1,21 @@
function [ ] = evaluateLogisticRegressionModel( testData, testLabels, beta )
%EVALUATELOGISTICREGRESSIONMODEL Reports test-set accuracy and the
% confusion matrix (rows = predicted class, columns = true class).
% testLabels must take values in 1..nclass.
theProbs = calculateLogisticRegressionProbs(testData, beta);
n = size(testData,1);

nclass = size(beta,1) + 1;
confMatrix = zeros(nclass,nclass);

for i=1:n
    % predict the class with the highest fitted probability
    [maxProb,ind] = max(theProbs(:,i));
    confMatrix(ind,testLabels(i)) = confMatrix(ind,testLabels(i)) + 1;
end

ovAcc = sum(diag(confMatrix)) / n;

fprintf('Overall accuracy on test data: %f, confusion matrix: \n', ovAcc);

disp(confMatrix);

end
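A minimal call sketch (placeholder data; labels must be coded 1..nclass, matching the rows of probs):

Xtest = [1 0.5 -0.2 ; 1 -1.0 0.3];      % illustrative held-out data
ytest = [1 ; 2];                        % true labels in 1..nclass (here nclass = 3)
beta = [0.4 -0.1 0.2 ; -0.3 0.6 0.0];
evaluateLogisticRegressionModel(Xtest, ytest, beta);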
‎multinomialLogisticRegressionL1.m
@@ -0,0 +1,62 @@
function [ betas ] = multinomialLogisticRegressionL1( trainData, trainLabels, rho)
%MULTINOMIALLOGISTICREGRESSIONL1 Multinomial logistic regression fit by
% gradient descent on a penalized negative log-likelihood.
% NOTE: despite the name, the penalty below is the squared (L2/ridge)
% norm (rho/2)*||beta||^2, not an L1 penalty.
% trainLabels must take values in 1..nclass; class nclass is the reference.
nclass = max(trainLabels(:));
nfeat = size(trainData, 2);
nParam = (nclass-1)*nfeat;  % total number of beta parameters
n = size(trainData, 1);

X = trainData;
Y = trainLabels;

betaCurr = zeros(nclass-1,nfeat);
funLast = 1e+9;
resid = funLast;

iterNum = 1;
maxIter = 10000;
residConvThr = 1e-3;
%stepLen = 1e-1;
stepLen = 0.3;
probs = zeros(nclass,n);

% indicator matrix: Ind(i,j) = 1 if observation j belongs to class i
Ind = zeros(nclass-1,n);

for i=1:nclass-1
    Ind(i,:) = (Y==i);
end

while iterNum < maxIter && resid > residConvThr

    probs = calculateLogisticRegressionProbs(X, betaCurr);

    % negative log-likelihood
    Q0 = 0.0;
    for j=1:n
        Q0 = Q0 - log(probs(Y(j),j));
    end

    betaFlat = reshape(betaCurr, [1,nParam]);
    gBeta = Q0 + (rho/2)*(betaFlat*betaFlat');

    fprintf('Iteration %d, obj. func. %f, neg.log.lik. %f\n', iterNum, gBeta, Q0);

    % gradient of the penalized objective; the rho*betaCurr term is the
    % derivative of the ridge penalty
    grad = (probs(1:nclass-1,:)-Ind)*X + rho*betaCurr;

    betaCurr = betaCurr - stepLen * grad;

    iterNum = iterNum + 1;
    resid = abs(funLast - gBeta);
    funLast = gBeta;
end

betas = betaCurr;

% training accuracy, using the probabilities from the final iteration
numCorrect = 0;
for i=1:n
    [maxProb,ind] = max(probs(:,i));
    if ind==Y(i)
        numCorrect = numCorrect + 1;
    end
end

fprintf('Overall classification accuracy: %d/%d=%f\n', numCorrect, n, numCorrect/n);
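The loop above is plain gradient descent on the penalized objective; in symbols, with K classes (class K the reference), Ind the indicator matrix built before the loop, and the ridge penalty the code actually applies:

$$ g(\beta) = -\sum_{j=1}^{n} \log p_{y_j}(x_j;\beta) + \frac{\rho}{2}\,\lVert \beta \rVert^2, \qquad \nabla_\beta\, g = (P_{1:K-1} - \mathrm{Ind})\,X + \rho\,\beta . $$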

‎mlr/mlr_matlab/mlr_example.m

+15
@@ -0,0 +1,15 @@
function mlr_example()
load fisheriris
% Define the nominal response variable.
sp = categorical(species);
% Fit a nominal model to estimate the species using the flower measurements
% as the predictor variables.
[B,dev,stats] = mnrfit(meas,sp);

% Estimate the probability of being a certain kind of species
% for an iris flower having the measurements (6.2, 3.7, 5.8, 0.2).
x = [6.2, 3.7, 5.8, 0.2];
pihat = mnrval(B,x);
% pihat(1) = prob_setosa; pihat(2) = prob_versicolor;
% pihat(3) = prob_virginica
end
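A small check one might append to the example (per the Statistics Toolbox conventions, B is (nfeat+1) x (k-1) with an intercept row, and mnrfit treats the last category, virginica, as the reference):

% the three fitted probabilities sum to one
assert(abs(sum(pihat) - 1) < 1e-10);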

‎mlr/symbolic derivatives/example1.m

+14
@@ -0,0 +1,14 @@
%beta = sym('b', [1 3]);
x_1 = sym('x', [1 3]);

%beta_mat = sym('b_l', [5 3]);
beta_mat = sym('b', [5 3]);

%expr = log(exp(beta*x_1')/(1+sum(exp(beta_mat * x_1'))));
expr = log(exp(beta_mat(1,:)*x_1')/(1+sum(exp(beta_mat * x_1'))))

%grad = gradient(expr, beta);
%grad_1 = gradient(expr, beta(1));

grad = gradient(expr, beta_mat(1,1)); % w.r.t. b_1_1
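The symbolic result simplifies by hand: with $P_1 = e^{\beta_1 \cdot x} / (1 + \sum_{l=1}^{5} e^{\beta_l \cdot x})$, the derivative computed above is

$$ \frac{\partial}{\partial b_{1,1}} \log \frac{e^{\beta_1 \cdot x}}{1 + \sum_{l=1}^{5} e^{\beta_l \cdot x}} = x_1\,(1 - P_1), $$

which is exactly what grad_funct_manual.m returns.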
‎grad_funct_manual.m
@@ -0,0 +1,7 @@
function y = grad_funct_manual(x_1,beta_mat)
% Hand-simplified gradient of the log multinomial-logit probability
% w.r.t. b_1_1: x1*(1 - P1), matching the symbolic result in example1.m.
%x_1 = sym('x', [1 3]);
%beta_mat = sym('b', [5 3]);
y = x_1(1) - (exp(beta_mat(1,:)*x_1')/(1+sum(exp(beta_mat * x_1'))))*x_1(1);
end
@@ -0,0 +1,17 @@
(1/E1) * ( E1*conj(x1)/D - E1^2*conj(x1)/D^2 ) * D

where E1 = exp(b1_1*conj(x1) + b1_2*conj(x2) + b1_3*conj(x3))
and   D  = E1 + exp(b2_1*conj(x1) + b2_2*conj(x2) + b2_3*conj(x3))
              + exp(b3_1*conj(x1) + b3_2*conj(x2) + b3_3*conj(x3))
              + exp(b4_1*conj(x1) + b4_2*conj(x2) + b4_3*conj(x3))
              + exp(b5_1*conj(x1) + b5_2*conj(x2) + b5_3*conj(x3)) + 1;

this simplifies to conj(x1)*(1 - E1/D), i.e. x1*(1 - P1) for real inputs.
‎grad_funct_symbolic.m
@@ -0,0 +1,32 @@
function grad_funct_symbolic()
%beta = sym('b', [1 3]);
x_1 = sym('x', [1 3]);

%beta_mat = sym('b_l', [5 3]);
beta_mat = sym('b', [5 3]);

%expr = log(exp(beta*x_1')/(1+sum(exp(beta_mat * x_1'))));
expr = log(exp(beta_mat(1,:)*x_1')/(1+sum(exp(beta_mat * x_1'))))

%grad = gradient(expr, beta);
%grad_1 = gradient(expr, beta(1));

grad = gradient(expr, beta_mat(1,1)); % w.r.t. b_1_1
ht = matlabFunction(grad);

x_1 = [1 1 1];
beta_mat = zeros(5,3);

beta_mat(1,1) = 2;
beta_mat(1,2) = 3;
beta_mat(1,3) = 4;

y = feval(ht,beta_mat(1,1),beta_mat(1,2),beta_mat(1,3),...
    beta_mat(2,1),beta_mat(2,2),beta_mat(2,3),...
    beta_mat(3,1),beta_mat(3,2),beta_mat(3,3),...
    beta_mat(4,1),beta_mat(4,2),beta_mat(4,3),...
    beta_mat(5,1),beta_mat(5,2),beta_mat(5,3),...
    x_1(1),x_1(2),x_1(3));

end
+9
@@ -0,0 +1,9 @@
fh = @grad_funct_manual;
x_1 = [1 1 1];
beta_mat = zeros(5,3);

beta_mat(1,1) = 2;
beta_mat(1,2) = 3;
beta_mat(1,3) = 4;

y = feval(fh,x_1,beta_mat);
192 Bytes
Binary file not shown.

‎mlr/symbolic derivatives/y_manual.mat

183 Bytes
Binary file not shown.
