
Commit f280879

committed Jul 4, 2016
Add my MLR from scratch and the other MLRs to this repository
1 parent ed9ac0c commit f280879

15 files changed: +494 -0 lines changed
 

‎mlr/mlr_from_scratch/multilogit.m

+236
@@ -0,0 +1,236 @@
function results = multilogit(y,x,beta0,maxit,tol);
% PURPOSE: implements multinomial logistic regression
%          Pr(y_i=j) = exp(x_i'beta_j)/sum_l[exp(x_i'beta_l)]
% where:   i   = 1,2,...,nobs
%          j,l = 0,1,2,...,ncat
%-------------------------------------------------------------------------%
% USAGE: results = multilogit(y,x,beta0,maxit,tol)
% where: y = response variable vector (nobs x 1)
%            the response variable should be coded sequentially from 0 to
%            ncat, i.e., y in {0,1,2,...,ncat}
%        x = matrix of covariates (nobs x nvar)
%            NOTE: to include a constant term in each beta_j,
%            include a column of ones in x
%        beta0 = optional starting values for beta (nvar x ncat+1) (default=0)
%        maxit = optional maximum number of iterations (default=100)
%        tol   = optional convergence tolerance (default=1e-6)
%-------------------------------------------------------------------------%
% RETURNS: a structure
%        results.meth      = 'multilogit'
%        results.beta_mat  = (nvar x ncat) matrix of beta coefficients:
%                            [beta_1 beta_2 ... beta_ncat] under the
%                            normalization beta_0 = 0
%        results.beta_vec  = (nvar*ncat x 1) vector of beta coefficients:
%                            [beta_1 ; beta_2 ; ... ; beta_ncat] under the
%                            normalization beta_0 = 0
%        results.covb      = (nvar*ncat x nvar*ncat) covariance matrix
%                            of results.beta_vec
%        results.tstat_mat = matrix of t-statistics conformable to
%                            results.beta_mat
%        results.tstat_vec = vector of t-statistics conformable to
%                            results.beta_vec
%        results.yfit      = (nobs x ncat+1) matrix of fitted
%                            probabilities: [P_0 P_1 ... P_ncat]
%                            where P_j = [P_1j ; P_2j ; ... ; P_nobsj]
%        results.lik       = unrestricted log likelihood
%        results.cnvg      = convergence criterion
%        results.iter      = number of iterations
%        results.nobs      = number of observations
%        results.nvar      = number of variables
%        results.ncat      = number of categories of the dependent variable
%                            (excluding the reference category j = 0)
%        results.count     = vector of counts of each value taken by y, i.e.,
%                            count = [#y=0 #y=1 ... #y=ncat]
%        results.y         = y vector
%        results.lratio    = LR test statistic against the intercept-only model
%                            (all slope coefficients = 0), distributed chi-squared
%                            with (nvar-1)*ncat degrees of freedom
%        results.rsqr      = McFadden pseudo-R^2
%
%-------------------------------------------------------------------------%
% A NOTE: Since users might prefer results (coefficients and tstats) in
% either a vector or matrix format, and since there is no single natural
% representation for these in the multinomial logit model, the results
% structure returns both. Note that the input argument beta0 supplies
% (optional) starting values in matrix (nvar x ncat+1) format.
%
%-------------------------------------------------------------------------%
% SEE ALSO: prt_multilogit, multilogit_lik
%-------------------------------------------------------------------------%
% References: Greene (1997), p.914

% written by:
% Simon D. Woodcock
% CISER / Economics
% Cornell University
% Ithaca, NY
% sdw9@cornell.edu

%---------------------------------------------------------%
%     ERROR CHECKING AND PRELIMINARY CALCULATIONS          %
%---------------------------------------------------------%

if nargin < 2, error('multilogit: wrong # of input arguments'); end;
y = round(y(:)); [nobs cy] = size(y); [rx nvar] = size(x);

if (rx~=nobs), error('multilogit: row dimensions of x and y must agree'); end;

% initial calculations
xstd = [1 std(x(:,2:nvar))];
x = x ./ ( ones(nobs,1)*xstd );     % standardize x
ymin = min(y);
ymax = max(y);
ncat = ymax - ymin;
d0 = ( y*ones(1,ncat+1) ) == ( ones(nobs,1)*(ymin:ymax) ); % put y in dummy format
d = d0(:,2:ncat+1);                 % normalize beta_0 = 0

% starting values
if nargin < 3
    beta0 = zeros(nvar,ncat+1);
else
    [a b] = size(beta0);
    if a == 0
        beta0 = zeros(nvar,ncat+1);
    else
        for j = 1:ncat+1;           % scale all ncat+1 columns of beta0
            beta0(:,j) = beta0(:,j) .* xstd';
        end;
    end;
end;

beta = beta0(:,2:ncat+1);

% default max iterations and tolerance
if nargin < 4 , maxit = 100; tol = 1e-6; end;
if nargin < 5 , tol = 1e-6; end;
if nargin > 5 , error('multilogit: wrong # of arguments'); end;

% check nvar and ncat are consistently defined
[rbeta cbeta] = size(beta);
if nvar ~= rbeta
    error('multilogit: rows of beta and columns of x do not agree')
end;
if ncat ~= cbeta
    error(['multilogit: number of columns in beta and categories in y do not agree. ' ...
           'check that y is numbered continuously, i.e., y takes values in {0,1,2,3,4,5}' ...
           ' is ok, y takes values in {0,1,2,3,4,99} is not.'])
end;

%----------------------------------------------------%
% MAXIMUM LIKELIHOOD ESTIMATION OF MULTINOMIAL LOGIT %
%----------------------------------------------------%

% likelihood and derivatives at starting values
[P,lnL] = multilogit_lik(y,x,beta,d);
[g H] = multilogit_deriv(x,d,P,nvar,ncat,nobs);

iter = 0;

for j = 1:ncat                      % vectorize beta and gradient for newton-raphson update
    f = (j-1)*nvar + 1;
    l = j*nvar;
    vb(f:l,1) = beta(:,j);
    vg(f:l,1) = g(:,j);
end;

% newton-raphson update
while (abs(vg'*(H\vg)/length(vg)) > tol) & (iter < maxit)
    iter = iter + 1;
    betaold = beta;
    vbold = vb;
    vb = vbold - H\vg;
    for j = 1:ncat                  % de-vectorize updated beta for pass to multilogit_lik
        f = (j-1)*nvar + 1;
        l = j*nvar;
        beta(:,j) = vb(f:l,1);
    end;
    [P,lnL] = multilogit_lik(y,x,beta,d);            % update P, lnL
    [g H] = multilogit_deriv(x,d,P,nvar,ncat,nobs);  % update g, H
    for j = 1:ncat;                 % vectorize updated g for next N-R update
        f = (j-1)*nvar + 1;
        l = j*nvar;
        vg(f:l,1) = g(:,j);
    end;
    disp(['iteration: ' num2str(iter)]);
    disp(['log-likelihood: ' num2str(lnL)]);
end;

%---------------------------------------------------------%
%     GENERATE RESULTS STRUCTURE                           %
%---------------------------------------------------------%

results.meth = 'multilogit';
for j = 1:ncat
    results.beta_mat(:,j) = beta(:,j) ./ xstd';      % restore original scale
end;
for j = 1:ncat
    f = (j-1)*nvar + 1;
    l = j*nvar;
    results.beta_vec(f:l,1) = results.beta_mat(:,j);
end;
results.covb = -inv(H) ./ kron(ones(ncat),(xstd'*xstd)); % restore original scale
stdb = sqrt(diag(results.covb));
results.tstat_vec = results.beta_vec ./ stdb;
for j = 1:ncat
    f = (j-1)*nvar + 1;
    l = j*nvar;
    results.tstat_mat(:,j) = results.tstat_vec(f:l,1);
end;
P_0 = ones(nobs,1) - sum(P')';
results.yfit = [P_0 P];
results.lik = lnL;
results.cnvg = tol;
results.iter = iter;
results.nobs = nobs;
results.nvar = nvar;
results.ncat = ncat;
results.count = [nobs-sum(sum(d)') sum(d)];
results.y = y;

% basic specification testing
p = results.count / nobs;
lnLr = nobs*sum((p.*log(p))');      % restricted log-likelihood: intercepts only
results.lratio = -2*(lnLr - results.lik);
results.rsqr = 1 - (results.lik / lnLr);             % McFadden pseudo-R^2

%---------------------------------------------------------%
%   SUPPLEMENTARY FUNCTION FOR COMPUTING DERIVATIVES       %
%---------------------------------------------------------%

function [g,H] = multilogit_deriv(x,d,P,nvar,ncat,nobs);
% PURPOSE: computes the gradient and Hessian of the multinomial logit model
% ---------------------------------------------------------
% References: Greene (1997), p.914

% written by:
% Simon D. Woodcock
% CISER / Economics
% 201 Caldwell Hall
% Cornell University
% Ithaca, NY 14850
% sdw9@cornell.edu

% compute gradient matrix (nvar x ncat)
tmp = d - P;
g = x'*tmp;

% compute Hessian, which has (ncat)^2 blocks of size (nvar x nvar);
% this algorithm builds each block individually, m & n are block indices
H = zeros(nvar*ncat);
for m = 1:ncat;
    for n = 1:ncat;
        fr = (m-1)*nvar + 1;
        lr = m*nvar;
        fc = (n-1)*nvar + 1;
        lc = n*nvar;
        index = (n==m);
        index = repmat(index,nobs,1);
        H(fr:lr,fc:lc) = -( ( x.*( P(:,m)*ones(1,nvar) ) )' * ( x.*( (index-P(:,n))*ones(1,nvar) ) ) );
    end;
end;
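For reference, multilogit_deriv assembles the standard multinomial-logit derivatives (Greene 1997, p.914). In the notation of the header comments, with d_ij the category dummy and P_ij the fitted probability, the code's g = x'*(d - P) and the Hessian block loop compute

$$ g_j = \sum_{i=1}^{nobs} (d_{ij} - P_{ij})\,x_i, \qquad H_{mn} = -\sum_{i=1}^{nobs} P_{im}\,(\mathbf{1}[m=n] - P_{in})\,x_i x_i' . $$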
+59
@@ -0,0 +1,59 @@
% http://www.spatial-econometrics.com/regress/
% http://www.spatial-econometrics.com/regress/contents.html
% PURPOSE: demonstrates the use of multilogit.m
% author: simon d. woodcock
% 9/13/2002

clear; clc;

% be sure to add the econometrics toolbox to your search path

%---- CREATE SOME DEMO DATA ----%

% specify the size of the demo
nobs = 1000;    % number of observations
nvar = 15;      % number of covariates
numcat = 5;     % number of categories

% specify the parameter vector
% note the beta vector associated with category 0 is normalized to zero
beta = [zeros(nvar,1),ones(nvar,numcat-1)];

% specify the covariates: x must include a column of ones if there
% is a constant term
xmat = randn(nobs,nvar-1);
x = [ones(nobs,1),xmat];

% generate the response variable y
xbeta = x*beta;
e = 0.1*randn(nobs,numcat);
xb = xbeta + e;
exp_xb = exp(xb);
sum_exp_xb = sum(exp_xb');
for j = 1:numcat;
    P(:,j) = exp_xb(:,j) ./ sum_exp_xb';
end;
cum_P = [cumsum(P')]';
u = rand(nobs,1);
yt = ones(nobs,1)*99;
for i = 1:nobs;
    for j = 1:numcat;
        if ((u(i,1) <= cum_P(i,j)) & (yt(i,1) == 99))
            yt(i,1) = j;
        end;
    end;
end;
y = yt - ones(nobs,1);  % y takes values in {0,1,2,...,numcat-1}

%---- CALL MULTILOGIT.M AND PRINT RESULTS ----%

% call multilogit using default starting values, convergence criterion, and
% maximum iterations
results = multilogit(y,x);

% assign variable and category names to arrays
vnames = strvcat('y','constant','x1','x2','x3','x4','x5','x6','x7','x8','x9','x10','x11','x12','x13','x14');
cnames = strvcat('j=0','j=1','j=2','j=3','j=4');

% print results
prt_multilogit(results,vnames,cnames)
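The double loop that draws y above is inverse-CDF sampling from each row of cum_P; a vectorized equivalent (a sketch; y_alt is an illustrative name):

% count how many cumulative thresholds each u(i) exceeds
y_alt = sum( (u*ones(1,numcat)) > cum_P , 2 );  % values in {0,1,...,numcat-1}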
‎calculateLogisticRegressionProbs.m
@@ -0,0 +1,22 @@
function [ probs ] = calculateLogisticRegressionProbs( X, beta )
%CALCULATELOGISTICREGRESSIONPROBS Class probabilities for multinomial
% logistic regression, with the last class as the reference (beta_nclass = 0).
% X     : (n x nfeat) data matrix
% beta  : (nclass-1 x nfeat) coefficient matrix
% probs : (nclass x n) matrix of fitted class probabilities
n = size(X,1);
nclass = size(beta,1)+1;
probs = zeros(nclass,n);
for j=1:n
    % unnormalized scores for the nclass-1 non-reference classes
    ps = zeros(nclass-1,1);
    for i=1:nclass-1
        ps(i) = exp(beta(i,:)*X(j,:)');
    end

    % the reference class contributes exp(0) = 1 to the normalizer
    sumP = sum(ps(:))+1;

    for i=1:nclass-1
        probs(i,j) = ps(i) / sumP;
    end

    probs(nclass,j) = 1-sum(probs(1:nclass-1,j));
end

end
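A minimal sanity-check sketch for this function (the data values are illustrative); each column of probs should sum to one:

X = [1 0.5 -0.2 ; 1 -1.0 0.3];          % two observations, three features
beta = [0.4 -0.1 0.2 ; -0.3 0.6 0.0];   % nclass-1 = 2 rows, so 3 classes
probs = calculateLogisticRegressionProbs(X, beta);
disp(sum(probs,1));                     % both entries should equal 1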
‎evaluateLogisticRegressionModel.m
@@ -0,0 +1,21 @@
function [ ] = evaluateLogisticRegressionModel( testData, testLabels, beta )
%EVALUATELOGISTICREGRESSIONMODEL Reports test-set accuracy and the
% confusion matrix (rows = predicted class, columns = true class).
% testLabels must take values in 1..nclass.
theProbs = calculateLogisticRegressionProbs(testData, beta);
n = size(testData,1);

nclass = size(beta,1) + 1;
confMatrix = zeros(nclass,nclass);

for i=1:n
    % predict the class with the highest fitted probability
    [maxProb,ind] = max(theProbs(:,i));
    confMatrix(ind,testLabels(i)) = confMatrix(ind,testLabels(i)) + 1;
end

ovAcc = sum(diag(confMatrix)) / n;

fprintf('Overall accuracy on test data: %f, confusion matrix: \n', ovAcc);

disp(confMatrix);

end
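A minimal call sketch (placeholder data; labels must be coded 1..nclass, matching the rows of probs):

Xtest = [1 0.5 -0.2 ; 1 -1.0 0.3];      % illustrative held-out data
ytest = [1 ; 2];                        % true labels in 1..nclass (here nclass = 3)
beta = [0.4 -0.1 0.2 ; -0.3 0.6 0.0];
evaluateLogisticRegressionModel(Xtest, ytest, beta);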
‎multinomialLogisticRegressionL1.m
@@ -0,0 +1,62 @@
function [ betas ] = multinomialLogisticRegressionL1( trainData, trainLabels, rho)
%MULTINOMIALLOGISTICREGRESSIONL1 Multinomial logistic regression fit by
% gradient descent on a penalized negative log-likelihood.
% NOTE: despite the name, the penalty below is the squared (L2/ridge)
% norm (rho/2)*||beta||^2, not an L1 penalty.
% trainLabels must take values in 1..nclass; class nclass is the reference.
nclass = max(trainLabels(:));
nfeat = size(trainData, 2);
nParam = (nclass-1)*nfeat;  % total number of beta parameters
n = size(trainData, 1);

X = trainData;
Y = trainLabels;

betaCurr = zeros(nclass-1,nfeat);
funLast = 1e+9;
resid = funLast;

iterNum = 1;
maxIter = 10000;
residConvThr = 1e-3;
%stepLen = 1e-1;
stepLen = 0.3;
probs = zeros(nclass,n);

% indicator matrix: Ind(i,j) = 1 if observation j belongs to class i
Ind = zeros(nclass-1,n);

for i=1:nclass-1
    Ind(i,:) = (Y==i);
end

while iterNum < maxIter && resid > residConvThr

    probs = calculateLogisticRegressionProbs(X, betaCurr);

    % negative log-likelihood
    Q0 = 0.0;
    for j=1:n
        Q0 = Q0 - log(probs(Y(j),j));
    end

    betaFlat = reshape(betaCurr, [1,nParam]);
    gBeta = Q0 + (rho/2)*(betaFlat*betaFlat');

    fprintf('Iteration %d, obj. func. %f, neg.log.lik. %f\n', iterNum, gBeta, Q0);

    % gradient of the penalized objective; the rho*betaCurr term is the
    % derivative of the ridge penalty
    grad = (probs(1:nclass-1,:)-Ind)*X + rho*betaCurr;

    betaCurr = betaCurr - stepLen * grad;

    iterNum = iterNum + 1;
    resid = abs(funLast - gBeta);
    funLast = gBeta;
end

betas = betaCurr;

% training accuracy, using the probabilities from the final iteration
numCorrect = 0;
for i=1:n
    [maxProb,ind] = max(probs(:,i));
    if ind==Y(i)
        numCorrect = numCorrect + 1;
    end
end

fprintf('Overall classification accuracy: %d/%d=%f\n', numCorrect, n, numCorrect/n);
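The loop above is plain gradient descent on the penalized objective; in symbols, with K classes (class K the reference), Ind the indicator matrix built before the loop, and the ridge penalty the code actually applies:

$$ g(\beta) = -\sum_{j=1}^{n} \log p_{y_j}(x_j;\beta) + \frac{\rho}{2}\,\lVert \beta \rVert^2, \qquad \nabla_\beta\, g = (P_{1:K-1} - \mathrm{Ind})\,X + \rho\,\beta . $$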

‎mlr/mlr_matlab/mlr_example.m

+15
@@ -0,0 +1,15 @@
function mlr_example()
load fisheriris
% Define the nominal response variable.
sp = categorical(species);
% Fit a nominal model to estimate the species using the flower measurements
% as the predictor variables.
[B,dev,stats] = mnrfit(meas,sp);

% Estimate the probability of being a certain kind of species
% for an iris flower having the measurements (6.2, 3.7, 5.8, 0.2).
x = [6.2, 3.7, 5.8, 0.2];
pihat = mnrval(B,x);
% pihat(1) = prob_setosa; pihat(2) = prob_versicolor;
% pihat(3) = prob_virginica
end
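A small check one might append to the example (per the Statistics Toolbox conventions, B is (nfeat+1) x (k-1) with an intercept row, and mnrfit treats the last category, virginica, as the reference):

% the three fitted probabilities sum to one
assert(abs(sum(pihat) - 1) < 1e-10);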

‎mlr/symbolic derivatives/example1.m

+14
@@ -0,0 +1,14 @@
%beta = sym('b', [1 3]);
x_1 = sym('x', [1 3]);

%beta_mat = sym('b_l', [5 3]);
beta_mat = sym('b', [5 3]);

%expr = log(exp(beta*x_1')/(1+sum(exp(beta_mat * x_1'))));
expr = log(exp(beta_mat(1,:)*x_1')/(1+sum(exp(beta_mat * x_1'))))

%grad = gradient(expr, beta);
%grad_1 = gradient(expr, beta(1));

grad = gradient(expr, beta_mat(1,1)); % w.r.t. b_1_1
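The symbolic result simplifies by hand: with $P_1 = e^{\beta_1 \cdot x} / (1 + \sum_{l=1}^{5} e^{\beta_l \cdot x})$, the derivative computed above is

$$ \frac{\partial}{\partial b_{1,1}} \log \frac{e^{\beta_1 \cdot x}}{1 + \sum_{l=1}^{5} e^{\beta_l \cdot x}} = x_1\,(1 - P_1), $$

which is exactly what grad_funct_manual.m returns.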
‎grad_funct_manual.m
@@ -0,0 +1,7 @@
function y = grad_funct_manual(x_1,beta_mat)
% Hand-simplified gradient of the log multinomial-logit probability
% w.r.t. b_1_1: x1*(1 - P1), matching the symbolic result in example1.m.
%x_1 = sym('x', [1 3]);
%beta_mat = sym('b', [5 3]);
y = x_1(1) - (exp(beta_mat(1,:)*x_1')/(1+sum(exp(beta_mat * x_1'))))*x_1(1);
end
@@ -0,0 +1,17 @@
(1/E1) * ( E1*conj(x1)/D - E1^2*conj(x1)/D^2 ) * D

where E1 = exp(b1_1*conj(x1) + b1_2*conj(x2) + b1_3*conj(x3))
and   D  = E1 + exp(b2_1*conj(x1) + b2_2*conj(x2) + b2_3*conj(x3))
              + exp(b3_1*conj(x1) + b3_2*conj(x2) + b3_3*conj(x3))
              + exp(b4_1*conj(x1) + b4_2*conj(x2) + b4_3*conj(x3))
              + exp(b5_1*conj(x1) + b5_2*conj(x2) + b5_3*conj(x3)) + 1;

this simplifies to conj(x1)*(1 - E1/D), i.e. x1*(1 - P1) for real inputs.
‎grad_funct_symbolic.m
@@ -0,0 +1,32 @@
function grad_funct_symbolic()
%beta = sym('b', [1 3]);
x_1 = sym('x', [1 3]);

%beta_mat = sym('b_l', [5 3]);
beta_mat = sym('b', [5 3]);

%expr = log(exp(beta*x_1')/(1+sum(exp(beta_mat * x_1'))));
expr = log(exp(beta_mat(1,:)*x_1')/(1+sum(exp(beta_mat * x_1'))))

%grad = gradient(expr, beta);
%grad_1 = gradient(expr, beta(1));

grad = gradient(expr, beta_mat(1,1)); % w.r.t. b_1_1
ht = matlabFunction(grad);

x_1 = [1 1 1];
beta_mat = zeros(5,3);

beta_mat(1,1) = 2;
beta_mat(1,2) = 3;
beta_mat(1,3) = 4;

y = feval(ht,beta_mat(1,1),beta_mat(1,2),beta_mat(1,3),...
    beta_mat(2,1),beta_mat(2,2),beta_mat(2,3),...
    beta_mat(3,1),beta_mat(3,2),beta_mat(3,3),...
    beta_mat(4,1),beta_mat(4,2),beta_mat(4,3),...
    beta_mat(5,1),beta_mat(5,2),beta_mat(5,3),...
    x_1(1),x_1(2),x_1(3));

end
+9
@@ -0,0 +1,9 @@
fh = @grad_funct_manual;
x_1 = [1 1 1];
beta_mat = zeros(5,3);

beta_mat(1,1) = 2;
beta_mat(1,2) = 3;
beta_mat(1,3) = 4;

y = feval(fh,x_1,beta_mat);
192 Bytes
Binary file not shown.

‎mlr/symbolic derivatives/y_manual.mat

183 Bytes
Binary file not shown.
