This example uses a data set from Draper and Smith (1981, pp. 629-630). Class SelectionRegression
is invoked to find the best regression for each subset size using the
$R^2$ criterion.
import java.text.*;
import com.imsl.stat.*;
import com.imsl.math.PrintMatrix;
import com.imsl.math.PrintMatrixFormat;
/**
 * Example program for {@code SelectionRegression}.
 *
 * <p>Fits regressions of {@code y} on subsets of the four columns of
 * {@code x}, then prints two reports: the criterion value and variable
 * numbers of the regressions reported for every subset size, followed by
 * a coefficient table for each subset size.
 */
public class SelectionRegressionEx1 {

    /** Criterion name shown in parentheses in every report heading. */
    private static final String CRITERION_OPTION = "R-squared";

    /**
     * Runs the example and writes both reports to standard output.
     *
     * @param args ignored
     * @throws Exception propagated unchanged from the regression library
     */
    public static void main(String[] args) throws Exception {
        // One row per observation, one column per candidate regressor.
        double[][] x = {
            {7., 26., 6., 60.},
            {1., 29., 15., 52.},
            {11., 56., 8., 20.},
            {11., 31., 8., 47.},
            {7., 52., 6., 33.},
            {11., 55., 9., 22.},
            {3., 71., 17., 6.},
            {1., 31., 22., 44.},
            {2., 54., 18., 22.},
            {21., 47., 4., 26.},
            {1., 40., 23., 34.},
            {11., 66., 9., 12.},
            {10.0, 68., 8., 12.}
        };
        // Response value for each row of x.
        double[] y = {
            78.5, 74.3, 104.3, 87.6,
            95.9, 109.2, 102.7, 72.5,
            93.1, 115.9, 83.8, 113.3, 109.4
        };

        // Derived from the data instead of repeating the literal 4.
        final int nRegressors = x[0].length;

        SelectionRegression sr = new SelectionRegression(nRegressors);
        sr.compute(x, y);
        SelectionRegression.Statistics stats = sr.getStatistics();

        printCriterionReport(stats, nRegressors);
        printCoefficientReport(stats, nRegressors);
    }

    /**
     * Prints, for each subset size from 1 to {@code nRegressors}, every
     * criterion value followed by the variable numbers listed for it.
     *
     * @param stats statistics obtained from the computed regression
     * @param nRegressors largest subset size to report
     */
    private static void printCriterionReport(
            SelectionRegression.Statistics stats, int nRegressors) {
        MessageFormat heading =
            new MessageFormat("Regressions with {0} variable(s) ({1})");

        for (int size = 1; size <= nRegressors; size++) {
            double[] criteria = stats.getCriterionValues(size);
            int[][] variables = stats.getIndependentVariables(size);

            Object[] headingArgs = {Integer.valueOf(size), CRITERION_OPTION};
            System.out.println(heading.format(headingArgs));
            // Constant label: no placeholders, so no MessageFormat is needed.
            System.out.println(" Criterion Variables");

            for (int j = 0; j < criteria.length; j++) {
                System.out.print(" " + criteria[j] + " ");
                for (int variable : variables[j]) {
                    System.out.print(variable + " ");
                }
                System.out.println("");
            }
            System.out.println("");
        }
    }

    /**
     * Prints one coefficient table per subset size, headed by the subset
     * size and the criterion name.
     *
     * @param stats statistics obtained from the computed regression
     * @param nRegressors number of tables to print
     */
    private static void printCoefficientReport(
            SelectionRegression.Statistics stats, int nRegressors) {
        MessageFormat heading =
            new MessageFormat("Best Regressions with {0} variable(s) ({1})");
        final String columnLabels = "Variable Coefficient"
            + " Standard Error t-statistic p-value";

        // NOTE(review): this loop passes 0-based indices to
        // getCoefficientStatistics, while the criterion report passes 1-based
        // subset sizes to its getters. Kept exactly as in the original
        // example; confirm the index convention against the API docs.
        for (int index = 0; index < nRegressors; index++) {
            System.out.println("");
            Object[] headingArgs = {Integer.valueOf(index + 1), CRITERION_OPTION};
            System.out.println(heading.format(headingArgs));
            System.out.println(columnLabels);

            double[][] coefficients = stats.getCoefficientStatistics(index);

            PrintMatrix pm = new PrintMatrix();
            pm.setColumnSpacing(10);
            // The label line above already names the columns, so the
            // matrix printer's own row and column labels are switched off.
            PrintMatrixFormat format = new PrintMatrixFormat();
            format.setNoColumnLabels();
            format.setNoRowLabels();
            pm.print(format, coefficients);

            System.out.println("");
            System.out.println("");
        }
    }
}
Regressions with 1 variable(s) (R-squared)
Criterion Variables
67.45419641316093 4
66.6268257633294 2
53.39480238350336 1
28.587273122981173 3
Regressions with 2 variable(s) (R-squared)
Criterion Variables
97.86783745356321 1 2
97.24710477169315 1 4
93.52896406158075 3 4
68.00604079500503 2 4
54.81667488448235 1 3
Regressions with 3 variable(s) (R-squared)
Criterion Variables
98.23354512004268 1 2 4
98.22846792190867 1 2 3
98.12810925873437 1 3 4
97.28199593862732 2 3 4
Regressions with 4 variable(s) (R-squared)
Criterion Variables
98.23756204076803 1 2 3 4
Best Regressions with 1 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
4 -0.738 0.155 -4.775 0.001
Best Regressions with 2 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
1 1.468 0.121 12.105 0
2 0.662 0.046 14.442 0
Best Regressions with 3 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
1 1.452 0.117 12.41 0
2 0.416 0.186 2.242 0.052
4 -0.237 0.173 -1.365 0.205
Best Regressions with 4 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
1 1.551 0.745 2.083 0.071
2 0.51 0.724 0.705 0.501
3 0.102 0.755 0.135 0.896
4 -0.144 0.709 -0.203 0.844
Link to Java source.