// SummaryNumberCodeLeanCustomBinSize portal 02-19-2014d1130
// 2015-01-13
import java.util.ArrayList;
import java.util.Arrays;
public class SummaryNumberCodeLean
UsefulTools useful_tools = new UsefulTools();
int[] numbers;
int[] values; //this will be a copied version of the numbers for sorting later on (for the median)
double[] dnumbers;
int size = 0; //number of elements in dataset
long sum = 0; //sum of elements in dataset
double entropy = 0;
double normalized_entropy = 0;
double cv = 0;
double stdev = 0;
double mean = 0;
double median = 0;
int min = 0;
int max = 0;
double dmin = 0;
double dmax = 0;
double kurtosis = 0;
double skew = 0;
double ninety_fifth_percentile = 0;
double fifth_percentile = 0;
double dynamic_range = 0;
double nonn_entropy = 0; //nonn indicates that this values was calculated from non-normalized data
double nonn_normalized_entropy = 0;
double nonn_cv = 0;
double nonn_stdev = 0;
double nonn_mean = 0;
double nonn_median = 0;
double nonn_min = 0;
double nonn_max = 0;
double nonn_ninety_fifth_percentile = 0;
double nonn_fifth_percentile = 0;
double nonn_dynamic_range = 0;
double n_mean = 0;
double n_stdev =0;
double n_min = 0;
double n_max = 0;
double n_ninety_fifth_percentile = 0;
double n_fifth_percentile =0;
public static void main(String[] args) {
// TODO Auto-generated method stub
SummaryNumberCodeLean sn = new SummaryNumberCodeLean();
System.out.println(sn.getTime());
sn.TestEntropyCalculation_100513d1153();
System.out.println(sn.getTime());
public void TestEntropyCalculation_100513d1103()
;
int[] iarray = {3,5,10,12,5,5
System.out.println(calculateEntropy(iarray));
//value was 1.24 which was correct
}
public void TestEntropyCalculation_100513d1153()
//I'll store a text file with 330,000 values into an int array
int[] iarray = useful_tools.storeTextFiletoIntArray("F:\\kurt\\storage\\CIM Research Folder\\DR\\2013\\10-5-13\\330k_numbers_for_testing.txt");
ArrayList test_list = useful_tools.storeTextFiletoArrayList("F:\\kurt\\storage\\CIM Research Folder\\DR\\2013\\10-5-13\\330k_numbers_for_testing.txt");
calculateEntropy(iarray);
calculateSTDEV(iarray);
cv = stdev/mean;
median = calculateMedian(iarray);
kurtosis = useful_tools.kurtosis(iarray, mean, stdev);
skew = useful_tools.Skew(iarray, mean, stdev);
ninety_fifth_percentile = useful_tools.percentile(values, 0.95);
fifth_percentile = useful_tools.percentile(values, 0.05);
dynamic_range = ninety_fifth_percentile/fifth_percentile;
nonn_entropy = entropy; //nonn indicates that this values was calculated from non-normalized data
nonn_normalized_entropy = normalized_entropy;
nonn_cv = cv;
nonn_stdev = stdev;
nonn_mean = mean;
nonn_median = median;
nonn_min = min;
nonn_max = max;
nonn_ninety_fifth_percentile = ninety_fifth_percentile;
nonn_fifth_percentile = fifth_percentile;
nonn_dynamic_range = dynamic_range;
//now median normalize the list
double[] normalized_array = median_normalize(iarray, median);
//now get a few more numbers
n_mean = getMean(normalized_array);
getMinAndMax(normalized_array);
n_min = dmin;
n_max = dmax;
n_stdev = calculateSTDEV(normalized_array, n_mean);
n_ninety_fifth_percentile = useful_tools.percentile(normalized_array, 0.95);
n_fifth_percentile = useful_tools.percentile(normalized_array, 0.05);
iarray=multiply_by_factor_and_convert_to_int(normalized_array,10000);
calculateEntropy(iarray);
outputValues();
public void getSummaryNumbers(int[] iarray)
calculateEntropy(iarray);
calculateSTDEV(iarray);
cv = stdev/mean;
median = calculateMedian(iarray);
kurtosis = useful_tools.kurtosis(iarray, mean, stdev);
skew = useful_tools.Skew(iarray, mean, stdev);
ninety_fifth_percentile = useful_tools.percentile(values, 0.95);
fifth_percentile = useful_tools.percentile(values, 0.05);
dynamic_range = ninety_fifth_percentile/fifth_percentile;
nonn_entropy = entropy; //nonn indicates that this values was calculated from non-normalized data
nonn_normalized_entropy = normalized_entropy;
nonn_cv = cv;
nonn_stdev = stdev;
nonn_mean = mean;
nonn_median = median;
nonn_min = min;
nonn_max = max;
nonn_ninety_fifth_percentile = ninety_fifth_percentile;
nonn_fifth_percentile = fifth_percentile;
nonn_dynamic_range = dynamic_range;
//now median normalize the list
double[] normalized_array = median_normalize(iarray, median);
//now get a few more numbers
n_mean = getMean(normalized_array);
getMinAndMax(normalized_array);
n_min = dmin;
n_max = dmax;
n_stdev = calculateSTDEV(normalized_array, n_mean);
n_ninety_fifth_percentile = useful_tools.percentile(normalized_array, 0.95);
n_fifth_percentile = useful_tools.percentile(normalized_array, 0.05);
iarray=multiply_by_factor_and_convert_to_int(normalized_array,10000);
calculateEntropy(iarray);
public double calculateEntropy(int[] numbers)
this.numbers = numbers;
size = numbers.length;
values = new int[size];
max = numbers[0]; //not necessary for entropy calculation
min = numbers[0];
//first get the min and max
for(int i=0; i<size; i++)
{
sum+=numbers[i]; //for the calculation of the mean
values[i] = numbers[i]; //copying the list to a new list that will be sorted later on
if(numbers[i]<min) //not necessary for entropy calculation
{
min = numbers[i]; //not necessary for entropy calculation
if(numbersi>max) //not necessary for entropy calculation
max = numbers[i]; //not necessary for entropy calculation
}
int[] counts = new intmax+1;
for(int i=0; i<size; i++)
try
{
counts[numbers[i]]++;
catch(Exception e)
int k=0;
}
double running_sum = 0;
for(int i=0; i<=max; i++)
int value = counts[i];
if(value!=0)
{
double ratio = ((double)value)/size;
running_sum+=ratio*Math.log(ratio);
}
entropy = -running_sum;
normalized_entropy = entropy/Math.log(size);
mean = ((double)sum)/size;
return entropy;
}
public double calculateSTDEV(int[] numbers)
double sum = 0.0;
for (int i = 0; i <size; i++) {
sum += (numbers[i] - mean) * (numbers[i] - mean);
stdev = Math.sqrt(sum / (size - 1));
return stdev;
}
public double calculateSTDEV(double[] numbers, double mean)
double sum = 0.0;
for (int i = 0; i <size; i++) {
sum += (numbers[i] - mean) * (numbers[i] - mean);
stdev = Math.sqrt(sum / (size - 1));
return stdev;
}
public String getTime()
return useful_tools.getTime();
public double calculateMedian(int[] numbers)
Arrays.sort(values);
if (size % 2 == 1)
{
median = values[(size+1)/2-1];
else
double lower = values[size/2-1];
double upper = values[size/2];
median = (lower + upper) / 2.0;
return median;
}
public double[] median_normalize(int[] array, double median)
double[] return_list = new double[array.length];
for(int i=0; i<array.length; i++)
{
return_list[i]=(double)array[i]/median;
return return_list;
}
public int[] multiply_by_factor_and_convert_to_int(double[] darray, int factor)
int[] return_array = new int[darray.length];
for(int i=0; i<darray.length; i++)
{
return_array[i] = Double.valueOf(darray[i]*factor).intValue();
return return_array;
}
public double getMean(double[] values)
double sum = 0.0;
for(int i=0; i<values.length; i++)
{
sum+=values[i];
return sum/(double)values.length;
}
public void getMinAndMax(double[] values)
dmin = values[0];
dmax = values[0];
for(int i=0; i<values.length; i++)
{
if(values[i]<dmin)
{
dmin = values[i];
if(valuesi>dmax)
dmax = values[i];
}
}
public void outputValues()
System.out.println("Entropy: " +nonn_entropy+"\r\n");
System.out.println("Normalized Entropy: " +nonn_normalized_entropy+"\r\n");
System.out.println("Mean: " + nonn_mean);
System.out.println("STDEV: " + nonn_stdev);
System.out.println("CV: " + nonn_cv);
System.out.println("Median: " + nonn_median);
System.out.println("Min: " + nonn_min);
System.out.println("Max: " + nonn_max);
System.out.println("Kurtosis: " + kurtosis);
System.out.println("Skew: " + skew);
System.out.println("Ninetey_fifth_percentile: " + nonn_ninety_fifth_percentile);
System.out.println("Fifth_percentile: " + nonn_fifth_percentile);
System.out.println("Dynamic_range: "+ dynamic_range);
System.out.println("Entropy_Normalized_Data: " + entropy);
System.out.println("Normalized_Entropy_Normalized_Data: " + normalized_entropy);
System.out.println("Mean_normalized: " + n_mean);
System.out.println("STDEV_normalized: " + n_stdev);
System.out.println("Min_normalized: "+ n_min);
System.out.println("Max_normalized: " + n_max);
System.out.println("Ninetey_fifth_percentile_normalized: " +n_ninety_fifth_percentile);
System.out.println("Fifth_percentile_normalized: " + n_fifth_percentile);
public double getEntropy()
return nonn_entropy;
public double getNormalizedEntropy()
return nonn_normalized_entropy;
public double getMax()
return nonn_max;
public double getMin()
return nonn_min;
public double getCV()
return nonn_cv;
public double getSTDEV()
return nonn_stdev;
public double getMean()
return nonn_mean;
public double getMedian()
return nonn_median;
public double getFifthPercentile()
return nonn_fifth_percentile;
public double getNinetyFifthPercentile()
return nonn_ninety_fifth_percentile;
public double getEntropyNormalizedData()
return entropy;
public double getNormalizedEntropyNormalizedData()
return normalized_entropy;
public double getMaxNormalized()
return n_max;
public double getMinNormalized()
return n_min;
public double getSTDEVNormalized()
return n_stdev;
public double getMeanNormalized()
return n_mean;
public double getFifthPercentileNormalized()
return n_fifth_percentile;
public double getNinetyFifthPercentileNormalized()
return n_ninety_fifth_percentile;
public double getKurtosis()
return kurtosis;
public double getSkew()
return skew;
public double getDynamicRange()
return dynamic_range;
public long getSum()
return sum;
}