added Scorer class. Tweaking the code atm.

author: Feideus <erwan.ulrich@gmail.com> 2018-07-19 21:04:52 +0200
committer: Feideus <erwan.ulrich@gmail.com> 2018-07-19 21:04:52 +0200
commit: c43811ea2bd6cab7b35e5dca3665a799ab2fb8fa (patch)
tree: 570ad2ce8a8542864bc605a0b81ea0000a402ae5 /src
parent: 5866abefab72e2ea99f4bf3b23328937e2c0622a (diff)
download: schemafuzz-c43811ea2bd6cab7b35e5dca3665a799ab2fb8fa.tar.gz
schemafuzz-c43811ea2bd6cab7b35e5dca3665a799ab2fb8fa.tar.bz2
schemafuzz-c43811ea2bd6cab7b35e5dca3665a799ab2fb8fa.zip
5 files changed, 316 insertions, 9 deletions
diff --git a/src/main/java/org/schemaspy/DBFuzzer.java b/src/main/java/org/schemaspy/DBFuzzer.java
index f6bd878..6b9e1db 100644
--- a/src/main/java/org/schemaspy/DBFuzzer.java
+++ b/src/main/java/org/schemaspy/DBFuzzer.java
@@ -197,7 +197,8 @@ public class DBFuzzer
                 evaluatorProcess.waitFor();
                 ReportVector mutationReport = new ReportVector(currentMutation);
                 mutationReport.parseFile("errorReports/parsedStackTrace_"+currentMutation.getId());
-
+                currentMutation.setReportVector(mutationReport);
+                LOGGER.info(mutationReport.toString());
             }
             catch(Exception e)
             {
diff --git a/src/main/java/org/schemaspy/model/GenericTreeNode.java b/src/main/java/org/schemaspy/model/GenericTreeNode.java
index 97e333b..5bcc794 100644
--- a/src/main/java/org/schemaspy/model/GenericTreeNode.java
+++ b/src/main/java/org/schemaspy/model/GenericTreeNode.java
@@ -24,6 +24,7 @@ public class GenericTreeNode {
     private SingleChange chosenChange;
     private boolean isFirstApperance;
     private HashMap<TableColumn, FkGenericTreeNode> fkMutations = new HashMap<TableColumn, FkGenericTreeNode>();
+    private ReportVector rpv;
 
     /**
      * Default GenericTreeNode constructor
@@ -70,6 +71,10 @@ public class GenericTreeNode {
         this.subTreeWeight = subTreeWeight;
     }
 
+    public ReportVector getReportVector() { return rpv; } // NOTE(review): rpv has no initializer at its declaration, so presumably null until setReportVector runs — confirm
+
+    public void setReportVector(ReportVector rpv) { this.rpv = rpv; } // stores the given reference as-is (no copy, no null check)
+
     public void setPotential_changes(ArrayList<SingleChange> potCh) //used in tests
     {
         this.potential_changes = potCh;
diff --git a/src/main/java/org/schemaspy/model/ReportVector.java b/src/main/java/org/schemaspy/model/ReportVector.java
index b4b3b31..80da4a6 100644
--- a/src/main/java/org/schemaspy/model/ReportVector.java
+++ b/src/main/java/org/schemaspy/model/ReportVector.java
@@ -14,6 +14,7 @@ public class ReportVector {
     public ReportVector(GenericTreeNode parentMutation)
     {
         this.parentMutation = parentMutation;
+        stackTrace = new ArrayList<StackTraceLine>();
     }
 
     public ArrayList<StackTraceLine> getStackTrace() {
@@ -64,7 +65,7 @@ public class ReportVector {
 
                     key = data.replace(":", "");
                 } else {
-                    currentArray.add(data);
+                    currentArray.add(data.replace(",",""));
                 }
             }
 
@@ -76,6 +77,7 @@ public class ReportVector {
         }
     }
 
+
     public void storeLines(HashMap<String,ArrayList<String>> allLists)
     {
         int maxSize=0;
@@ -111,7 +113,29 @@ public class ReportVector {
             StackTraceLine stl = new StackTraceLine(functionName,fileName,lineNumber);
             stackTrace.add(stl);
         }
+    }
 
+    @Override
+    public String toString() {
+        // Layout: ReportVector{stackTrace=<list>, parentMutation=<node>}
+        StringBuilder text = new StringBuilder("ReportVector{");
+        text.append("stackTrace=").append(stackTrace).append(", parentMutation=").append(parentMutation);
+        return text.append('}').toString();
+    }
 
+    public boolean compareStackTrace (ReportVector rpv)
+    {
+        // Traces of different depth can never match line for line.
+        final int depth = this.stackTrace.size();
+        if(rpv.stackTrace.size() != depth)
+            return false;
+
+        // Walk both traces in lockstep and stop at the first differing frame.
+        for(int pos = 0; pos < depth; pos++)
+        {
+            if(!rpv.stackTrace.get(pos).compare(this.stackTrace.get(pos)))
+                return false;
+        }
+        return true;
     }
 }
diff --git a/src/main/java/org/schemaspy/model/Scorer.java b/src/main/java/org/schemaspy/model/Scorer.java
new file mode 100644
index 0000000..317c62a
--- /dev/null
+++ b/src/main/java/org/schemaspy/model/Scorer.java
@@ -0,0 +1,261 @@
+package org.schemaspy.model;
+
+import org.apache.commons.math3.linear.MatrixUtils;
+import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.RealVector;
+import org.apache.commons.math3.stat.StatUtils;
+
+
+import java.lang.reflect.Array;
+import java.util.*;
+
+/**
+ * Created by LASFE using IntelliJ on 7/20/2014.
+ */
+public class Scorer {
+    //Test set comes from http://www.jiaaro.com/KNN-for-humans/
+    /*
+          red         1
+          orange      2
+          yellow      3
+          green       4
+          blue        5
+          purple      6
+
+    */
+
+    //The results of the program can be found at http://www.jiaaro.com/KNN-for-humans/
+    public double[][] input =  { //rows to cluster; row 0's length is used as the column count
+            {371, 3, 6},
+            {378, 3, 4},
+            //{355, 3, 4},
+            //{362, 3, 2},
+            //{379, 3, 4}
+    };
+
+    //public String[] label;
+    public double[] predict =  {371, 3, 6}; //mutations hash to be processed; only read by commented-out code in this class
+    public HashMap clusters = new HashMap(); //filled by score() with cluster index -> new double[] of row width
+    HashMap<Double, Integer> euclideanDistances = new HashMap(); //distance -> centroid row index, written by euclideanDistance()
+
+    public int k = 6; //Number of clusters
+    public int max_iterations = 1000; //iteration cap checked by converged()
+
+    //Builds a map from row index to that row's values, one entry per matrix row
+    private static HashMap matrixToHash(RealMatrix mat){
+        HashMap rowsByIndex = new HashMap();
+        int row = 0;
+        while(row < mat.getRowDimension()){
+            rowsByIndex.put(row, mat.getRow(row));
+            row++;
+        }
+        return rowsByIndex;
+    }
+
+    /**
+     * Clusters a fresh Scorer's input and returns the index stored under the smallest distance.
+     */
+    public int score () {
+        //A separate instance does all the work; this object's own fields are not used
+        Scorer sc = new Scorer();
+        //Rebuild the input array from a RealMatrix built over itself
+        sc.input = MatrixUtils.createRealMatrix(sc.input).getData();
+
+        //Seed every cluster slot with an array as wide as one input row
+        for(int i=0; i<sc.k; i++)
+            sc.clusters.put(i, new double[sc.input[0].length]);
+
+        sc.solve();
+
+        //Fix: the method had no return statement and passed this.euclideanDistances, but
+        //sc.solve() records its distances on sc; read sc's map and return the winning
+        //index. NOTE(review): the 'predict' vector is still never scored here.
+        return sc.closestClusterIndex(sc.euclideanDistances);
+    }
+
+
+    /*private void predictClass(double[][] centroids){
+
+        int index = euclideanDistance( this.predict,  centroids );
+        System.out.println(Arrays.toString(this.predict) + " is closest to Centroid " + index);
+    }*/
+
+    public int closestClusterIndex(HashMap<Double,Integer> map)
+    {
+        Double shortest = new TreeSet<Double>(map.keySet()).first(); //sorted keys: smallest distance comes first
+        return map.get(shortest);
+    }
+
+
+    private void solve(){
+        //Two independently generated random centroid sets; converged() compares
+        //them to decide whether the loop below should run at all
+        double [][] current = createRandomCentroids(this.k, this.input);
+        double [][] previous = createRandomCentroids(this.k,  this.input);
+
+        //Points grouped by the index of their nearest centroid
+        HashMap<Integer, ArrayList<double[]>> grouped;
+
+        //Repeat assignment and re-centering until converged() reports a stop
+        for(int round = 0; !this.converged(previous, current, round); round++){
+
+            //Remember where the centroids were before this round moves them
+            previous = current;
+
+            //Assign each input row to a centroid, then move every centroid to
+            //the mean of the rows assigned to it
+            grouped = this.findClosestCentroids( this.input, current);
+            current = this.getNewCentroids( grouped);
+
+        }
+
+    }
+
+    //Lets assign 'labels' or 'outputs' to each of our 'clusters' or grouped set of points
+   /* private void assignLabels( HashMap<Integer, ArrayList<double[]>> clusters ){
+        //Lets turn out list of outputs into a unique set
+        Set mySet = new HashSet(Arrays.asList(this.output));
+        this.label = new String[clusters.size()];
+
+        //Lets take the first point in each cluster, see the its index in the input and use
+        //that index to get the label from the output
+        for(int i=0; i< clusters.size(); i++){
+            int index =  ArrayUtils.indexOf(this.input , clusters.get(i).get(0) );
+
+            this.label[i] = this.output[index];
+        }
+
+        //System.out.println(Arrays.deepToString( this.label ) );
+    }*/
+
+    /**
+     * Computes one centroid per cluster as the column-wise mean of the cluster's points.
+     * An empty cluster receives a single all-zero point first, so its centroid is the origin.
+     *
+     * @param hash points grouped by cluster index; the loop below reads keys 0..size-1
+     * @return the new centroids, indexed like the clusters
+     */
+    private double[][] getNewCentroids( HashMap<Integer, ArrayList<double[]>> hash){
+        double[][] newCentroids = new double[hash.size()][];
+
+        //Fix: the placeholder used to be a hard-coded {0,0,0}; size it from the input so
+        //centroids keep the same width as the data for any number of columns
+        int width = this.input[0].length;
+
+        for(ArrayList<double[]> points : hash.values())
+        {
+            System.out.println(points);
+            if(points.isEmpty())
+                points.add(new double[width]);
+        }
+
+        System.out.println("hash"+hash);
+
+        for(int i=0;i<hash.size();i++){
+            //A matrix over the cluster's points gives direct access to each column
+            RealMatrix mat = MatrixUtils.createRealMatrix(hash.get(i).toArray(new double[][]{}));
+            double[] mean = new double[mat.getColumnDimension()];
+            for (int j = 0; j < mat.getColumnDimension(); j++) {
+                mean[j] = StatUtils.mean(mat.getColumn(j));
+            }
+            newCentroids[i] = mean;
+        }
+        return newCentroids;
+    }
+
+    private boolean converged(double [][] oldCentroids, double [][] centroids, int iterations){
+        //Two stop conditions, checked in this order: iteration budget, then stable centroids
+        boolean budgetSpent = iterations>this.max_iterations;
+        if(budgetSpent) {
+            System.out.println("Max iterations reached. Returning...");
+            return true;
+        }
+
+        //Element-wise comparison of the previous and current centroid coordinates
+        boolean unchanged = Arrays.deepEquals(oldCentroids, centroids);
+        if(unchanged)
+            System.out.println("Centroids have converged. Returning...");
+
+        return unchanged;
+    }
+
+    /**
+     * Creates {@code row} random centroids whose coordinates lie within the per-column
+     * value range of {@code input}.
+     */
+    private double[][] createRandomCentroids(int row,  double[][] input){
+        RealMatrix mat =  MatrixUtils.createRealMatrix(input) ;
+        int column = input[0].length;
+
+        //The column bounds do not depend on the centroid being built, so compute them once
+        double[] min = new double[column], span = new double[column];
+        for (int j = 0; j < column; j++) {
+            min[j] = mat.getColumnVector(j).getMinValue();
+            span[j] = mat.getColumnVector(j).getMaxValue() - min[j];
+        }
+
+        Random rand = new Random();
+        double[][] centroids = new double[row][column];
+        //Each coordinate is a random point between that column's min and max
+        for(int i=0;i<row;i++)
+            for (int j = 0; j < column; j++)
+                centroids[i][j] = min[j] + span[j] * rand.nextDouble();
+
+        return  centroids;
+    }
+
+    //Groups every input row under the index of the centroid reported as closest to it
+    private HashMap findClosestCentroids(double[][] input, double [][] centroids){
+        HashMap<Integer, ArrayList> grouped = new HashMap();
+        //One (initially empty) bucket per centroid, keyed by the centroid's row index
+        int bucket = 0;
+        while(bucket < centroids.length){
+            grouped.put(bucket, new ArrayList<double[]>() );
+            bucket++;
+        }
+
+        for(int row = 0; row < input.length; row++){
+            //Distances from this row to every centroid, then the index of the winner
+            HashMap<Double,Integer> distances = euclideanDistance( input[row],  centroids );
+            int winner = closestClusterIndex(distances);
+            grouped.get(winner).add(input[row]);
+        }
+        return grouped;
+    }
+
+    /**
+     * Computes the Euclidean distance from {@code input} to every row of {@code centroids}.
+     *
+     * @param input     the point to measure from
+     * @param centroids one centroid per row; each row is subtracted from {@code input}
+     * @return the shared {@code euclideanDistances} map, distance -> centroid row index;
+     *         equal distances collapse into one entry holding the later index
+     */
+    public HashMap<Double,Integer> euclideanDistance( double[] input, double[][] centroids ){
+
+        //Fix: distances from earlier calls used to stay in the map, so the smallest key
+        //could belong to a different input or an outdated centroid; start clean each call
+        euclideanDistances.clear();
+
+        RealMatrix m = MatrixUtils.createRealMatrix( centroids );
+        RealVector Y = MatrixUtils.createRealVector( input);
+
+        for (int i=0; i<m.getRowDimension(); i++){
+            RealVector sub = m.getRowVector(i).subtract( Y );
+
+            //Square root of the sum of squared coordinate differences
+            double distance = Math.sqrt(StatUtils.sumSq(sub.toArray()));
+            euclideanDistances.put( distance  , i/*cluster number*/ );
+        }
+        System.out.println(euclideanDistances);
+        return euclideanDistances;
+    }
+
+    public int computeScore()
+    {
+        //Stub: no scoring logic is implemented here; the result is always 0
+        return 0;
+    }
+
+}
+
diff --git a/src/main/java/org/schemaspy/model/StackTraceLine.java b/src/main/java/org/schemaspy/model/StackTraceLine.java
index 36271cb..328024a 100644
--- a/src/main/java/org/schemaspy/model/StackTraceLine.java
+++ b/src/main/java/org/schemaspy/model/StackTraceLine.java
@@ -2,22 +2,22 @@ package org.schemaspy.model;
 
 public class StackTraceLine {
 
-    private String functionname;
+    private String functionName;
     private String fileName;
     private int lineNumber;
 
-    public StackTraceLine(String functionname, String fileName, int lineNumber) {
-        this.functionname = functionname;
+    public StackTraceLine(String functionName, String fileName, int lineNumber) {
+        this.functionName = functionName;
         this.fileName = fileName;
         this.lineNumber = lineNumber;
     }
 
-    public String getFunctionname() {
-        return functionname;
+    public String getfunctionName() {
+        return functionName;
     }
 
-    public void setFunctionname(String functionname) {
-        this.functionname = functionname;
+    public void setfunctionName(String functionName) {
+        this.functionName = functionName;
     }
 
     public String getFileName() {
@@ -35,4 +35,20 @@ public class StackTraceLine {
     public void setLineNumber(int lineNumber) {
         this.lineNumber = lineNumber;
     }
+
+    @Override
+    public String toString() {
+        StringBuilder text = new StringBuilder("StackTraceLine{"); // names are single-quoted, the line number is not
+        text.append("functionName='").append(functionName).append('\'');
+        text.append(", fileName='").append(fileName).append('\'');
+        text.append(", lineNumber=").append(lineNumber);
+        return text.append('}').toString();
+    }
+
+    public boolean compare(StackTraceLine stl)
+    {
+        // Frames match only when file, function and line number all agree.
+        return stl.fileName.equals(this.fileName) && stl.functionName.equals(this.functionName)
+                && stl.lineNumber == this.lineNumber;
+    }
 }
author	Feideus <erwan.ulrich@gmail.com>	2018-07-19 21:04:52 +0200
committer	Feideus <erwan.ulrich@gmail.com>	2018-07-19 21:04:52 +0200
commit	c43811ea2bd6cab7b35e5dca3665a799ab2fb8fa (patch)
tree	570ad2ce8a8542864bc605a0b81ea0000a402ae5 /src
parent	5866abefab72e2ea99f4bf3b23328937e2c0622a (diff)
download	schemafuzz-c43811ea2bd6cab7b35e5dca3665a799ab2fb8fa.tar.gz schemafuzz-c43811ea2bd6cab7b35e5dca3665a799ab2fb8fa.tar.bz2 schemafuzz-c43811ea2bd6cab7b35e5dca3665a799ab2fb8fa.zip