Saturday, 8 September 2018

Preprocessing of missing values. Data Mining and Business Intelligence GTU 2170715


Replace the missing values in the given automobile dataset “imports-85.data” with the mean, median, and mode of each numeric attribute.

Download Dataset From:
https://github.com/nyuvis/datasets/blob/master/auto/imports-85.data
Dataset Information:
               https://archive.ics.uci.edu/ml/machine-learning-databases/autos/imports-85.names

       Code :
Mean
               import java.sql.*;
               import java.util.TimeZone;
                
               public class dm2 {
                
                    public static void main(String[] args) throws Exception{
                                // TODO Auto-generated method stub
                                Class.forName("oracle.jdbc.driver.OracleDriver");
                                TimeZone t1;
                                t1 = TimeZone.getTimeZone("yourtimezone");
                                TimeZone.setDefault(t1);
                               
                                Connection con = DriverManager.getConnection("jdbc:oracle:thin:@localhost:1521:xe", "s16cos140", "student");
                
                        PreparedStatement ps = con.prepareStatement("select avg(NORMALIZED_LOSSES),avg(BORE),avg(STROKE),avg(PRICE) from dm1 where NORMALIZED_LOSSES not LIKE '?%' and BORE not LIKE '?%' and STROKE not LIKE '?%' and PRICE not LIKE '?%'");
                        ResultSet rs = ps.executeQuery();
                        int norm=0,price=0;
                        float bore=0,stroke=0;
                        while (rs.next())
                        {
                                norm = rs.getInt(1);
                                bore = rs.getFloat(2);
                                stroke = rs.getFloat(3);
                                price = rs.getInt(4);
                        }
                        System.out.println("Norm is:"+norm+"\nPrice: "+price+"\nBore: "+bore+"\nStroke: "+stroke);
                        
                        PreparedStatement ps1 = con.prepareStatement("update DM1 set NORMALIZED_LOSSES='"+norm+"',BORE='"+bore+"',STROKE='"+stroke+"',PRICE='"+price+"' where NORMALIZED_LOSSES='?' OR BORE='?' OR STROKE='?' OR HORSEPOWER='?' OR PEAK_RPM='?' OR PRICE='?'");
                        ps1.executeUpdate();
                       
                    }
                
               }

Median
import java.sql.*;
import java.util.Arrays;
import java.util.TimeZone;

public class dm2 {

    /** Marker the dataset stores in place of a missing value. */
    private static final String MISSING = "?";

    /**
     * Replaces the missing values ("?") of NORMALIZED_LOSSES in table DM2 with the
     * median of the known values of that column.
     *
     * The known values are read, sorted in ascending order, and the middle element
     * (or the rounded average of the two middle elements for an even count) is
     * written into every row whose value is missing. When the table has no known
     * value, nothing is updated.
     *
     * @param args unused
     * @throws Exception if the JDBC driver class cannot be loaded or a database
     *                   call fails
     */
    public static void main(String[] args) throws Exception {
        Class.forName("oracle.jdbc.driver.OracleDriver");
        // "yourtimezone" is a placeholder; substitute a real time-zone ID.
        TimeZone t1 = TimeZone.getTimeZone("yourtimezone");
        TimeZone.setDefault(t1);

        // try-with-resources closes the connection even when a statement fails.
        try (Connection con = DriverManager.getConnection("jdbc:oracle:thin:@localhost:1521:xe", "s16cos140", "student")) {
            int[] known = readKnownValues(con);
            if (known.length == 0) {
                // A median of no data is undefined; leave the table as it is.
                System.out.println("\nNo known NORMALIZED_LOSSES values; nothing replaced.");
                return;
            }

            // The median is only meaningful on ordered data.
            Arrays.sort(known);
            int median = medianOfSorted(known);
            System.out.println("\ni is:"+median);

            try (PreparedStatement ps1 = con.prepareStatement("update DM2 set NORMALIZED_LOSSES=? where NORMALIZED_LOSSES=?")) {
                ps1.setString(1, String.valueOf(median));
                ps1.setString(2, MISSING);
                ps1.executeUpdate();
            }
        }
    }

    /**
     * Reads every NORMALIZED_LOSSES value of DM2 that is not the missing marker.
     *
     * @param con open connection to the database holding DM2
     * @return the known values in the order the database returned them
     * @throws SQLException if the query fails
     */
    private static int[] readKnownValues(Connection con) throws SQLException {
        int[] buffer = new int[300];
        int count = 0;
        try (PreparedStatement ps = con.prepareStatement("select NORMALIZED_LOSSES from dm2 where NORMALIZED_LOSSES not LIKE ?")) {
            ps.setString(1, MISSING);
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    // Grow instead of overflowing when the table has more rows than expected.
                    if (count == buffer.length) {
                        buffer = Arrays.copyOf(buffer, buffer.length * 2);
                    }
                    buffer[count++] = rs.getInt(1);
                }
            }
        }
        return Arrays.copyOf(buffer, count);
    }

    /**
     * Returns the median of a non-empty array sorted in ascending order.
     *
     * @param sorted non-empty, ascending values
     * @return the middle value, or the rounded mean of the two middle values when
     *         the length is even
     */
    private static int medianOfSorted(int[] sorted) {
        int mid = sorted.length / 2;
        if (sorted.length % 2 == 1) {
            return sorted[mid];
        }
        // Sum in long so two large ints cannot overflow.
        long pairSum = (long) sorted[mid - 1] + sorted[mid];
        return (int) Math.round(pairSum / 2.0);
    }

}


Mode
import java.sql.*;
import java.util.Arrays;
import java.util.TimeZone;
public class dm2 {

    /** Marker the dataset stores in place of a missing value. */
    private static final String MISSING = "?";

    /**
     * Replaces the missing values ("?") of NORMALIZED_LOSSES in table DM3 with the
     * mode (most frequent value) of the known values of that column.
     *
     * When several values share the highest frequency, the smallest of them is
     * used so the result does not depend on the order rows come back in. When the
     * table has no known value, nothing is updated.
     *
     * @param args unused
     * @throws Exception if the JDBC driver class cannot be loaded or a database
     *                   call fails
     */
    public static void main(String[] args) throws Exception {
        Class.forName("oracle.jdbc.driver.OracleDriver");
        // "yourtimezone" is a placeholder; substitute a real time-zone ID.
        TimeZone t1 = TimeZone.getTimeZone("yourtimezone");
        TimeZone.setDefault(t1);

        // try-with-resources closes the connection even when a statement fails.
        try (Connection con = DriverManager.getConnection("jdbc:oracle:thin:@localhost:1521:xe", "s16cos140", "student")) {
            int[] known = readKnownValues(con);
            System.out.println("\nN is:"+known.length);
            if (known.length == 0) {
                // A mode of no data is undefined; leave the table as it is.
                return;
            }

            // Sorting groups equal values together so one pass can count them.
            Arrays.sort(known);
            int mode = modeOfSorted(known);

            try (PreparedStatement ps1 = con.prepareStatement("update DM3 set NORMALIZED_LOSSES=? where NORMALIZED_LOSSES=?")) {
                ps1.setString(1, String.valueOf(mode));
                ps1.setString(2, MISSING);
                ps1.executeUpdate();
            }
        }
    }

    /**
     * Reads every NORMALIZED_LOSSES value of DM3 that is not the missing marker.
     *
     * @param con open connection to the database holding DM3
     * @return the known values in the order the database returned them
     * @throws SQLException if the query fails
     */
    private static int[] readKnownValues(Connection con) throws SQLException {
        int[] buffer = new int[300];
        int count = 0;
        try (PreparedStatement ps = con.prepareStatement("select NORMALIZED_LOSSES from dm3 where NORMALIZED_LOSSES not LIKE ?")) {
            ps.setString(1, MISSING);
            try (ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    // Grow instead of overflowing when the table has more rows than expected.
                    if (count == buffer.length) {
                        buffer = Arrays.copyOf(buffer, buffer.length * 2);
                    }
                    buffer[count++] = rs.getInt(1);
                }
            }
        }
        return Arrays.copyOf(buffer, count);
    }

    /**
     * Returns the most frequent value of a non-empty array sorted in ascending order.
     *
     * @param sorted non-empty, ascending values
     * @return the value with the longest run; the smallest such value on a tie
     */
    private static int modeOfSorted(int[] sorted) {
        int best = sorted[0];
        int bestCount = 0;
        int runStart = 0;
        // i == sorted.length is a sentinel step that closes the final run.
        for (int i = 1; i <= sorted.length; i++) {
            if (i == sorted.length || sorted[i] != sorted[runStart]) {
                int runLength = i - runStart;
                if (runLength > bestCount) {
                    bestCount = runLength;
                    best = sorted[runStart];
                }
                runStart = i;
            }
        }
        return best;
    }
}

No comments:

Post a Comment

It's time to increase blogging capability and to have a chance to contribute to the digital world. Any interested people who want to make t...