Evaluating Difficulty of Multi-class Imbalanced Data


Mateusz Lango, Krystyna Napierała, Jerzy Stefanowski



Datasets
Histograms

Detailed results


Real data sets

         
          

Data set

Yeast

        

Majority class

MIT

        
          

Safeness Report

         
 

 SAFETY1-med

 SAFETY1-avg

SAFETY2-med

SAFETY2-avg

SAFETY3-med

 SAFETY3-avg

 SAFETY0-med

 SAFETY0-avg

 

MIT

1,000

0,947

1,000

0,958

1,000

0,947

1,000

0,947

 

ME1

0,880

0,849

0,840

0,837

0,800

0,759

0,600

0,609

 

EXC

0,880

0,689

0,820

0,714

0,700

0,609

0,600

0,474

 

ME2

0,400

0,467

0,520

0,533

0,400

0,380

0,200

0,235

 

VAC

0,160

0,184

0,300

0,322

0,100

0,130

0,000

0,040

 

POX

0,620

0,520

0,680

0,602

0,550

0,490

0,500

0,440

 
          
          

Recall Table

         
 

MIT

ME1

EXC

ME2

VAC

POX

G-mean

  

Counts

1299

44

35

51

30

20

   

CART

0,929

0,727

0,486

0,412

0,067

0,350

0,383

  

NB

0,043

0,614

0,771

0,039

0,667

0,550

0,258

  

3NN

0,978

0,727

0,600

0,176

0,000

0,500

0,000

  

SVM

0,999

0,000

0,000

0,000

0,000

0,100

0,000

  
          

Data set

Winequality-red

        

Majority class

4

        
          

Safeness Report

         
 

 SAFETY1-med

 SAFETY1-avg

SAFETY2-med

SAFETY2-avg

SAFETY3-med

 SAFETY3-avg

 SAFETY0-med

 SAFETY0-avg

 

4

1,000

0,870

1,000

0,896

1,000

0,870

1,000

0,870

 

5

0,400

0,384

0,520

0,499

0,400

0,367

0,400

0,340

 

2

0,000

0,128

0,200

0,286

0,000

0,092

0,000

0,034

 

6

0,320

0,273

0,400

0,377

0,200

0,183

0,000

0,033

 

1

0,320

0,276

0,400

0,376

0,200

0,180

0,000

0,020

 
          
          

Recall Table

         
 

4

5

2

6

1

G-mean

   

Counts

1319

199

53

18

10

    

CART

0,820

0,317

0,113

0,056

0,100

0,175

   

NB

0,803

0,528

0,075

0,000

0,100

0,000

   

3NN

0,908

0,216

0,057

0,000

0,100

0,000

   

SVM

0,974

0,111

0,000

0,000

0,000

0,000

   
          
          

Data set

New-thyroid

        

Majority class

1

        
          

Safeness Report

         
 

 SAFETY1-med

 SAFETY1-avg

SAFETY2-med

SAFETY2-avg

SAFETY3-med

 SAFETY3-avg

 SAFETY0-med

 SAFETY0-avg

 

1

1,000

0,971

1,000

0,977

1,000

0,971

1,000

0,971

 

2

1,000

0,777

1,000

0,822

1,000

0,777

1,000

0,777

 

3

1,000

0,780

1,000

0,824

1,000

0,780

1,000

0,780

 
          
          

Recall Table

         
 

1

2

3

G-mean

     

Counts

150

35

30

      

CART

0,953

0,943

0,833

0,908

     

NB

0,993

0,943

0,867

0,933

     

3NN

0,993

0,714

0,800

0,828

     

SVM

1,000

0,171

0,000

0,000

     
          

Data set

Ecoli

        

Majority class

cp

        
          

Safeness Report

         
 

 SAFETY1-med

 SAFETY1-avg

SAFETY2-med

SAFETY2-avg

SAFETY3-med

 SAFETY3-avg

 SAFETY0-med

 SAFETY0-avg

 

cp

1,000

0,889

1,000

0,911

1,000

0,889

1,000

0,889

 

cpU

0,600

0,575

0,680

0,658

0,600

0,571

0,600

0,566

 

om

0,980

0,908

0,970

0,897

0,950

0,845

0,900

0,740

 

pp

1,000

0,862

1,000

0,881

1,000

0,844

1,000

0,815

 
          
          

Recall Table

         
 

cp

cpU

om

pp

G-mean

    

Counts

220

35

20

52

     

CART

0,845

0,600

0,850

0,788

0,764

    

NB

0,482

0,686

0,300

0,904

0,547

    

3NN

0,882

0,486

0,750

0,846

0,722

    

SVM

1,000

0,000

0,000

0,365

0,000

    
          

Data set

Cleveland

        

Majority class

0

        
          

Safeness Report

         
 

 SAFETY1-med

 SAFETY1-avg

SAFETY2-med

SAFETY2-avg

SAFETY3-med

 SAFETY3-avg

 SAFETY0-med

 SAFETY0-avg

 

0

0,800

0,772

0,840

0,818

0,800

0,772

0,800

0,772

 

2

0,200

0,293

0,360

0,407

0,200

0,234

0,200

0,137

 

3

0,360

0,322

0,460

0,423

0,300

0,249

0,200

0,126

 

4

0,320

0,335

0,400

0,423

0,200

0,238

0,000

0,077

 
          
          

Recall Table

         
 

0

2

3

4

G-mean

    

Counts

214

35

35

13

     

CART

0,794

0,286

0,114

0,077

0,211

    

NB

0,864

0,143

0,257

0,154

0,264

    

3NN

0,930

0,086

0,000

0,000

0,000

    

SVM

1,000

0,000

0,000

0,000

0,000

    
          

Artificial data sets

         
          

Data set

A1

        

Majority class

MAJ

        
          

Safeness Report

         
 

 SAFETY1-med

 SAFETY1-avg

SAFETY2-med

SAFETY2-avg

SAFETY3-med

 SAFETY3-avg

 SAFETY0-med

 SAFETY0-avg

 

MIN1

1,000

0,912

1,000

0,929

1,000

0,912

1,000

0,912

 

MAJ

1,000

0,964

1,000

0,971

1,000

0,964

1,000

0,964

 

MIN2

1,000

0,956

1,000

0,965

1,000

0,956

1,000

0,956

 
          
          

Recall Table

         
 

MIN1

MAJ

MIN2

G-mean

     

Counts

120

840

240

      

CART

0,933

0,973

0,933

0,946

     

NB

0,475

0,992

0,804

0,724

     

3NN

0,942

0,976

0,979

0,966

     

SVM

0,000

1,000

0,000

0,000

     
          

Data set

A2

        

Majority class

MAJ

        
          

Safeness Report

         
 

 SAFETY1-med

 SAFETY1-avg

SAFETY2-med

SAFETY2-avg

SAFETY3-med

 SAFETY3-avg

 SAFETY0-med

 SAFETY0-avg

 

MIN1

0,840

0,740

0,820

0,762

0,700

0,676

0,600

0,568

 

MAJ

1,000

0,927

1,000

0,942

1,000

0,927

1,000

0,927

 

MIN2

1,000

0,845

1,000

0,862

1,000

0,815

1,000

0,766

 
          
          

Recall Table

         
 

MIN1

MAJ

MIN2

G-mean

     

Counts

120

840

240

      

CART

0,567

0,927

0,738

0,729

     

NB

0,325

0,999

0,567

0,569

     

3NN

0,542

0,957

0,792

0,743

     

SVM

0,000

1,000

0,000

0,000

     
          

Data set

A3

        

Majority class

MAJ

        
          

Safeness Report

         
 

 SAFETY1-med

 SAFETY1-avg

SAFETY2-med

SAFETY2-avg

SAFETY3-med

 SAFETY3-avg

 SAFETY0-med

 SAFETY0-avg

 

MIN1

0,600

0,548

0,680

0,598

0,500

0,461

0,200

0,315

 

MAJ

0,800

0,822

0,840

0,858

0,800

0,822

0,800

0,822

 

MIN2

0,600

0,590

0,680

0,650

0,600

0,543

0,400

0,466

 
          
          

Recall Table

         
 

MIN1

MAJ

MIN2

G-mean

     

Counts

120

840

240

      

CART

0,258

0,813

0,421

0,445

     

NB

0,000

1,000

0,025

0,000

     

3NN

0,200

0,902

0,392

0,413

     

SVM

0,000

1,000

0,000

0,000

     
          

Data set

A1b

        

Majority class

MAJ

        
          

Safeness Report

         
 

 SAFETY1-med

 SAFETY1-avg

SAFETY2-med

SAFETY2-avg

SAFETY3-med

 SAFETY3-avg

 SAFETY0-med

 SAFETY0-avg

 

MIN1

1,000

0,750

1,000

0,800

1,000

0,750

1,000

0,750

 

MAJ

1,000

0,910

1,000

0,928

1,000

0,910

1,000

0,910

 

MIN2

1,000

0,853

1,000

0,883

1,000

0,853

1,000

0,853

 
          
          

Recall Table

         
 

MIN1

MAJ

MIN2

G-mean

     

Counts

120

840

240

      

CART

0,742

0,918

0,825

0,825

     

NB

0,600

0,974

0,783

0,771

     

3NN

0,775

0,935

0,842

0,848

     

SVM

0,000

1,000

0,000

0,000