Training a Decision Tree or a Random Forest on a classification problem, and comparing the latter with AdaBoost

Author: Prof. Fabien MOUTARDE, Center for Robotics, MINES ParisTech, PSL Université Paris

1. Decision Trees with SciKit-Learn on a very simple dataset

We will first work on a very simple classic dataset: Iris, a classification problem in which the sub-species of an iris flower must be determined from a few geometric characteristics of the flower.

Please FIRST READ the Iris DATASET DESCRIPTION. In this classification problem, there are 3 classes, with a total of 150 examples (each one with 4 input features). Please now execute the code cell below to load and view the dataset.
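For reference, a minimal sketch of what such a loading cell may look like (the actual cell is provided in the notebook; the printing choices here are ours):

```python
from sklearn.datasets import load_iris

# Load the Iris dataset: 150 examples, 4 features, 3 classes
iris = load_iris()
print(iris.DESCR[:500])    # beginning of the dataset description
print(iris.data.shape)     # (150, 4)
print(iris.target_names)   # ['setosa' 'versicolor' 'virginica']
```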

Building, training and evaluating a simple Decision Tree classifier

The SciKit-learn class for Decision Tree classifiers is sklearn.tree.DecisionTreeClassifier.

Please FIRST READ (and understand!) the DecisionTreeClassifier DOCUMENTATION to understand all parameters of the constructor.

You can then begin by running the code block below, in which the default set of parameter values is used. If the graphical view works, look at the structure of the learnt decision tree.
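If you need a starting point, a sketch along these lines trains a default tree and displays it (the train/test split ratio and random_state below are our own arbitrary choices, not necessarily the notebook's):

```python
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.3, random_state=0)

# Default hyperparameters: Gini criterion, no depth limit, etc.
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X_train, y_train)
print("Train accuracy:", clf.score(X_train, y_train))
print("Test accuracy: ", clf.score(X_test, y_test))

# Graphical view of the learnt decision tree
plt.figure(figsize=(12, 8))
plot_tree(clf, feature_names=iris.feature_names,
          class_names=list(iris.target_names), filled=True)
plt.show()
```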

Then, check the influence of the MAIN hyperparameters of the Decision Tree classifier, i.e. (typically) the split criterion, the maximum depth (max_depth), and the minimum number of samples per leaf (min_samples_leaf); a short illustrative sketch is given after the NB below.

NB: note that post-training PRUNING was long absent from SciKit-Learn Decision Trees; recent versions (>= 0.22) provide only minimal cost-complexity pruning, via the ccp_alpha parameter of the constructor.
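As an illustration of how such a parameter study might look (the parameter studied and the value range are our own choices):

```python
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X_train, X_test, y_train, y_test = train_test_split(
    *load_iris(return_X_y=True), test_size=0.3, random_state=0)

# Illustrative study of the influence of max_depth (values chosen arbitrarily)
for depth in [1, 2, 3, 5, 10, None]:
    clf = DecisionTreeClassifier(max_depth=depth, random_state=0).fit(X_train, y_train)
    print(f"max_depth={depth}: train={clf.score(X_train, y_train):.3f}, "
          f"test={clf.score(X_test, y_test):.3f}")
```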

2. Decision Trees on a MORE REALISTIC DATASET: HANDWRITTEN DIGITS

Please FIRST READ the Digits DATASET DESCRIPTION.

In this classification problem, there are 10 classes, with a total of 1797 examples (each one being a 64D vector corresponding to an 8x8 pixmap). Please now execute the code cell below to load the dataset, visualize a typical example, and train a Decision Tree on it. The original code uses a deliberately SUBOPTIMAL set of learning hyperparameter values, which reaches only ~66% test accuracy. Try to play with them in order to improve accuracy.
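A sketch of such a cell might look as follows; the "suboptimal" hyperparameter values below are our own guess (the notebook's actual values may differ):

```python
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

digits = load_digits()   # 1797 examples, 64D vectors (8x8 pixmaps), 10 classes

# Visualize a typical example
plt.imshow(digits.images[0], cmap="gray")
plt.title(f"label = {digits.target[0]}")
plt.show()

X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.3, random_state=0)

# Deliberately weak settings, to mimic the suboptimal starting point
clf = DecisionTreeClassifier(max_depth=5, min_samples_leaf=20, random_state=0)
clf.fit(X_train, y_train)
print("Test accuracy:", clf.score(X_test, y_test))
print(confusion_matrix(y_test, clf.predict(X_test)))
```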

Question: According to the confusion matrices, which digits are most often confused with each other?

Answer:

Finally, find somewhat optimized values for the set of 3 main hyper-parameters for DecisionTree learning, by using GRID-SEARCH WITH CROSS-VALIDATION (see the cross-validation example from the Multi-Layer Perceptron notebook used in an earlier practical session). Put the code in the cell below:
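A possible sketch, assuming the three hyper-parameters retained are criterion, max_depth, and min_samples_leaf (the grids of values are our own choices):

```python
from sklearn.datasets import load_digits
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier

X_train, X_test, y_train, y_test = train_test_split(
    *load_digits(return_X_y=True), test_size=0.3, random_state=0)

# Grid-search with 5-fold cross-validation on the TRAINING set only
param_grid = {
    "criterion": ["gini", "entropy"],
    "max_depth": [5, 10, 15, 20, None],
    "min_samples_leaf": [1, 2, 5, 10],
}
grid = GridSearchCV(DecisionTreeClassifier(random_state=0),
                    param_grid, cv=5, n_jobs=-1)
grid.fit(X_train, y_train)
print("Best params:", grid.best_params_)
print("Best CV accuracy:", grid.best_score_)
print("Test accuracy:", grid.score(X_test, y_test))  # best model, refit on full train set
```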

Question: What is the best TEST accuracy you managed to reach with your DecisionTree after properly grid-searching its hyper-parameters using cross-validation?

Answer:

In order to improve the results, the most natural step is to combine SEVERAL decision trees, using the ensemble model called Random Forest: see below.

3. Building, training and evaluating a Random Forest classifier

The SciKit-learn class for Random Forest classifiers is sklearn.ensemble.RandomForestClassifier.

Please FIRST READ (and understand!) the RandomForestClassifier DOCUMENTATION to understand all parameters of the constructor.

Then you can begin by running the code block below, in which the default set of parameter values is used. As you will see, a RandomForest (even a rather small one) can easily outperform a single Decision Tree.
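As a starting point, a minimal sketch with default parameters on the Digits dataset (the split choices are ours):

```python
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    *load_digits(return_X_y=True), test_size=0.3, random_state=0)

# Default Random Forest (100 trees in recent scikit-learn versions)
rf = RandomForestClassifier(random_state=0)
rf.fit(X_train, y_train)
print("Test accuracy:", rf.score(X_test, y_test))  # typically far above a single tree
```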

Then, check the influence of the MAIN hyperparameters of the Random Forest classifier, i.e. (typically) the number of trees (n_estimators), the maximum depth of each tree (max_depth), and the number of features considered at each split (max_features).

Finally, find somewhat optimized values for the set of 3 main hyper-parameters for RandomForest, by using CROSS-VALIDATION (see the cross-validation example from the Multi-Layer Perceptron notebook used in an earlier practical session). Put the code in the cell below:
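A possible sketch, assuming the three hyper-parameters retained are n_estimators, max_depth, and max_features (the grids of values are our own choices):

```python
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    *load_digits(return_X_y=True), test_size=0.3, random_state=0)

# Illustrative grid over the three main Random Forest hyperparameters
param_grid = {
    "n_estimators": [50, 100, 200],
    "max_depth": [10, 20, None],
    "max_features": ["sqrt", "log2", None],
}
grid = GridSearchCV(RandomForestClassifier(random_state=0),
                    param_grid, cv=5, n_jobs=-1)
grid.fit(X_train, y_train)
print("Best params:", grid.best_params_)
print("Test accuracy:", grid.score(X_test, y_test))
```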

Question: What is the best TEST accuracy you managed to reach with your RandomForest after properly grid-searching its hyper-parameters using cross-validation?

Answer:

4. Building, training and evaluating an AdaBoost classifier

The SciKit-learn class for AdaBoost is sklearn.ensemble.AdaBoostClassifier.

Please FIRST READ (and understand!) the AdaBoostClassifier DOCUMENTATION to understand all parameters of the constructor.

Then begin by running the code block below, in which a default set of parameter values has been used.
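A minimal sketch of such a cell, including the training curves referred to in the question further below (n_estimators=200 and the split are our arbitrary choices):

```python
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    *load_digits(return_X_y=True), test_size=0.3, random_state=0)

# Default weak classifier: a depth-1 decision tree ("decision stump")
ada = AdaBoostClassifier(n_estimators=200, random_state=0)
ada.fit(X_train, y_train)

# Training curves: error after each boosting iteration, via staged_score
train_err = [1 - s for s in ada.staged_score(X_train, y_train)]
test_err = [1 - s for s in ada.staged_score(X_test, y_test)]
plt.plot(train_err, label="train error")
plt.plot(test_err, label="test error")
plt.xlabel("boosting iteration")
plt.legend()
plt.show()
```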

Then, check the influence of the MAIN hyperparameters of the AdaBoost classifier, i.e. (typically) the number of boosting rounds (n_estimators), the learning_rate, and the type/complexity of the weak classifier.

Finally, check which other types of classifiers can be used as the Weak Classifier with the AdaBoost implementation of SciKit-Learn. NB: in principle it is possible to use MLP classifiers as weak classifiers, but not with the SciKit-Learn implementation of MLPClassifier, because its fit method does not handle the per-example weighting (sample_weight) that AdaBoost requires.
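One way to check programmatically whether a given classifier can serve as a weak learner is to test whether its fit method accepts a sample_weight argument, e.g. with scikit-learn's has_fit_parameter utility. The example below is a hypothetical illustration with logistic regression as the weak classifier:

```python
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.utils.validation import has_fit_parameter

# A classifier is usable as weak learner only if its fit() accepts sample_weight
print(has_fit_parameter(LogisticRegression(), "sample_weight"))  # True
print(has_fit_parameter(MLPClassifier(), "sample_weight"))       # False -> not usable

# Hypothetical example with logistic regression as the weak classifier;
# recent scikit-learn uses the `estimator` argument (formerly `base_estimator`)
ada = AdaBoostClassifier(estimator=LogisticRegression(max_iter=1000),
                         n_estimators=20, random_state=0)
# ...then fit/score it like any other classifier
```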

Question: Looking at the training curves, you can see that the training error drops to zero rather quickly, while the test error continues to decrease with further iterations even after the training error has reached zero. Is this normal, and why? (check the course!)

Answer:

Now, for the case of DecisionTree weak classifiers, find somewhat optimized values of (max_depth, n_estimators) by using CROSS-VALIDATION. Put the code below:
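A possible sketch; note the nested parameter syntax `estimator__max_depth`, which reaches into the base tree (the grids of values are our own choices):

```python
from sklearn.datasets import load_digits
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier

X_train, X_test, y_train, y_test = train_test_split(
    *load_digits(return_X_y=True), test_size=0.3, random_state=0)

# `estimator__max_depth` tunes the weak tree inside the ensemble
# (use `base_estimator__max_depth` on older scikit-learn versions)
param_grid = {
    "estimator__max_depth": [1, 2, 3, 5],
    "n_estimators": [50, 100, 200],
}
ada = AdaBoostClassifier(estimator=DecisionTreeClassifier(), random_state=0)
grid = GridSearchCV(ada, param_grid, cv=5, n_jobs=-1)
grid.fit(X_train, y_train)
print("Best params:", grid.best_params_)
print("Test accuracy:", grid.score(X_test, y_test))
```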

Question: What is the best TEST accuracy you managed to reach with your AdaBoostClassifier after properly grid-searching its hyper-parameters using cross-validation?

Answer: