Skip to content

Commit b0636c4

Browse files
committed
ENH Use color histograms with SVMs+Grid Search
This achieves better classification than the Sobel example, while also being easier to explain.
1 parent 8209239 commit b0636c4

2 files changed

Lines changed: 48 additions & 9 deletions

File tree

ch10/README.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ features.py
3333
Contains the ``edginess_sobel`` function from the book as well as a simple
3434
wrapper around ``mahotas.texture.haralick``
3535
simple_classification.py
36-
Classify SimpleImageDataset with texture features + sobel feature
36+
Classify SimpleImageDataset with texture features + color histogram features
3737
figure18.py
3838
Classify ``AnimTransDistr`` with both texture and SURF features.
3939

ch10/simple_classification.py

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,23 @@
77

88
import mahotas as mh
99
from sklearn import cross_validation
10-
from sklearn.linear_model.logistic import LogisticRegression
10+
from sklearn.pipeline import Pipeline
11+
from sklearn.preprocessing import StandardScaler
12+
from sklearn.svm import SVC
13+
from sklearn.grid_search import GridSearchCV
14+
15+
1116
import numpy as np
1217
from glob import glob
13-
from features import texture, edginess_sobel
18+
from features import texture, edginess_sobel, color_histogram
1419

1520
basedir = '../SimpleImageDataset/'
1621

1722

1823
haralicks = []
1924
sobels = []
2025
labels = []
26+
chists = []
2127

2228
print('This script will test (with cross-validation) classification of the simple 3 class dataset')
2329
print('Computing features...')
@@ -29,8 +35,10 @@
2935
# ordering that the filesystem uses
3036
for fname in sorted(images):
3137
im = mh.imread(fname, as_grey=True)
38+
imc = mh.imread(fname)
3239
haralicks.append(texture(im))
3340
sobels.append(edginess_sobel(im))
41+
chists.append(color_histogram(imc))
3442

3543
# Files are named like building00.jpg, scene23.jpg...
3644
labels.append(fname[:-len('xx.jpg')])
@@ -40,25 +48,56 @@
4048
haralicks = np.array(haralicks)
4149
sobels = np.array(sobels)
4250
labels = np.array(labels)
51+
chists = np.array(chists)
52+
53+
haralick_plus_sobel = np.hstack([np.atleast_2d(sobels).T, haralicks])
54+
haralick_plus_chists = np.hstack([chists, haralicks])
55+
haralick_plus_chists_plus_sobel = np.hstack([chists, haralicks, np.atleast_2d(sobels).T])
4356

44-
# We use logistic regression because it is very fast.
57+
cv=cross_validation.LeaveOneOut(len(images))
58+
59+
# We use SVM because it achieves high accuracy on small(ish) datasets
4560
# Feel free to experiment with other classifiers
61+
C_range = 10.0 ** np.arange(-2, 9)
62+
gamma_range = 10.0 ** np.arange(-5, 4)
63+
param_grid = dict(gamma=gamma_range, C=C_range)
64+
grid = GridSearchCV(SVC(), param_grid=param_grid)
65+
66+
p = Pipeline([('preproc', StandardScaler()),
67+
('classifier', grid)])
68+
69+
scores = cross_validation.cross_val_score(
70+
p, haralicks, labels, cv=cv)
71+
print('Accuracy (leave-one-out x-val) with grid-searched SVM [haralick features]: {}%'.format(
72+
0.1 * round(1000 * scores.mean())))
73+
74+
scores = cross_validation.cross_val_score(
75+
p, chists, labels, cv=cv)
76+
print('Accuracy (leave-one-out x-val) with grid-searched SVM [color histograms]: {}%'.format(
77+
0.1 * round(1000 * scores.mean())))
78+
4679
scores = cross_validation.cross_val_score(
47-
LogisticRegression(), haralicks, labels, cv=5)
48-
print('Accuracy (5 fold x-val) with Logistic Regrssion [std features]: {}%'.format(
80+
p, haralick_plus_chists, labels, cv=cv)
81+
print('Accuracy (leave-one-out x-val) with grid-searched SVM [texture features + chists]: {}%'.format(
4982
0.1 * round(1000 * scores.mean())))
5083

5184
haralick_plus_sobel = np.hstack([np.atleast_2d(sobels).T, haralicks])
5285
scores = cross_validation.cross_val_score(
53-
LogisticRegression(), haralick_plus_sobel, labels, cv=5).mean()
54-
print('Accuracy (5 fold x-val) with Logistic Regrssion [std features + sobel]: {}%'.format(
86+
p, haralick_plus_sobel, labels, cv=cv)
87+
print('Accuracy (leave-one-out x-val) with grid-searched SVM [texture features + sobel]: {}%'.format(
88+
0.1 * round(1000 * scores.mean())))
89+
90+
# Stack all three feature groups; leaving out chists here would silently re-run the texture + sobel experiment under the wrong label.
haralick_plus_chists_plus_sobel = np.hstack([chists, haralicks, np.atleast_2d(sobels).T])
91+
scores = cross_validation.cross_val_score(
92+
p, haralick_plus_chists_plus_sobel, labels, cv=cv)
93+
print('Accuracy (leave-one-out x-val) with grid-searched SVM [texture features + color histograms + sobel]: {}%'.format(
5594
0.1 * round(1000 * scores.mean())))
5695

5796

5897
# We can try to just use the sobel feature. The result is almost completely
5998
# random.
6099
scores = cross_validation.cross_val_score(
61-
LogisticRegression(), np.atleast_2d(sobels).T, labels, cv=5).mean()
100+
p, np.atleast_2d(sobels).T, labels, cv=cv).mean()
62101
print('Accuracy (leave-one-out x-val) with grid-searched SVM [only using sobel feature]: {}%'.format(
63102
0.1 * round(1000 * scores.mean())))
64103

0 commit comments

Comments
 (0)