Skip to content

Commit 9d3ea50

Browse files
committed
ENH Finalize large classification method
No longer output a figure, as the results will instead be discussed at greater length in the text.
1 parent e4403a9 commit 9d3ea50

2 files changed

Lines changed: 110 additions & 123 deletions

File tree

ch10/figure18.py

Lines changed: 0 additions & 123 deletions
This file was deleted.

ch10/large_classification.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# This code is supporting material for the book
2+
# Building Machine Learning Systems with Python
3+
# by Willi Richert and Luis Pedro Coelho
4+
# published by PACKT Publishing
5+
#
6+
# It is made available under the MIT License
7+
8+
from __future__ import print_function
9+
import mahotas as mh
10+
from glob import glob
11+
from sklearn import cross_validation
12+
from sklearn.linear_model import LogisticRegression
13+
from sklearn.pipeline import Pipeline
14+
from sklearn.preprocessing import StandardScaler
15+
from sklearn.grid_search import GridSearchCV
16+
import numpy as np
17+
18+
basedir = 'AnimTransDistr'
print('This script will test classification of the AnimTransDistr dataset')

# Classifier shared by every experiment below: features are standardized,
# then fed to a logistic regression whose C is picked by grid search over
# the powers of ten from 1e-4 to 1e2.
C_range = 10.0 ** np.arange(-4, 3)
grid = GridSearchCV(LogisticRegression(), param_grid={'C': C_range})
clf = Pipeline([
    ('preproc', StandardScaler()),
    ('classifier', grid),
])
25+
26+
def features_for(im):
    '''Compute the whole-image feature vector for one image file

    `im` is handed to ``mh.imread``. The returned 1-D array is the
    flattened Haralick texture features of the greyscale (uint8) image
    followed by the output of ``features.color_histogram`` on the image
    as loaded.
    '''
    from features import color_histogram

    pixels = mh.imread(im)
    grey = mh.colors.rgb2grey(pixels).astype(np.uint8)
    texture = mh.features.haralick(grey).ravel()
    colours = color_histogram(pixels)
    return np.concatenate([texture, colours])
32+
33+
def images():
    '''Iterate over all (image,label) pairs

    For each entry of `classes`, in order, the ``*.jpg`` files matching
    ``<basedir>/<class>/`` are yielded in sorted path order. Each item is
    a ``(path, label)`` tuple where ``label`` is the integer index of the
    class within `classes`.
    '''
    for label, class_name in enumerate(classes):
        pattern = '{}/{}/*.jpg'.format(basedir, class_name)
        for path in sorted(glob(pattern)):
            yield path, label
42+
43+
# Class sub-directories of `basedir`; an image's label is the index of its
# class in this list.
classes = ['Anims', 'Cars', 'Distras', 'Trans']

print('Computing whole-image texture features...')
labelled_paths = list(images())
ifeatures = np.array([features_for(path) for path, _ in labelled_paths])
labels = np.array([label for _, label in labelled_paths])

# Shuffled 5-fold split with a fixed seed; the same `cv` object is passed to
# the later cross-validation runs in this script.
cv = cross_validation.KFold(
    len(ifeatures), 5, shuffle=True, random_state=123)
scores0 = cross_validation.cross_val_score(clf, ifeatures, labels, cv=cv)
print('Accuracy (5 fold x-val) with Logistic Regression [image features]: {:.1%}'.format(
    scores0.mean()))
65+
66+
67+
from sklearn.cluster import KMeans
68+
from mahotas.features import surf
69+
70+
71+
print('Computing SURF descriptors...')
alldescriptors = []
for path, _ in images():
    grey = mh.imread(path, as_grey=True).astype(np.uint8)
    # One descriptor array per image, in the same order as images().
    # To use dense sampling instead, you can try:
    #     surf.dense(grey, spacing=16)
    alldescriptors.append(surf.surf(grey, descriptor_only=True))

print('Descriptor computation complete.')
82+
# Number of visual words (K-means clusters) in the bag-of-words model
k = 256
km = KMeans(k)

concatenated = np.concatenate(alldescriptors)
print('Number of descriptors: {}'.format(len(concatenated)))
# Cluster only every 64th descriptor (presumably to keep K-means fast)
concatenated = concatenated[::64]
print('Clustering with K-means...')
km.fit(concatenated)

# Bag of words: for each image, count how many of its descriptors fall in
# each of the k clusters. np.bincount produces the length-k histogram in a
# single pass over the cluster assignments.
sfeatures = np.array(
    [np.bincount(km.predict(d), minlength=k) for d in alldescriptors],
    dtype=float)
98+
print('predicting...')
# Mean accuracy over the shared folds using only the SURF bag-of-words
# features; score_SURF is already a scalar, so it is printed directly.
score_SURF = cross_validation.cross_val_score(
    clf, sfeatures, labels, cv=cv).mean()
print('Accuracy (5 fold x-val) with Logistic Regression [SURF features]: {:.1%}'.format(
    score_SURF))
103+
104+
105+
print('Performing classification with all features combined...')
# Column-wise concatenation: SURF histogram columns first, then the
# whole-image features, one row per image.
allfeatures = np.hstack([sfeatures, ifeatures])
# Mean accuracy over the shared folds; already a scalar, so it is printed
# directly.
score_SURF_global = cross_validation.cross_val_score(
    clf, allfeatures, labels, cv=cv).mean()
print('Accuracy (5 fold x-val) with Logistic Regression [All features]: {:.1%}'.format(
    score_SURF_global))

0 commit comments

Comments
 (0)