|
7 | 7 |
|
8 | 8 | import mahotas as mh |
9 | 9 | from sklearn import cross_validation |
10 | | -from sklearn.linear_model.logistic import LogisticRegression |
| 10 | +from sklearn.pipeline import Pipeline |
| 11 | +from sklearn.preprocessing import StandardScaler |
| 12 | +from sklearn.svm import SVC |
| 13 | +from sklearn.grid_search import GridSearchCV |
| 14 | + |
| 15 | + |
11 | 16 | import numpy as np |
12 | 17 | from glob import glob |
13 | | -from features import texture, edginess_sobel |
| 18 | +from features import texture, edginess_sobel, color_histogram |
14 | 19 |
|
15 | 20 | basedir = '../SimpleImageDataset/' |
16 | 21 |
|
17 | 22 |
|
18 | 23 | haralicks = [] |
19 | 24 | sobels = [] |
20 | 25 | labels = [] |
| 26 | +chists = [] |
21 | 27 |
|
22 | 28 | print('This script will test (with cross-validation) classification of the simple 3 class dataset') |
23 | 29 | print('Computing features...') |
|
29 | 35 | # ordering that the filesystem uses |
30 | 36 | for fname in sorted(images): |
31 | 37 | im = mh.imread(fname, as_grey=True) |
| 38 | + imc = mh.imread(fname) |
32 | 39 | haralicks.append(texture(im)) |
33 | 40 | sobels.append(edginess_sobel(im)) |
| 41 | + chists.append(color_histogram(imc)) |
34 | 42 |
|
35 | 43 | # Files are named like building00.jpg, scene23.jpg... |
36 | 44 | labels.append(fname[:-len('xx.jpg')]) |
|
40 | 48 | haralicks = np.array(haralicks) |
41 | 49 | sobels = np.array(sobels) |
42 | 50 | labels = np.array(labels) |
| 51 | +chists = np.array(chists) |
| 52 | + |
| 53 | +haralick_plus_sobel = np.hstack([np.atleast_2d(sobels).T, haralicks]) |
| 54 | +haralick_plus_chists = np.hstack([chists, haralicks]) |
| 55 | +haralick_plus_chists_plus_sobel = np.hstack([chists, haralicks, np.atleast_2d(sobels).T]) |
43 | 56 |
|
44 | | -# We use logistic regression because it is very fast. |
| 57 | +cv=cross_validation.LeaveOneOut(len(images)) |
| 58 | + |
| 59 | +# We use SVM because it achieves high accuracy on small(ish) datasets |
45 | 60 | # Feel free to experiment with other classifiers |
| 61 | +C_range = 10.0 ** np.arange(-2, 9) |
| 62 | +gamma_range = 10.0 ** np.arange(-5, 4) |
| 63 | +param_grid = dict(gamma=gamma_range, C=C_range) |
| 64 | +grid = GridSearchCV(SVC(), param_grid=param_grid) |
| 65 | + |
| 66 | +p = Pipeline([('preproc', StandardScaler()), |
| 67 | + ('classifier', grid)]) |
| 68 | + |
| 69 | +scores = cross_validation.cross_val_score( |
| 70 | + p, haralicks, labels, cv=cv) |
| 70 | +print('Accuracy (leave-one-out x-val) with SVM [haralick features]: {}%'.format( |
| 72 | + 0.1 * round(1000 * scores.mean()))) |
| 73 | + |
| 74 | +scores = cross_validation.cross_val_score( |
| 75 | + p, chists, labels, cv=cv) |
| 75 | +print('Accuracy (leave-one-out x-val) with SVM [color histograms]: {}%'.format( |
| 77 | + 0.1 * round(1000 * scores.mean()))) |
| 78 | + |
46 | 79 | scores = cross_validation.cross_val_score( |
47 | | - LogisticRegression(), haralicks, labels, cv=5) |
48 | | -print('Accuracy (5 fold x-val) with Logistic Regrssion [std features]: {}%'.format( |
| 80 | + p, haralick_plus_chists, labels, cv=cv) |
| 81 | +print('Accuracy (leave-one-out x-val) with SVM [texture features + chists]: {}%'.format( |
49 | 82 | 0.1 * round(1000 * scores.mean()))) |
50 | 83 |
|
51 | 84 | haralick_plus_sobel = np.hstack([np.atleast_2d(sobels).T, haralicks]) |
52 | 85 | scores = cross_validation.cross_val_score( |
53 | | - LogisticRegression(), haralick_plus_sobel, labels, cv=5).mean() |
54 | | -print('Accuracy (5 fold x-val) with Logistic Regrssion [std features + sobel]: {}%'.format( |
| 86 | + p, haralick_plus_sobel, labels, cv=cv) |
| 87 | +print('Accuracy (leave-one-out x-val) with SVM [texture features + sobel]: {}%'.format( |
| 88 | + 0.1 * round(1000 * scores.mean()))) |
| 89 | + |
| 90 | +haralick_plus_chists_plus_sobel = np.hstack([chists, haralicks, np.atleast_2d(sobels).T]) |
| 91 | +scores = cross_validation.cross_val_score( |
| 92 | + p, haralick_plus_chists_plus_sobel, labels, cv=cv) |
| 93 | +print('Accuracy (leave-one-out x-val) with SVM [texture features + color histograms + sobel]: {}%'.format( |
55 | 94 | 0.1 * round(1000 * scores.mean()))) |
56 | 95 |
|
57 | 96 |
|
58 | 97 | # We can try to just use the sobel feature. The result is almost completely |
59 | 98 | # random. |
60 | 99 | scores = cross_validation.cross_val_score( |
61 | | - LogisticRegression(), np.atleast_2d(sobels).T, labels, cv=5).mean() |
| 100 | + p, np.atleast_2d(sobels).T, labels, cv=cv).mean() |
62 | 101 | print('Accuracy (5 fold x-val) with Logistic Regrssion [only using sobel feature]: {}%'.format( |
63 | 102 | 0.1 * round(1000 * scores.mean()))) |
64 | 103 |
|
0 commit comments