Multi-class, Multi-label, Ordinal Classification With Sklearn
Solution 1:
This may not be the precise answer you're looking for, but this article outlines the following technique:
We can take advantage of the ordered class values by transforming a k-class ordinal regression problem into k-1 binary classification problems: we convert an ordinal attribute A* with ordinal values V1, V2, V3, ..., Vk into k-1 binary attributes, one for each of the original attribute's first k-1 values. The i-th binary attribute represents the test A* > Vi.
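For instance, with five ordered classes the four binary models estimate Pr(y > 1) through Pr(y > 4), and the per-class probabilities fall out by differencing. A tiny sketch with made-up numbers for a single sample:

    # Hypothetical cumulative probabilities Pr(y > 1), ..., Pr(y > 4) for one sample
    p_gt = [0.9, 0.7, 0.4, 0.1]

    p1 = 1 - p_gt[0]         # Pr(y = 1) = 1 - Pr(y > 1)         -> 0.1
    p2 = p_gt[0] - p_gt[1]   # Pr(y = 2) = Pr(y > 1) - Pr(y > 2) -> 0.2
    p3 = p_gt[1] - p_gt[2]   # Pr(y = 3) = Pr(y > 2) - Pr(y > 3) -> 0.3
    p4 = p_gt[2] - p_gt[3]   # Pr(y = 4) = Pr(y > 3) - Pr(y > 4) -> 0.3
    p5 = p_gt[3]             # Pr(y = 5) = Pr(y > 4)             -> 0.1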
Essentially, you aggregate multiple binary classifiers (predict target > 1, target > 2, target > 3, target > 4) to predict whether a target is 1, 2, 3, 4, or 5. The author creates an OrdinalClassifier class that stores the binary classifiers in a Python dictionary.
import numpy as np
from sklearn.base import clone

class OrdinalClassifier:
    def __init__(self, clf):
        self.clf = clf
        self.clfs = {}

    def fit(self, X, y):
        self.unique_class = np.sort(np.unique(y))
        if self.unique_class.shape[0] > 2:
            for i in range(self.unique_class.shape[0] - 1):
                # For each of the k-1 thresholds, fit a binary problem: is y > unique_class[i]?
                binary_y = (y > self.unique_class[i]).astype(np.uint8)
                clf = clone(self.clf)
                clf.fit(X, binary_y)
                self.clfs[i] = clf
        return self

    def predict_proba(self, X):
        # Probabilities from each binary classifier, keyed by the same indices used in fit
        clfs_predict = {i: self.clfs[i].predict_proba(X) for i in self.clfs}
        predicted = []
        for i, y in enumerate(self.unique_class):
            if i == 0:
                # Pr(y = V1) = 1 - Pr(y > V1)
                predicted.append(1 - clfs_predict[i][:, 1])
            elif i in clfs_predict:
                # Pr(y = Vi) = Pr(y > Vi-1) - Pr(y > Vi)
                predicted.append(clfs_predict[i - 1][:, 1] - clfs_predict[i][:, 1])
            else:
                # Pr(y = Vk) = Pr(y > Vk-1)
                predicted.append(clfs_predict[i - 1][:, 1])
        return np.vstack(predicted).T

    def predict(self, X):
        # Map the argmax column index back to the original class labels
        return self.unique_class[np.argmax(self.predict_proba(X), axis=1)]
The technique originates in "A Simple Approach to Ordinal Classification" (Frank & Hall, 2001).
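As a quick usage sketch (the data and the DecisionTreeClassifier base estimator are made up for illustration; any classifier with predict_proba should work):

    import numpy as np
    from sklearn.tree import DecisionTreeClassifier

    rng = np.random.RandomState(0)
    X = rng.rand(200, 4)
    # Ordinal labels 1..5, loosely driven by the feature sum (synthetic)
    y = np.digitize(X.sum(axis=1), bins=[1.2, 1.8, 2.2, 2.8]) + 1

    ord_clf = OrdinalClassifier(DecisionTreeClassifier(max_depth=3))
    ord_clf.fit(X, y)
    print(ord_clf.predict_proba(X[:3]))  # one column per class; each row sums to 1
    print(ord_clf.predict(X[:3]))        # predicted labels in 1..5

Because each threshold classifier is fit independently, the differenced "probabilities" can occasionally come out negative; that is a known caveat of this decomposition.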
Solution 2:
Here is an example using KNN that should be tuneable in an sklearn pipeline or grid search.
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.base import clone, BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_is_fitted, check_array
from sklearn.utils.multiclass import check_classification_targets

class KNeighborsOrdinalClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, n_neighbors=5, *, weights='uniform',
                 algorithm='auto', leaf_size=30, p=2,
                 metric='minkowski', metric_params=None, n_jobs=None):
        self.n_neighbors = n_neighbors
        self.weights = weights
        self.algorithm = algorithm
        self.leaf_size = leaf_size
        self.p = p
        self.metric = metric
        self.metric_params = metric_params
        self.n_jobs = n_jobs

    def fit(self, X, y):
        X, y = check_X_y(X, y)
        check_classification_targets(y)

        self.clf_ = KNeighborsClassifier(**self.get_params())
        self.clfs_ = {}
        self.classes_ = np.sort(np.unique(y))
        if self.classes_.shape[0] > 2:
            for i in range(self.classes_.shape[0] - 1):
                # For each of the k-1 thresholds, fit a binary problem: is y > classes_[i]?
                binary_y = (y > self.classes_[i]).astype(np.uint8)
                clf = clone(self.clf_)
                clf.fit(X, binary_y)
                self.clfs_[i] = clf
        return self

    def predict_proba(self, X):
        X = check_array(X)
        check_is_fitted(self, ['classes_', 'clf_', 'clfs_'])

        # Probabilities from each binary classifier, keyed by the same indices used in fit
        clfs_predict = {i: self.clfs_[i].predict_proba(X) for i in self.clfs_}
        predicted = []
        for i, y in enumerate(self.classes_):
            if i == 0:
                # Pr(y = V1) = 1 - Pr(y > V1)
                predicted.append(1 - clfs_predict[i][:, 1])
            elif i in clfs_predict:
                # Pr(y = Vi) = Pr(y > Vi-1) - Pr(y > Vi)
                predicted.append(clfs_predict[i - 1][:, 1] - clfs_predict[i][:, 1])
            else:
                # Pr(y = Vk) = Pr(y > Vk-1)
                predicted.append(clfs_predict[i - 1][:, 1])
        return np.vstack(predicted).T

    def predict(self, X):
        X = check_array(X)
        check_is_fitted(self, ['classes_', 'clf_', 'clfs_'])
        # Return the original class labels rather than argmax indices
        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]
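A minimal tuning sketch under the same assumptions (synthetic data, arbitrary parameter grid), just to show the estimator slots into a pipeline and GridSearchCV like any other sklearn classifier:

    import numpy as np
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    rng = np.random.RandomState(0)
    X = rng.rand(200, 4)
    y = np.digitize(X.sum(axis=1), bins=[1.2, 1.8, 2.2, 2.8]) + 1  # ordinal labels 1..5 (synthetic)

    pipe = make_pipeline(StandardScaler(), KNeighborsOrdinalClassifier())
    param_grid = {
        'kneighborsordinalclassifier__n_neighbors': [3, 5, 11],
        'kneighborsordinalclassifier__weights': ['uniform', 'distance'],
    }
    search = GridSearchCV(pipe, param_grid, cv=3)
    search.fit(X, y)
    print(search.best_params_, search.best_score_)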