diff --git a/src/capymoa/classifier/__init__.py b/src/capymoa/classifier/__init__.py
index 56bfd191..f9b0bad6 100644
--- a/src/capymoa/classifier/__init__.py
+++ b/src/capymoa/classifier/__init__.py
@@ -18,6 +18,7 @@
 from ._samknn import SAMkNN
 from ._dynamic_weighted_majority import DynamicWeightedMajority
 from ._csmote import CSMOTE
+from ._rwknn import RWkNN
 
 __all__ = [
     "AdaptiveRandomForestClassifier",
@@ -39,5 +40,6 @@
     "HoeffdingAdaptiveTree",
     "SAMkNN",
     "DynamicWeightedMajority",
-    "CSMOTE"
+    "CSMOTE",
+    "RWkNN",
 ]
diff --git a/src/capymoa/classifier/_rwknn.py b/src/capymoa/classifier/_rwknn.py
new file mode 100644
index 00000000..ee9389e4
--- /dev/null
+++ b/src/capymoa/classifier/_rwknn.py
@@ -0,0 +1,74 @@
+from capymoa.base import MOAClassifier
+from moa.classifiers.lazy import RW_kNN as _MOA_RWkNN
+from capymoa.stream import Schema
+from capymoa._utils import build_cli_str_from_mapping_and_locals
+
+
+class RWkNN(MOAClassifier):
+    """RW-kNN classifier: a wrapper around MOA's ``RW_kNN`` lazy learner.
+
+    Reference:
+
+    'Incremental k-Nearest Neighbors Using Reservoir Sampling for Data Streams'
+    Maroua Bahri, Albert Bifet
+    Discovery Science: 24th International Conference, 2021
+
+    Example usages:
+
+    >>> from capymoa.datasets import ElectricityTiny
+    >>> from capymoa.classifier import RWkNN
+    >>> from capymoa.evaluation import prequential_evaluation
+    >>> stream = ElectricityTiny()
+    >>> schema = stream.get_schema()
+    >>> learner = RWkNN(schema)
+    >>> results = prequential_evaluation(stream, learner, max_instances=1000)
+    >>> results["cumulative"].accuracy()
+    81.8
+    """
+
+    def __init__(
+        self,
+        schema: Schema,
+        k: int = 5,
+        limitW: int = 500,
+        limitR: int = 500,
+        nearest_neighbor_search: str = "LinearNN",
+    ):
+        """RW KNN Classifier
+
+        :param schema: The schema of the stream.
+        :param k: The number of neighbors.
+        :param limitW: The maximum number of instances to store in the window.
+        :param limitR: The maximum number of instances to store in the reservoir.
+        :param nearest_neighbor_search: Nearest Neighbour Search to use, either
+            ``"LinearNN"`` or ``"KDTree"``.
+        :raises ValueError: If ``nearest_neighbor_search`` is not one of the
+            valid options.
+        """
+        # Reject anything other than the two supported search strategies
+        # before any MOA object is created.
+        if not isinstance(nearest_neighbor_search, str) or (
+            nearest_neighbor_search not in ("LinearNN", "KDTree")
+        ):
+            raise ValueError(
+                "Invalid value for nearest_neighbor_search. "
+                "Valid options: LinearNN, KDTree"
+            )
+        self.nearest_neighbor_search = nearest_neighbor_search
+
+        # Maps each constructor argument name (resolved through locals()) to
+        # the option flag used in the CLI string handed to the MOA learner.
+        mapping = {
+            "k": "-k",
+            "limitW": "-w",
+            "limitR": "-r",
+            "nearest_neighbor_search": "-n",
+        }
+
+        config_str = build_cli_str_from_mapping_and_locals(mapping, locals())
+        self.moa_learner = _MOA_RWkNN()
+        super(RWkNN, self).__init__(
+            schema=schema,
+            CLI=config_str,
+            moa_learner=self.moa_learner,
+        )
diff --git a/tests/test_classifiers.py b/tests/test_classifiers.py
index 73492593..c9e463a5 100644
--- a/tests/test_classifiers.py
+++ b/tests/test_classifiers.py
@@ -18,7 +18,9 @@
     CSMOTE,
     LeveragingBagging,
     OnlineAdwinBagging,
+    RWkNN,
 )
+
 from capymoa.base import Classifier
 from capymoa.base import MOAClassifier
 from capymoa.datasets import ElectricityTiny
@@ -62,6 +64,7 @@
         (partial(CSMOTE), 80.55, 79.0, None),
         (partial(LeveragingBagging), 86.7, 91.0, None),
         (partial(OnlineAdwinBagging), 85.25, 92.0, None),
+        (partial(RWkNN), 85.25, 92.0, None),
     ],
     ids=[
         "OnlineBagging",
@@ -77,6 +80,7 @@
         "OzaBoost",
         "MajorityClass",
         "NoChange",
+        "OnlineAccuracyUpdatedEnsemble",
         "OnlineSmoothBoost",
         "StreamingRandomPatches",
         "HoeffdingAdaptiveTree",
@@ -85,6 +89,7 @@
         "CSMOTE",
         "LeveragingBagging",
         "OnlineAdwinBagging",
+        "RWkNN",
     ],
 )
 def test_classifiers(