# Source code for asreview.balance_strategies.undersample

```# Copyright 2019 The ASReview Authors. All Rights Reserved.
#
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#
# Unless required by applicable law or agreed to in writing, software
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and

from math import ceil

import numpy as np

from asreview.balance_strategies.base import BaseBalance
from asreview.utils import get_random_state

[docs]class UndersampleBalance(BaseBalance):
"""Balancing class that undersamples the data with a given ratio.
"""
name = "undersample"

def __init__(self, ratio=1.0, random_state=None):
"""Initialize the undersampling balance strategy.

Arguments
---------
ratio: double
Undersampling ratio of the zero's. If for example we set a ratio of
0.25, we would sample only a quarter of the zeros and all the ones.
"""
super(UndersampleBalance, self).__init__()
self.ratio = ratio
self._random_state = get_random_state(random_state)

def sample(self, X, y, train_idx, shared):
one_ind = train_idx[np.where(y[train_idx] == 1)]
zero_ind = train_idx[np.where(y[train_idx] == 0)]

n_one = len(one_ind)
n_zero = len(zero_ind)

# If we don't have an excess of 0's, give back all training_samples.
if n_one / n_zero >= self.ratio:
shuf_ind = np.append(one_ind, zero_ind)
else:
n_zero_epoch = ceil(n_one / self.ratio)
zero_under = self._random_state.choice(
np.arange(n_zero), n_zero_epoch, replace=False)
shuf_ind = np.append(one_ind, zero_ind[zero_under])

self._random_state.shuffle(shuf_ind)
return X[shuf_ind], y[shuf_ind]

def full_hyper_space(self):
from hyperopt import hp
parameter_space = {
"bal_ratio": hp.lognormal('bal_ratio', 0, 1),
}
return parameter_space, {}
```