R/admissibleml.R
h2o.disparate_analysis.Rd
Create a frame containing aggregations of intersectional fairness across the models.
h2o.disparate_analysis(
models,
newdata,
protected_columns,
reference,
favorable_class,
air_metric = "selectedRatio",
alpha = 0.05
)
models: List of H2O models.
newdata: H2OFrame.
protected_columns: List of categorical columns that contain sensitive information such as race, gender, or age.
reference: List of values corresponding to a reference for each protected column. If set to NULL, the largest group is used as the reference.
favorable_class: Positive/favorable outcome class of the response.
air_metric: Metric used for Adverse Impact Ratio calculation. Defaults to "selectedRatio".
alpha: The alpha level is the probability of rejecting the null hypothesis that the protected group and the reference came from the same population when the null hypothesis is true. Defaults to 0.05.
A frame containing aggregations of intersectional fairness across the models.
if (FALSE) { # \dontrun{
  # Example: aggregate intersectional fairness metrics over AutoML models.
  library(h2o)
  h2o.init()

  # Import the credit card dataset from the public H2O test-data bucket.
  data_url <- paste0(
    "https://s3.amazonaws.com/h2o-public-test-data/smalldata/",
    "admissibleml_test/taiwan_credit_card_uci.csv"
  )
  data <- h2o.importFile(data_url)

  # Predictors, response, and the sensitive columns to analyse.
  x <- c(
    "LIMIT_BAL", "AGE",
    "PAY_0", "PAY_2", "PAY_3", "PAY_4", "PAY_5", "PAY_6",
    "BILL_AMT1", "BILL_AMT2", "BILL_AMT3",
    "BILL_AMT4", "BILL_AMT5", "BILL_AMT6",
    "PAY_AMT1", "PAY_AMT2", "PAY_AMT3",
    "PAY_AMT4", "PAY_AMT5", "PAY_AMT6"
  )
  y <- "default payment next month"
  protected_columns <- c("SEX", "EDUCATION")

  # The response and the protected columns are converted to factors.
  factor_columns <- c(y, protected_columns)
  for (column_name in factor_columns) {
    data[[column_name]] <- as.factor(data[[column_name]])
  }

  # 80/20 split into training and test frames.
  splits <- h2o.splitFrame(data, ratios = 0.8)
  train <- splits[[1]]
  test <- splits[[2]]

  reference <- c(SEX = "1", EDUCATION = "2") # university educated man
  favorable_class <- "0" # no default next month

  # Train a small AutoML run, then compare its models on the test frame.
  aml <- h2o.automl(x = x, y = y, training_frame = train, max_models = 3)
  h2o.disparate_analysis(
    models = aml,
    newdata = test,
    protected_columns = protected_columns,
    reference = reference,
    favorable_class = favorable_class
  )
} # }