I am trying to create binary classifier using BERT’s RuBert.
Here is my config file:
model = {
"dataset_reader": {
"class_name": "basic_classification_reader",
"x": "title",
"y": "label",
"data_path": "/content/drive/MyDrive/AI/datasets/headers_news/splitted/",
"train": "train.csv",
"test": "test.csv",
"valid": "validation.csv"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42,
"shuffle": True
},
"chainer": {
"in": [
"x"
],
"in_y": [
"y"
],
"pipe": [
{
"class_name": "bert_preprocessor",
"vocab_file": "/content/drive/MyDrive/AI/config/rubert_cased_L-12_H-768_A-12_v2/vocab.txt",
"do_lower_case": False,
"max_seq_length": 512,
"in": [
"x"
],
"out": [
"bert_features"
]
},
{
"id": "classes_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "bert-news-classifier/data/binary_classes.dict",
"load_path": "bert-news-classifier/data/binary_classes.dict",
"in": "y",
"out": "y_ids"
},
{
"in": "y_ids",
"out": "y_onehot",
"class_name": "one_hotter",
"depth": 2,
"single_vector": True
},
{
"class_name": "bert_classifier",
"n_classes": 2,
"return_probas": True,
"one_hot_labels": True,
"bert_config_file": "/content/drive/MyDrive/AI/config/rubert_cased_L-12_H-768_A-12_v2/bert_config.json",
"pretrained_bert": "/content/drive/MyDrive/AI/config/rubert_cased_L-12_H-768_A-12_v2/bert_model.ckpt",
"save_path": "bert-news-classifier/data/bert_model/model",
"load_path": "bert-news-classifier/data/bert_model/model",
"learning_rate": 2e-6,
"keep_prob": 0.5,
"load_before_drop": True,
"loss": "cross-entropy",
"in": [
"bert_features"
],
"in_y": [
"y_onehot"
],
"out": [
"y_pred_probas"
]
},
{
"in": "y_pred_probas",
"out": "y_pred_ids",
"class_name": "proba2labels",
"max_proba": True
},
{
"in": "y_pred_ids",
"out": "y_pred_labels",
"ref": "classes_vocab"
}
],
"out": [
"y_pred_labels", "y_pred_probas"
]
},
"train": {
"batch_size": 8,
"epochs": 5,
"metrics": [
"f1",
{
"name": "accuracy",
"inputs": ["y_onehot", "y_pred_labels"]
},
{
"name": "roc_auc",
"inputs": ["y_onehot", "y_pred_probas"]
}
],
"show_examples": False,
"pytest_max_batches": 2,
"validation_patience": 2,
"val_every_n_epochs": 1,
"log_every_n_epochs": 1,
"tensorboard_log_dir": "bert-news-classifier/data/logs/",
"evaluation_targets": [
"train",
"valid",
],
"class_name": "nn_trainer"
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{MODELS_PATH}/classifiers/rusentiment_bert_v0/"
},
"requirements": [
"deeppavlov/requirements/tf.txt",
"deeppavlov/requirements/bert_dp.txt"
],
"download": [
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/bert/rubert_cased_L-12_H-768_A-12_v2.tar.gz",
"subdir": "/content/bert_models"
},
{
"url": "http://files.deeppavlov.ai/deeppavlov_data/classifiers/rubert_cased_L-12_H-768_A-12_v2.tar.gz",
"subdir": "drive/MyDrive/bert_model/model_mix_73/classifier"
}
]
}
}
but I am getting the following error:
TypeError: round_f1() takes 2 positional arguments but 3 were given
The structure of the dataset has no problem. It has 2 columns (text and label)
What am I doing wrong?