I put together a Russian insults dataset (insult ru). Evaluation gives the following result:
2024-07-23 08:13:01.310 WARNING in 'deeppavlov.core.trainers.fit_trainer'['fit_trainer'] at line 66: TorchTrainer got additional init parameters ['device'] that will be ignored:
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
2024-07-23 08:13:04.34 WARNING in 'deeppavlov.core.models.torch_model'['torch_model'] at line 96: Unable to place component TorchTransformersClassifierModel on GPU, since no CUDA GPUs are available. Using CPU.
172it [02:53, 1.01s/it]
{"train": {"eval_examples_count": 10961, "metrics": {"roc_auc": 0.996, "accuracy": 0.986, "f1_macro": 0.9842}, "time_spent": "0:02:54"}}
37it [00:36, 1.01it/s]
{"valid": {"eval_examples_count": 2350, "metrics": {"roc_auc": 0.9074, "accuracy": 0.8481, "f1_macro": 0.8289}, "time_spent": "0:00:37"}}
37it [00:36, 1.01it/s]
{"test": {"eval_examples_count": 2344, "metrics": {"roc_auc": 0.9087, "accuracy": 0.8515, "f1_macro": 0.8335}, "time_spent": "0:00:37"}}
After I updated the dataset and added new records, evaluation produced the result below; the model stopped learning. I have tried splitting the dataset with different proportions (see the iterator sketch after this log), but the result is always unsatisfactory:
WARNING in 'deeppavlov.core.trainers.fit_trainer'['fit_trainer'] at line 66: TorchTrainer got additional init parameters ['device'] that will be ignored:
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
{"train": {"eval_examples_count": 11781, "metrics": {"roc_auc": 0.4848, "accuracy": 0.1035, "f1_macro": 0.1018}, "time_spent": "0:03:05"}}
27it [00:26, 1.04it/s]
{"valid": {"eval_examples_count": 1683, "metrics": {"roc_auc": 0.0, "accuracy": 0.1123, "f1_macro": 0.1065}, "time_spent": "0:00:27"}}
53it [00:51, 1.02it/s]
{"test": {"eval_examples_count": 3366, "metrics": {"roc_auc": 0.0, "accuracy": 0.1037, "f1_macro": 0.1025}, "time_spent": "0:00:52"}}
Here is my config:
{
  "dataset_reader": {
    "class_name": "basic_classification_reader",
    "x": "Comment",
    "y": "Class",
    "data_path": "{PROJECT_PATH}/data/"
  },
  "dataset_iterator": {
    "class_name": "basic_classification_iterator",
    "seed": 42
  },
  "chainer": {
    "in": [
      "x"
    ],
    "in_y": [
      "y"
    ],
    "pipe": [
      {
        "class_name": "torch_transformers_preprocessor",
        "vocab_file": "{TRANSFORMER}",
        "do_lower_case": true,
        "max_seq_length": 64,
        "in": [
          "x"
        ],
        "out": [
          "bert_features"
        ]
      },
      {
        "id": "classes_vocab",
        "class_name": "simple_vocab",
        "fit_on": [
          "y"
        ],
        "save_path": "{MODEL_PATH}/classes.dict",
        "load_path": "{MODEL_PATH}/classes.dict",
        "in": [
          "y"
        ],
        "out": [
          "y_ids"
        ]
      },
      {
        "in": [
          "y_ids"
        ],
        "out": [
          "y_onehot"
        ],
        "class_name": "one_hotter",
        "depth": "#classes_vocab.len",
        "single_vector": true
      },
      {
        "class_name": "torch_transformers_classifier",
        "n_classes": "#classes_vocab.len",
        "return_probas": true,
        "pretrained_bert": "{TRANSFORMER}",
        "save_path": "{MODEL_PATH}/model",
        "load_path": "{MODEL_PATH}/model",
        "optimizer": "AdamW",
        "optimizer_parameters": {
          "lr": 1e-05
        },
        "learning_rate_drop_patience": 5,
        "learning_rate_drop_div": 2.0,
        "in": [
          "bert_features"
        ],
        "in_y": [
          "y_ids"
        ],
        "out": [
          "y_pred_probas"
        ]
      },
      {
        "in": [
          "y_pred_probas"
        ],
        "out": [
          "y_pred_ids"
        ],
        "class_name": "proba2labels",
        "max_proba": true
      },
      {
        "in": [
          "y_pred_ids"
        ],
        "out": [
          "y_pred_labels"
        ],
        "ref": "classes_vocab"
      }
    ],
    "out": [
      "y_pred_labels"
    ]
  },
  "train": {
    "device": "gpu",
    "epochs": 100,
    "batch_size": 64,
    "metrics": [
      {
        "name": "roc_auc",
        "inputs": [
          "y_onehot",
          "y_pred_probas"
        ]
      },
      "accuracy",
      "f1_macro"
    ],
    "validation_patience": 5,
    "val_every_n_epochs": 1,
    "log_every_n_epochs": 1,
    "show_examples": false,
    "evaluation_targets": [
      "train",
      "valid",
      "test"
    ],
    "class_name": "torch_trainer"
  },
  "metadata": {
    "NAME": "Russian Language Toxic Comments",
    "variables": {
      "ROOT_PATH": "./data",
      "PROJECT_PATH": "{ROOT_PATH}/rlt",
      "MODELS_PATH": "{PROJECT_PATH}/models",
      "MODEL_PATH": "{MODELS_PATH}/classifiers",
      "TRANSFORMER": "bert-base-uncased",
      "LOGS_PATH": "{PROJECT_PATH}/logs"
    }
  }
}
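For reference, the metrics above can be reproduced with the standard DeepPavlov Python API; a minimal sketch, assuming the config above is saved as insults_ru.json (the file name is just a placeholder):

from deeppavlov import train_model, evaluate_model

# "insults_ru.json" is a placeholder name for the config listed above.
CONFIG = "insults_ru.json"

# Trains the classifier and saves the weights under MODEL_PATH.
model = train_model(CONFIG)

# Runs evaluation on the "evaluation_targets" (train/valid/test) and
# returns the same metrics dicts that appear in the logs above.
metrics = evaluate_model(CONFIG)
print(metrics)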