BERT for classification error

Hi!
I’m trying to use a pretrained BERT model for classification:

config_file_path = "Data/downloads/bert_models/rubert_cased_L-12_H-768_A-12_v1/bert_config.json"
model_config = read_json(config_file_path)
m = train_model(model_config)

But I get the following error:
ValueError Traceback (most recent call last)
in ()
----> 1 m = train_model(model_config)

C:\ProgramData\Anaconda3\lib\site-packages\deeppavlov\__init__.py in train_model(config, download, recursive)
29 # TODO: make better
30 def train_model(config: [str, Path, dict], download: bool = False, recursive: bool = False) -> Chainer:
—> 31 train_evaluate_model_from_config(config, download=download, recursive=recursive)
32 return build_model(config, load_trained=True)
33

C:\ProgramData\Anaconda3\lib\site-packages\deeppavlov\core\commands\train.py in train_evaluate_model_from_config(config, iterator, to_train, evaluation_targets, to_validate, download, start_epoch_num, recursive)
119
120 if to_train:
–> 121 trainer.train(iterator)
122
123 res = {}

C:\ProgramData\Anaconda3\lib\site-packages\deeppavlov\core\trainers\nn_trainer.py in train(self, iterator)
289 def train(self, iterator: DataLearningIterator) -> None:
290 """Call :meth:`~fit_chainer` and then :meth:`~train_on_batches` with provided data iterator as an argument"""
–> 291 self.fit_chainer(iterator)
292 if callable(getattr(self._chainer, ‘train_on_batch’, None)):
293 try:

C:\ProgramData\Anaconda3\lib\site-packages\deeppavlov\core\trainers\fit_trainer.py in fit_chainer(self, iterator)
101 raise RuntimeError(‘Cannot fit already built chainer’)
102 for component_index, component_config in enumerate(self.chainer_config[‘pipe’], 1):
–> 103 component = from_params(component_config, mode=‘train’)
104 if ‘fit_on’ in component_config:
105 component: Estimator

C:\ProgramData\Anaconda3\lib\site-packages\deeppavlov\core\common\params.py in from_params(params, mode, serialized, **kwargs)
102 kwargs[‘mode’] = mode
103
–> 104 component = cls(**dict(config_params, **kwargs))
105 try:
106 _refs[config_params[‘id’]] = component

C:\ProgramData\Anaconda3\lib\site-packages\deeppavlov\core\models\tf_backend.py in __call__(cls, *args, **kwargs)
72 wrapped_attr = _graph_wrap(attr, obj.graph)
73 setattr(obj, meth, wrapped_attr)
---> 74 obj.__init__(*args, **kwargs)
75 return obj

C:\ProgramData\Anaconda3\lib\site-packages\deeppavlov\core\models\tf_backend.py in _wrapped(*args, **kwargs)
25 def _wrapped(*args, **kwargs):
26 with graph.as_default():
—> 27 return func(*args, **kwargs)
28 return _wrapped
29

C:\ProgramData\Anaconda3\lib\site-packages\deeppavlov\models\bert\bert_classifier.py in __init__(self, bert_config_file, n_classes, keep_prob, one_hot_labels, multilabel, return_probas, attention_probs_keep_prob, hidden_keep_prob, optimizer, num_warmup_steps, weight_decay_rate, pretrained_bert, min_learning_rate, **kwargs)
85 self.sess = tf.Session(config=self.sess_config)
86
—> 87 self._init_graph()
88
89 self._init_optimizer()

C:\ProgramData\Anaconda3\lib\site-packages\deeppavlov\core\models\tf_backend.py in _wrapped(*args, **kwargs)
25 def _wrapped(*args, **kwargs):
26 with graph.as_default():
—> 27 return func(*args, **kwargs)
28 return _wrapped
29

C:\ProgramData\Anaconda3\lib\site-packages\deeppavlov\models\bert\bert_classifier.py in _init_graph(self)
114 input_mask=self.input_masks_ph,
115 token_type_ids=self.token_types_ph,
–> 116 use_one_hot_embeddings=False,
117 )
118

C:\ProgramData\Anaconda3\lib\site-packages\bert_dp\modeling.py in __init__(self, config, is_training, input_ids, input_mask, token_type_ids, use_one_hot_embeddings, scope)
185 initializer_range=config.initializer_range,
186 word_embedding_name=“word_embeddings”,
–> 187 use_one_hot_embeddings=use_one_hot_embeddings)
188
189 # Add positional embeddings and token type embeddings, then layer

C:\ProgramData\Anaconda3\lib\site-packages\bert_dp\modeling.py in embedding_lookup(input_ids, vocab_size, embedding_size, initializer_range, word_embedding_name, use_one_hot_embeddings)
419 name=word_embedding_name,
420 shape=[vocab_size, embedding_size],
–> 421 initializer=create_initializer(initializer_range))
422
423 if use_one_hot_embeddings:

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py in get_variable(name, shape, dtype, initializer, regularizer, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter, constraint, synchronization, aggregation)
1485 constraint=constraint,
1486 synchronization=synchronization,
-> 1487 aggregation=aggregation)
1488
1489

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py in get_variable(self, var_store, name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter, constraint, synchronization, aggregation)
1235 constraint=constraint,
1236 synchronization=synchronization,
-> 1237 aggregation=aggregation)
1238
1239 def _get_partitioned_variable(self,

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py in get_variable(self, name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource, custom_getter, constraint, synchronization, aggregation)
538 constraint=constraint,
539 synchronization=synchronization,
–> 540 aggregation=aggregation)
541
542 def _get_partitioned_variable(self,

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py in _true_getter(name, shape, dtype, initializer, regularizer, reuse, trainable, collections, caching_device, partitioner, validate_shape, use_resource, constraint, synchronization, aggregation)
490 constraint=constraint,
491 synchronization=synchronization,
–> 492 aggregation=aggregation)
493
494 # Set trainable value based on synchronization value.

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py in _get_single_variable(self, name, shape, dtype, initializer, regularizer, partition_info, reuse, trainable, collections, caching_device, validate_shape, use_resource, constraint, synchronization, aggregation)
902 "shape is not fully defined. The given initializer "
903 “function expects the following args %s” %
–> 904 tf_inspect.getargspec(initializer).args)
905 variable_dtype = dtype.base_dtype
906

ValueError: You can only pass an initializer function that expects no arguments to its callable when the shape is not fully defined. The given initializer function expects the following args [‘self’, ‘shape’, ‘dtype’, ‘partition_info’]

Config file:
{
“dataset_reader”: {
“class_name”: “basic_classification_reader”,
“x”: “x_column”,
“y”: “y_column”,
“data_path”: “Data/”,
“train”: “train.csv”,
“test”: “test.csv”,
“valid”: “valid.csv”
},
“dataset_iterator”: {
“class_name”: “basic_classification_iterator”,
“seed”: 42
},
“chainer”: {
“in”: [
“x”
],
“in_y”: [
“y”
],
“pipe”: [
{
“class_name”: “bert_preprocessor”,
“vocab_file”: “{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v1/vocab.txt”,
“do_lower_case”: true,
“max_seq_length”: 64,
“in”: [
“x”
],
“out”: [
“bert_features”
]
},
{
“id”: “classes_vocab”,
“class_name”: “simple_vocab”,
“fit_on”: [
“y”
],
“save_path”: “{MODEL_PATH}/classes.dict”,
“load_path”: “{MODEL_PATH}/classes.dict”,
“in”: “y”,
“out”: “y_ids”
},
{
“in”: “y_ids”,
“out”: “y_onehot”,
“class_name”: “one_hotter”,
“depth”: “#classes_vocab.len”,
“single_vector”: true
},
{
“class_name”: “bert_classifier”,
“n_classes”: “#classes_vocab.len”,
“return_probas”: true,
“one_hot_labels”: true,
“bert_config_file”: “{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v1/bert_config.json”,
“pretrained_bert”: “{DOWNLOADS_PATH}/bert_models/rubert_cased_L-12_H-768_A-12_v1/bert_model.ckpt”,
“save_path”: “{MODEL_PATH}/model”,
“load_path”: “{MODEL_PATH}/model”,
“keep_prob”: 0.5,
“learning_rate”: 1e-05,
“learning_rate_drop_patience”: 5,
“learning_rate_drop_div”: 2.0,
“in”: [
“bert_features”
],
“in_y”: [
“y_onehot”
],
“out”: [
“y_pred_probas”
]
},
{
“in”: “y_pred_probas”,
“out”: “y_pred_ids”,
“class_name”: “proba2labels”,
“max_proba”: true
},
{
“in”: “y_pred_ids”,
“out”: “y_pred_labels”,
“ref”: “classes_vocab”
}
],
“out”: [
“y_pred_labels”
]
},
“train”: {
“batch_size”: 64,
“epochs”: 100,
“metrics”: [
“f1_weighted”,
“f1_macro”,
“sets_accuracy”,
{
“name”: “roc_auc”,
“inputs”: [
“y_onehot”,
“y_pred_probas”
]
}
],
“show_examples”: false,
“pytest_max_batches”: 2,
“validation_patience”: 5,
“val_every_n_epochs”: 1,
“log_every_n_epochs”: 1,
“evaluation_targets”: [
“train”,
“valid”,
“test”
],
“tensorboard_log_dir”: “{MODEL_PATH}/”
},
“metadata”: {
“variables”: {
“ROOT_PATH”: “Data”,
“DOWNLOADS_PATH”: “{ROOT_PATH}/downloads”,
“MODELS_PATH”: “{ROOT_PATH}/models”,
“MODEL_PATH”: “{MODELS_PATH}/classifiers/mydata_rubert_v0”
},
“requirements”: [
“{DEEPPAVLOV_PATH}/requirements/tf.txt”,
“{DEEPPAVLOV_PATH}/requirements/bert_dp.txt”
]
}
}

What can cause this problem?

Hi!
In the code below you are passing the config file of the RuBERT model itself (bert_config.json) to train_model, not a DeepPavlov config for a classification model, like this one for sentiment classification:

config_file_path = "Data/downloads/bert_models/rubert_cased_L-12_H-768_A-12_v1/bert_config.json"
model_config = read_json(config_file_path)
m = train_model(model_config)

Check the classification documentation page: http://docs.deeppavlov.ai/en/master/features/models/classifiers.html#python-code