Добрый день!
Заранее прошу прощения, если подобное было - я не нашел нечего применительно именно к FAQ.
У меня есть набор вопрос-ответ данных (табличка со столбцами “Question”, “Answer”). Нужно построить умного FAQ бота. В доках deeppavlov нашел раздел FAQ, но не могу разобраться как его использовать со своими данными.
вот мой конфиг:
my_config = {
"dataset_reader": {
"class_name": "faq_reader",
"x_col_name": "Question",
"y_col_name": "Answer",
"data_url": None,
"data_path": "files/lgd_faq.csv"
},
"dataset_iterator": {
"class_name": "basic_classification_iterator",
"seed": 42
},
"chainer": {
"in": "q",
"in_y": "y",
"pipe": [
{
"class_name": "ru_tokenizer",
"id": "my_tokenizer",
"in": "q",
"lemmas": True,
"out": "q_token_lemmas"
},
{
"ref": "my_tokenizer",
"in": "q_token_lemmas",
"out": "q_lem"
},
{
"in": [
"q_lem"
],
"out": [
"q_vect"
],
"fit_on": [
"q_lem"
],
"id": "tfidf_vec",
"class_name": "sklearn_component",
"save_path": "{MODELS_PATH}/tfidf_vectorizer_ruwiki_v2.pkl",
"load_path": "{MODELS_PATH}/tfidf_vectorizer_ruwiki_v2.pkl",
"model_class": "sklearn.feature_extraction.text:TfidfVectorizer",
"infer_method": "transform"
},
{
"id": "answers_vocab",
"class_name": "simple_vocab",
"fit_on": [
"y"
],
"save_path": "{MODELS_PATH}/ru_mipt_answers.dict",
"load_path": "{MODELS_PATH}/ru_mipt_answers.dict",
"in": "y",
"out": "y_ids"
},
{
"in": "q_vect",
"fit_on": [
"q_vect",
"y_ids"
],
"out": [
"y_pred_proba"
],
"class_name": "sklearn_component",
"main": True,
"save_path": "{MODELS_PATH}/tfidf_logreg_classifier_v2.pkl",
"load_path": "{MODELS_PATH}/tfidf_logreg_classifier_v2.pkl",
"model_class": "sklearn.linear_model:LogisticRegression",
"infer_method": "predict_proba",
"C": 1000,
"penalty": "l2"
},
{
"in": "y_pred_proba",
"out": "y_pred_ids",
"class_name": "proba2labels",
"max_proba": True
},
{
"in": "y_pred_ids",
"out": "y_pred_answers",
"ref": "answers_vocab"
}
],
"out": [
"y_pred_answers",
"y_pred_proba"
]
},
"train": {
"class_name": "fit_trainer",
"evaluation_targets": []
},
"metadata": {
"variables": {
"ROOT_PATH": "~/.deeppavlov",
"DOWNLOADS_PATH": "{ROOT_PATH}/downloads",
"MODELS_PATH": "{ROOT_PATH}/models",
"MODEL_PATH": "{ROOT_PATH}/faq/faq_leadgid"
}
}
}
при запуске faq = train_model(my_config), получаю ответ:
[nltk_data] Downloading package punkt to /Users/Kolyambo/nltk_data...
[nltk_data] Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data] /Users/Kolyambo/nltk_data...
[nltk_data] Package stopwords is already up-to-date!
[nltk_data] Downloading package perluniprops to
[nltk_data] /Users/Kolyambo/nltk_data...
[nltk_data] Package perluniprops is already up-to-date!
[nltk_data] Downloading package nonbreaking_prefixes to
[nltk_data] /Users/Kolyambo/nltk_data...
[nltk_data] Package nonbreaking_prefixes is already up-to-date!
2022-04-14 18:22:18.824 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 203: Loading model sklearn.feature_extraction.text:TfidfVectorizer from /Users/Kolyambo/.deeppavlov/models/tfidf_vectorizer_ruwiki_v2.pkl
2022-04-14 18:22:18.826 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 210: Model sklearn.feature_extraction.textTfidfVectorizer loaded with parameters
2022-04-14 18:22:18.827 WARNING in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 216: Fitting of loaded model can not be continued. Model can be fitted from scratch.If one needs to continue fitting, please, look at `warm_start` parameter
2022-04-14 18:22:19.12 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 109: Fitting model sklearn.feature_extraction.textTfidfVectorizer
2022-04-14 18:22:19.24 INFO in 'deeppavlov.models.sklearn.sklearn_component'['sklearn_component'] at line 241: Saving model to /Users/Kolyambo/.deeppavlov/models/tfidf_vectorizer_ruwiki_v2.pkl
2022-04-14 18:22:19.29 INFO in 'deeppavlov.core.data.simple_vocab'['simple_vocab'] at line 115: [loading vocabulary from /Users/Kolyambo/.deeppavlov/models/ru_mipt_answers.dict]
2022-04-14 18:22:19.30 ERROR in 'deeppavlov.core.common.params'['params'] at line 112: Exception in <class 'deeppavlov.core.data.simple_vocab.SimpleVocabulary'>
Traceback (most recent call last):
File "/Users/Kolyambo/opt/anaconda3/envs/faq_bot/lib/python3.7/site-packages/deeppavlov/core/common/params.py", line 106, in from_params
component = obj(**dict(config_params, **kwargs))
File "/Users/Kolyambo/opt/anaconda3/envs/faq_bot/lib/python3.7/site-packages/deeppavlov/core/data/simple_vocab.py", line 62, in __init__
self.load()
File "/Users/Kolyambo/opt/anaconda3/envs/faq_bot/lib/python3.7/site-packages/deeppavlov/core/data/simple_vocab.py", line 118, in load
token, cnt = self.load_line(ln)
File "/Users/Kolyambo/opt/anaconda3/envs/faq_bot/lib/python3.7/site-packages/deeppavlov/core/data/simple_vocab.py", line 139, in load_line
token, cnt = ln.rsplit('\t', 1)
ValueError: not enough values to unpack (expected 2, got 1)
Помогите разобраться пожалуйста.