Source code for fortex.huggingface.question_and_answering_single

# Copyright 2021 The Forte Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Wrapper of the Question Answering models on the HuggingFace platform
(context understanding).
"""
import importlib
from typing import Dict, Set

from transformers import pipeline
from ft.onto.base_ontology import Phrase

from forte.common import Resources
from forte.common.configuration import Config
from forte.data.data_pack import DataPack
from forte.processors.base import PackProcessor

__all__ = [
    "QuestionAnsweringSingle",
]


class QuestionAnsweringSingle(PackProcessor):
    r"""Wrapper of the models on the HuggingFace platform with the pipeline
    tag `question-answering` (reading comprehension):
    https://huggingface.co/models?pipeline_tag=question-answering

    This wrapper accepts any model name from the HuggingFace platform with
    the `question-answering` pipeline tag in its configs. It predicts an
    answer from the context of the user-specified entry type in the input
    pack and annotates the prediction as a `Phrase` in the pack. The
    question is provided through the config.
    """

    def __init__(self):
        super().__init__()
        self.extractor = None

    def set_up(self):
        # -1 runs the model on CPU; a non-negative value selects a CUDA device.
        device_num = self.configs["cuda_devices"]
        self.extractor = pipeline(
            "question-answering",
            model=self.configs.model_name,
            tokenizer=self.configs.model_name,
            framework="pt",
            device=device_num,
        )
    def initialize(self, resources: Resources, configs: Config):
        super().initialize(resources, configs)
        self.set_up()
    def _process(self, input_pack: DataPack):
        # Resolve the configured entry type from its fully qualified name.
        path_str, module_str = self.configs.entry_type.rsplit(".", 1)
        mod = importlib.import_module(path_str)
        entry = getattr(mod, module_str)
        for entry_specified in input_pack.get(entry_type=entry):
            result = self.extractor(
                context=entry_specified.text,
                question=self.configs.question,
                max_answer_len=self.configs.max_answer_len,
                handle_impossible_answer=self.configs.handle_impossible_answer,
            )
            # Annotate the predicted answer span as a Phrase in the pack.
            start = result["start"]
            end = result["end"]
            Phrase(pack=input_pack, begin=start, end=end)
    @classmethod
    def default_configs(cls):
        r"""This defines a basic config structure for
        `QuestionAnsweringSingle`. Following are the keys for this
        dictionary:

        - `entry_type`: defines which entry type in the input pack to make
          predictions on. The default makes a prediction on each `Document`
          in the input pack.
        - `model_name`: the language model; the default is
          `"ktrapeznikov/biobert_v1.1_pubmed_squad_v2"`. The wrapper supports
          HuggingFace models with the `question-answering` pipeline tag.
        - `question`: one question whose answer is retrieved from the
          context in the input pack.
        - `max_answer_len`: the maximum length of predicted answers (i.e.,
          only answers with a shorter length are considered).
        - `cuda_devices`: device ordinal for CPU/GPU support. Setting this
          to -1 runs the model on the CPU; a non-negative value runs it on
          the associated CUDA device id.
        - `handle_impossible_answer`: whether or not we accept impossible
          as an answer.

        Returns: A dictionary with the default config for this processor.
        """
        return {
            "entry_type": "ft.onto.base_ontology.Document",
            "model_name": "ktrapeznikov/biobert_v1.1_pubmed_squad_v2",
            "question": "Where do I live",
            "max_answer_len": 15,
            "cuda_devices": -1,
            "handle_impossible_answer": False,
        }
    def expected_types_and_attributes(self):
        r"""Method to add the user-specified expected type, which would be
        checked before running the processor if the pipeline is initialized
        with `enforce_consistency=True` or
        :meth:`~forte.pipeline.Pipeline.enforce_consistency` was enabled for
        the pipeline.
        """
        return {self.configs["entry_type"]: set()}
    def record(self, record_meta: Dict[str, Set[str]]):
        r"""Method to add the output type record of `QuestionAnsweringSingle`,
        which is `"ft.onto.base_ontology.Phrase"`, to
        :attr:`forte.data.data_pack.Meta.record`.

        Args:
            record_meta: the field in the data pack for type records that
                needs to be filled in for consistency checking.
        """
        if "ft.onto.base_ontology.Phrase" not in record_meta.keys():
            record_meta["ft.onto.base_ontology.Phrase"] = set()
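

# A minimal usage sketch: wiring this processor into a Forte pipeline with a
# `StringReader`, asking one question against a raw text string and printing
# the predicted answer span. The question and text below are illustrative
# placeholders, and `forte.pipeline.Pipeline` / `forte.data.readers.StringReader`
# are assumed to be available from the core Forte package; this is not part of
# the processor's required setup.
if __name__ == "__main__":
    from forte.data.readers import StringReader
    from forte.pipeline import Pipeline

    pl = Pipeline[DataPack]()
    pl.set_reader(StringReader())
    pl.add(
        QuestionAnsweringSingle(),
        config={"question": "Who described the structure of DNA?"},
    )
    pl.initialize()

    # StringReader turns the raw string into a DataPack whose Document is the
    # default `entry_type` the processor predicts on.
    pack = pl.process(
        "The structure of DNA was described by Watson and Crick in 1953."
    )
    for phrase in pack.get(Phrase):
        print(phrase.text)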