# Source code for fortex.vader.sentiment_analysis

# Copyright 2019 The Forte Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = [
    "VaderSentimentProcessor",
]

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

from forte.common import Resources
from forte.common.configuration import Config
from forte.data.data_pack import DataPack
from forte.processors.base import PackProcessor


class VaderSentimentProcessor(PackProcessor):
    r"""A wrapper of a sentiment analyzer: Vader (Valence Aware Dictionary
    and Sentiment Reasoner). Vader needs to be installed to use this package

     > `pip install vaderSentiment`

     or

     > `pip install --upgrade vaderSentiment`

    This processor assigns sentiment scores to each entry of the configured
    `entry_type` (by default each sentence) in the document. If the input
    pack contains no such entry then no processing will happen. If the data
    pack has multiple sets of sentences, one can specify the set of sentences
    to tag by setting the `sentence_component` config.

    Vader URL: (https://github.com/cjhutto/vaderSentiment)

    Citation: VADER: A Parsimonious Rule-based Model for Sentiment Analysis of
    Social Media Text (by C.J. Hutto and Eric Gilbert)

    """

    def __init__(self) -> None:
        super().__init__()
        # Component name used to filter entries; filled in from the configs
        # by `initialize`. `None` means no component filter is requested.
        self.sentence_component = None
        self.analyzer = SentimentIntensityAnalyzer()

    def initialize(self, resources: Resources, configs: Config) -> None:
        r"""Initialize the processor and cache the `sentence_component`
        config value on the instance.

        Args:
            resources: Shared resources, forwarded to the parent class.
            configs: The processor configuration; its
                `"sentence_component"` value is read here.
        """
        super().initialize(resources, configs)
        self.sentence_component = configs.get("sentence_component")

    def _process(self, input_pack: DataPack) -> None:
        r"""Score the text of every selected entry in `input_pack` and store
        the result on the entry.

        Args:
            input_pack: The data pack whose entries of the configured
                `entry_type` (optionally restricted to
                `sentence_component`) are scored.
        """
        # The target attribute does not change between entries, so look it
        # up once instead of on every iteration.
        attribute_name = self.configs.attribute_name

        for entry in input_pack.get(
            entry_type=self.configs.entry_type,
            components=self.sentence_component,
        ):
            # The whole result of `polarity_scores` is stored as-is (VADER
            # documents it as a dict with `neg`, `neu`, `pos`, `compound`).
            scores = self.analyzer.polarity_scores(entry.text)
            setattr(entry, attribute_name, scores)

    @classmethod
    def default_configs(cls) -> dict:
        r"""This defines a basic config structure for VaderSentimentProcessor.

        Returns:
            A dictionary with the default config for this processor.

        Following are the keys for this dictionary:

        - `"entry_type"`:
            Defines which entry type in the input pack to make
            prediction on. The default makes prediction on each `Sentence`
            in the input pack.

        - `"attribute_name"`:
            Defines which attribute of the `entry_type`
            in the input pack to save score to. The default saves prediction
            to the `sentiment` attribute for each `Sentence` in the input pack.

        - `"sentence_component"`:
            str. If not None, the processor will process sentence with the
            provided component name. If None, then all sentences will be
            processed.
        """
        return {
            "entry_type": "ft.onto.base_ontology.Sentence",
            "attribute_name": "sentiment",
            "sentence_component": None,
        }

    def expected_types_and_attributes(self) -> dict:
        r"""Method to add the expected entry type (the configured
        `entry_type`, `ft.onto.base_ontology.Sentence` by default) which
        would be checked before running the processor if
        the pipeline is initialized with
        `enforce_consistency=True` or
        :meth:`~forte.pipeline.Pipeline.enforce_consistency` was enabled for
        the pipeline.

        Returns:
            A dictionary mapping the configured `entry_type` to an empty set,
            i.e. no specific attributes of that type are required.
        """
        return {self.configs["entry_type"]: set()}