# Source code for colonel.word

# Copyright 2018 The NLP Odyssey Authors.
# Copyright 2018 Marco Nicola <marconicola@disroot.org>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module providing the :class:`.Word` class."""

from typing import Optional
from colonel.base_rich_sentence_element import BaseRichSentenceElement

__all__ = ['Word']


class Word(BaseRichSentenceElement):
    """Representation of a *Word* sentence element.

    On top of what :class:`.BaseRichSentenceElement` provides, a word
    carries the three attributes :attr:`index`, :attr:`head` and
    :attr:`deprel`, respectively compatible with the *CoNLL-U* ``ID``,
    ``HEAD`` and ``DEPREL`` fields.

    :param index: value for the :attr:`index` attribute.
    :param head: value for the :attr:`head` attribute.
    :param deprel: value for the :attr:`deprel` attribute.
    :param kwargs: any other keyword argument is forwarded, untouched, to
        the constructor of the parent class.
    """

    __slots__ = ('index', 'head', 'deprel')

    def __init__(
            self,
            index: Optional[int] = None,
            head: Optional[int] = None,
            deprel: Optional[str] = None,
            **kwargs
    ) -> None:
        super().__init__(**kwargs)

        #: Word index.
        #:
        #: It is compatible with *CoNLL-U* ``ID`` field.
        #:
        #: The term *index* has been preferred over the more conventional *ID*,
        #: mostly for the purpose of preventing confusion, especially with
        #: Python's :func:`id` built-in function (which returns the
        #: *"identity"* of an object).
        self.index: Optional[int] = index

        #: Head of the current word, which is usually a value of another
        #: Word's :attr:`index` or zero (``0``, for ``root`` grammatical
        #: relations).
        #:
        #: It is compatible with *CoNLL-U* ``HEAD`` field.
        self.head: Optional[int] = head

        #: *Universal dependency relation* to the :attr:`head` or a defined
        #: language-specific subtype of one.
        #:
        #: It is compatible with *CoNLL-U* ``DEPREL`` field.
        self.deprel: Optional[str] = deprel

    def is_valid(self) -> bool:
        """Returns whether or not the object can be considered valid, however
        ignoring the context of the sentence in which the word itself is
        possibly inserted.

        In compliance with the *CoNLL-U* format, an instance of type
        :class:`.Word` is considered valid only when :attr:`index` is set to a
        value greater than zero (``0``).

        The validity check of the parent class must succeed as well.
        """
        return (
            super().is_valid()
            and self.index is not None
            and self.index > 0
        )

    def to_conllu(self) -> str:
        """Returns a *CoNLL-U* formatted representation of the element.

        The result is a single line made of ten tab-separated fields, in this
        order: index, form, lemma, universal POS tag name, language-specific
        POS tag, features, head, dependency relation, enhanced dependencies
        and miscellaneous annotations. Missing string values are rendered as
        an underscore (``_``).

        No validity check is performed on the attributes; values not
        compatible with *CoNLL-U* format could lead to an incorrect output
        value or raising of exceptions. In particular, :attr:`index` is
        converted with :class:`str` unconditionally, so an unset index is
        rendered as the literal text ``None``.
        """
        # Zero is a legitimate head (``root`` relation), hence the explicit
        # comparison against None rather than a plain truthiness test.
        head = '_' if self.head is None else str(self.head)

        fields = [
            str(self.index),
            self.form or '_',
            self.lemma or '_',
            self.upos.name if self.upos else '_',
            self.xpos or '_',
            self._feats_to_conllu(),
            head,
            self.deprel or '_',
            self._deps_to_conllu(),
            self.misc or '_',
        ]
        return '\t'.join(fields)