# Copyright 2018 The NLP Odyssey Authors.
# Copyright 2018 Marco Nicola <marconicola@disroot.org>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This package provides methods and modules to process the *CoNLL-U* format.
In most situations it's sufficient to make use of :func:`parse` and
:func:`to_conllu` functions, without caring too much about the implementation
under the hood.
In more detail, this package provides a lexical analyzer (see :mod:`.lexer`)
and a parser (see :mod:`.parser`) to transform the raw string input into
related :class:`.Sentence` objects.
Lexer and parser classes are implemented taking advantage of the *PLY
(Python Lex-Yacc)* library; you can learn more from the
`PLY documentation <http://www.dabeaz.com/ply>`_ and from the
`Lex & Yacc Page <http://dinosaur.compilertools.net/>`_.
"""
from typing import List
from colonel.sentence import Sentence
from colonel.conllu.parser import ConlluParserBuilder
def parse(content: str) -> List[Sentence]:
    """Parses a *CoNLL-U* string content, returning a list of sentences.

    A parser is obtained from ``ConlluParserBuilder.build()`` on every call
    and the whole ``content`` is handed to its ``parse`` method.

    :raise lexer.LexerError: (any specific subclass) in case of invalid input
        breaking the rules of the *CoNLL-U* lexer
    :raise parser.ParserError: (any specific subclass) in case of invalid input
        breaking the rules of the *CoNLL-U* parser

    :param content: *CoNLL-U* formatted string to be parsed
    :return: list of parsed :class:`.Sentence` items
    """
    return ConlluParserBuilder.build().parse(content)
def to_conllu(sentences: List[Sentence]) -> str:
    """Serializes a list of sentences to a formatted *CoNLL-U* string.

    This method simply concatenates the output of :meth:`.Sentence.to_conllu`
    for each given sentence, in order and with no separator added between
    them, and does not perform any validity check; sentences and elements not
    compatible with *CoNLL-U* format could lead to an incorrect output value
    or raising of exceptions.

    An empty list yields an empty string.

    :param sentences: list of :class:`.Sentence` items
    :return: a *CoNLL-U* formatted representation of the sentences
    """
    return ''.join(sentence.to_conllu() for sentence in sentences)