Skip to content

SpacyEntityRecognizer

Bases: EntityRecognizer

Create an EntityRecognizer from a spaCy Language instance

Source code in recon/recognizer.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
class SpacyEntityRecognizer(EntityRecognizer):
    """Create an EntityRecognizer from a spaCy Language instance"""

    def __init__(self, nlp: Language):
        """Initialize a SpacyEntityRecognizer

        Args:
            nlp (Language): spaCy Language instance that can set doc.ents
        """
        super().__init__()
        self.nlp = nlp

    @property
    def labels(self) -> List[str]:
        """Return sorted list of NER labels known to the pipeline.

        Collects labels from the "ner" and "entity_ruler" pipes,
        whichever are present.

        Returns:
            List[str]: Sorted list of labels from spaCy ner pipes
        """
        all_labels: Set[str] = set()

        # Union labels from both label-producing pipes; either may be absent.
        for pipe in ["ner", "entity_ruler"]:
            if self.nlp.has_pipe(pipe):
                all_labels = all_labels | set(self.nlp.get_pipe(pipe).labels)  # type: ignore

        return sorted(list(all_labels))

    def predict(self, texts: Iterable[str]) -> Iterator[Example]:
        """Run spaCy nlp.pipe on a batch of raw texts.

        Args:
            texts (Iterable[str]): Raw text examples

        Yields:
            Iterator[Example]: Examples constructed from spaCy Model predictions
        """
        for doc in self.nlp.pipe(texts):
            yield Example(
                text=doc.text,
                spans=[
                    Span(
                        text=e.text,
                        start=e.start_char,  # character offsets
                        end=e.end_char,
                        label=e.label_,
                        token_start=e.start,  # token offsets
                        token_end=e.end,
                    )
                    for e in doc.ents
                ],
                tokens=[
                    Token(text=t.text, start=t.idx, end=t.idx + len(t), id=t.i)
                    for t in doc
                ],
            )

    def _evaluate(self, data: List[Example]) -> Scores:
        """Evaluate spaCy recognizer performance on dataset

        Args:
            data (List[Example]): Examples to evaluate on

        Returns:
            Scores: Scores built from the dict returned by nlp.evaluate
        """

        # Round-trip the data through a temporary .spacy file so spaCy's own
        # Corpus machinery builds the gold-standard examples for nlp.evaluate.
        with tempfile.TemporaryDirectory() as tmp_dir:
            data_path = Path(tmp_dir) / "data.spacy"
            to_spacy(data_path, data)
            corpus = SpacyCorpus(data_path, gold_preproc=False)
            dev_dataset = list(corpus(self.nlp))
            sc = self.nlp.evaluate(dev_dataset)
            scores = Scores(**sc)
        return scores

labels: List[str] property

Return List of spaCy ner labels

Returns:

Type Description
List[str]

List[str]: List of labels from spaCy ner pipe

__init__(nlp)

Initialize a SpacyEntityRecognizer

Parameters:

Name Type Description Default
nlp Language

spaCy Language instance that can set doc.ents

required
Source code in recon/recognizer.py
87
88
89
90
91
92
93
94
def __init__(self, nlp: Language):
    """Initialize a SpacyEntityRecognizer

    Args:
        nlp (Language): spaCy Language instance that can set doc.ents
    """
    super().__init__()
    self.nlp = nlp

predict(texts)

Run spaCy nlp.pipe on a batch of raw texts.

Parameters:

Name Type Description Default
texts Iterable[str]

Raw text examples

required

Yields:

Type Description
Example

Iterator[Example]: Examples constructed from spaCy Model predictions

Source code in recon/recognizer.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def predict(self, texts: Iterable[str]) -> Iterator[Example]:
    """Run spaCy nlp.pipe on a batch of raw texts.

    Args:
        texts (Iterable[str]): Raw text examples

    Yields:
        Iterator[Example]: Examples constructed from spaCy Model predictions
    """
    for doc in self.nlp.pipe(texts):
        # Convert each predicted entity to a Span, keeping both the
        # character offsets (start_char/end_char) and token offsets.
        spans = [
            Span(
                text=ent.text,
                start=ent.start_char,
                end=ent.end_char,
                label=ent.label_,
                token_start=ent.start,
                token_end=ent.end,
            )
            for ent in doc.ents
        ]
        # Record the full tokenization alongside the predicted spans.
        tokens = [
            Token(text=tok.text, start=tok.idx, end=tok.idx + len(tok), id=tok.i)
            for tok in doc
        ]
        yield Example(text=doc.text, spans=spans, tokens=tokens)