Skip to content

SpacyEntityRecognizer

Bases: EntityRecognizer

Create an EntityRecognizer from a spaCy Language instance

Source code in recon/recognizer.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
class SpacyEntityRecognizer(EntityRecognizer):
    """Create an EntityRecognizer from a spaCy Language instance"""

    def __init__(self, nlp: Language):
        """Initialize a SpacyEntityRecognizer

        Args:
            nlp (Language): spaCy Language instance that can set doc.ents
        """
        super().__init__()
        self.nlp = nlp

    @property
    def labels(self) -> List[str]:
        """Return sorted list of NER labels known to the pipeline.

        Collects labels from the "ner" and "entity_ruler" pipes,
        whichever are present.

        Returns:
            List[str]: Sorted list of labels from spaCy ner pipes
        """
        all_labels: Set[str] = set()

        # Union labels from both label-producing pipes; either may be absent.
        for pipe in ["ner", "entity_ruler"]:
            if self.nlp.has_pipe(pipe):
                all_labels = all_labels | set(self.nlp.get_pipe(pipe).labels)  # type: ignore

        return sorted(list(all_labels))

    def predict(self, texts: Iterable[str]) -> Iterator[Example]:
        """Run spaCy nlp.pipe on a batch of raw texts.

        Args:
            texts (Iterable[str]): Raw text examples

        Yields:
            Iterator[Example]: Examples constructed from spaCy Model predictions
        """
        for doc in self.nlp.pipe(texts):
            yield Example(
                text=doc.text,
                spans=[
                    Span(
                        text=e.text,
                        start=e.start_char,  # character offsets
                        end=e.end_char,
                        label=e.label_,
                        token_start=e.start,  # token offsets
                        token_end=e.end,
                    )
                    for e in doc.ents
                ],
                tokens=[
                    Token(text=t.text, start=t.idx, end=t.idx + len(t), id=t.i)
                    for t in doc
                ],
            )

    def _evaluate(self, data: List[Example]) -> Scores:
        """Evaluate spaCy recognizer performance on dataset

        Args:
            data (List[Example]): Examples to evaluate on

        Returns:
            Scores: Scores built from the dict returned by nlp.evaluate
        """

        # Round-trip the data through a temporary .spacy file so spaCy's own
        # Corpus machinery builds the gold-standard examples for nlp.evaluate.
        with tempfile.TemporaryDirectory() as tmp_dir:
            data_path = Path(tmp_dir) / "data.spacy"
            to_spacy(data_path, data)
            corpus = SpacyCorpus(data_path, gold_preproc=False)
            dev_dataset = list(corpus(self.nlp))
            sc = self.nlp.evaluate(dev_dataset)
            scores = Scores(**sc)
        return scores

labels: List[str] property

Return List of spaCy ner labels

Returns:

Type Description
List[str]

List[str]: List of labels from spaCy ner pipe

__init__(nlp)

Initialize a SpacyEntityRecognizer

Parameters:

Name Type Description Default
nlp Language

spaCy Language instance that can set doc.ents

required
Source code in recon/recognizer.py
87
88
89
90
91
92
93
94
def __init__(self, nlp: Language):
    """Initialize a SpacyEntityRecognizer

    Args:
        nlp (Language): spaCy Language instance that can set doc.ents
    """
    super().__init__()
    self.nlp = nlp

predict(texts)

Run spaCy nlp.pipe on a batch of raw texts.

Parameters:

Name Type Description Default
texts Iterable[str]

Raw text examples

required

Yields:

Type Description
Example

Iterator[Example]: Examples constructed from spaCy Model predictions

Source code in recon/recognizer.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def predict(self, texts: Iterable[str]) -> Iterator[Example]:
    """Run spaCy nlp.pipe on a batch of raw texts.

    Args:
        texts (Iterable[str]): Raw text examples

    Yields:
        Iterator[Example]: Examples constructed from spaCy Model predictions
    """
    for doc in self.nlp.pipe(texts):
        # Convert each predicted entity to a Span, keeping both the
        # character offsets (start_char/end_char) and token offsets.
        spans = [
            Span(
                text=ent.text,
                start=ent.start_char,
                end=ent.end_char,
                label=ent.label_,
                token_start=ent.start,
                token_end=ent.end,
            )
            for ent in doc.ents
        ]
        # Record the full tokenization alongside the predicted spans.
        tokens = [
            Token(text=tok.text, start=tok.idx, end=tok.idx + len(tok), id=tok.i)
            for tok in doc
        ]
        yield Example(text=doc.text, spans=spans, tokens=tokens)