Skip to content

Loaders

Load a list of Examples from JSON records in the Prodigy format.

from_spacy(path, nlp=None, lang_code='en')

Load examples from .spacy docbin format

Parameters:

Name Type Description Default
path Path

Path to data

required
nlp Language

Spacy Language object.

None
lang_code str

Language code to create a blank spacy model with if nlp is not provided.

'en'

Yields:

Type Description
Iterable[Example]

Typed Examples, yielded one at a time (one per Doc stored in the DocBin)

Source code in recon/loaders.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def from_spacy(
    path: Path, nlp: Optional[Language] = None, lang_code: str = "en"
) -> Iterable[Example]:
    """Lazily read Examples back out of a .spacy DocBin file

    Every Doc stored in the DocBin becomes one Example: its entities are
    converted to Spans (character and token offsets plus label) and its
    tokens are converted to Tokens (character offsets plus token index).

    Args:
        path (Path): Path to the .spacy DocBin data
        nlp (Language, optional): Spacy Language object whose vocab is
            used to restore the stored Docs.
        lang_code (str, optional): Language code to create a
            blank spacy model with if nlp is not provided.

    Yields:
        Example: One typed Example per stored Doc
    """
    if not nlp:
        nlp = spacy.blank(lang_code)

    stored_docs = DocBin().from_disk(path).get_docs(nlp.vocab)
    for doc in stored_docs:
        # Entities carry both character offsets and token offsets.
        entity_spans = []
        for ent in doc.ents:
            entity_spans.append(
                Span(
                    text=ent.text,
                    start=ent.start_char,
                    end=ent.end_char,
                    label=ent.label_,
                    token_start=ent.start,
                    token_end=ent.end,
                )
            )

        # A token's end offset is its start offset plus its length.
        doc_tokens = []
        for tok in doc:
            tok_start = tok.idx
            doc_tokens.append(
                Token(text=tok.text, start=tok_start, end=tok_start + len(tok), id=tok.i)
            )

        yield Example(text=doc.text, spans=entity_spans, tokens=doc_tokens)

json_to_examples(data)

Convert List of Dicts to List of typed Examples

Parameters:

Name Type Description Default
data List[Dict[str, Any]]

Input List of Dicts to convert

required

Returns:

Type Description
List[Example]

List[Example]: List of typed Examples

Source code in recon/loaders.py
46
47
48
49
50
51
52
53
54
55
def json_to_examples(data: List[Dict[str, Any]]) -> List[Example]:
    """Build a typed Example from each raw record

    Args:
        data (List[Dict[str, Any]]): Records whose keys are passed to
            Example as keyword arguments

    Returns:
        List[Example]: One Example per input record, in input order
    """
    examples = []
    for record in data:
        examples.append(Example(**record))
    return examples

read_json(path)

Read annotations in JSON file format

Parameters:

Name Type Description Default
path Path

Path to data

required

Returns:

Type Description
List[Example]

List[Example]: List of examples

Source code in recon/loaders.py
32
33
34
35
36
37
38
39
40
41
42
43
def read_json(path: Path) -> List[Example]:
    """Load annotations stored in a JSON file as typed Examples

    Args:
        path (Path): Path to the JSON file

    Returns:
        List[Example]: Examples built from the records in the file
    """
    records = cast(List[Dict[str, Any]], srsly.read_json(path))
    return json_to_examples(records)

read_jsonl(path)

Read annotations in JSONL file format

Parameters:

Name Type Description Default
path Path

Path to data

required

Returns:

Type Description
List[Example]

List[Example]: List of examples

Source code in recon/loaders.py
18
19
20
21
22
23
24
25
26
27
28
29
def read_jsonl(path: Path) -> List[Example]:
    """Load annotations stored in a JSONL file as typed Examples

    Args:
        path (Path): Path to the JSONL file

    Returns:
        List[Example]: Examples built from the records in the file
    """
    records = cast(List[Dict[str, Any]], srsly.read_jsonl(path))
    return json_to_examples(records)

to_spacy(path, data, nlp=None, lang_code='en')

Save a batch of examples to disk in the .spacy DocBin format

Parameters:

Name Type Description Default
path Path

Path to data

required
data Iterable[Example]

Input Examples

required
nlp Language

Spacy Language object.

None
lang_code str

Language code to create a blank spacy model with if nlp is not provided.

'en'

Returns:

Name Type Description
DocBin DocBin

Spacy DocBin with stored example data.

Source code in recon/loaders.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def to_spacy(
    path: Path,
    data: Iterable[Example],
    nlp: Optional[Language] = None,
    lang_code: str = "en",
) -> DocBin:
    """Save a batch of examples to disk in the .spacy DocBin format

    Each example that has tokens is rebuilt as a spacy Doc from its token
    texts, its spans are attached as entities, and the Doc is added to the
    DocBin. Examples without tokens are skipped.

    Args:
        path (Path): Path to data
        data (Iterable[Example]): Input Examples
        nlp (Language, optional): Spacy Language object.
        lang_code (str, optional): Language code to create a blank
            spacy model with if nlp is not provided.

    Returns:
        DocBin: Spacy DocBin with stored example data.

    Raises:
        ValueError: If a span's character offsets do not line up with the
            token boundaries of its example, so that no spacy Span can be
            created for it.
    """

    if not nlp:
        nlp = spacy.blank(lang_code)

    doc_bin = DocBin(attrs=["ENT_IOB", "ENT_TYPE"])
    for example in data:
        if not example.tokens:
            # Without tokens there is nothing to rebuild the Doc from.
            continue

        tokens = [token.text for token in example.tokens]
        words, spaces = get_words_and_spaces(tokens, example.text)
        doc = Doc(nlp.vocab, words=words, spaces=spaces)

        spacy_spans: List[SpacySpan] = []
        for s in example.spans:
            spacy_span = doc.char_span(s.start, s.end, label=s.label)
            if spacy_span is None:
                # char_span returns None for offsets that fall inside a token;
                # fail here with context instead of handing None to set_ents.
                raise ValueError(
                    f"Span with label {s.label!r} at characters "
                    f"({s.start}, {s.end}) does not align with token "
                    f"boundaries in example text: {example.text!r}"
                )
            spacy_spans.append(spacy_span)

        doc.set_ents(spacy_spans)
        doc_bin.add(doc)
    doc_bin.to_disk(path)
    return doc_bin