Skip to content

Hashing

dataset_hash(dataset)

Hash of Dataset

Parameters:

Name Type Description Default
dataset Dataset

Dataset to hash

required

Returns:

Type Description
int

Dataset hash

Source code in recon/hashing.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def dataset_hash(dataset: "Dataset") -> int:
    """Hash of Dataset

    The dataset name followed by the ``example_hash`` of every item in
    ``dataset.data`` (in iteration order) is packed into a single tuple
    and handed to ``_hash``.

    Args:
        dataset (Dataset): Dataset to hash

    Returns:
        int: Dataset hash
    """
    # Name first, then one hash per example, all in one flat tuple.
    return _hash((dataset.name, *map(example_hash, dataset.data)))

example_hash(example)

Hash of Example type

Parameters:

Name Type Description Default
example Example

Example to hash

required

Returns:

Type Description
int

Example hash

Source code in recon/hashing.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
def example_hash(example: "Example") -> int:
    """Hash of Example type

    The example text and, for each span in ``example.spans``, its
    ``start``, ``end``, ``label`` and ``text`` are flattened into one
    tuple that is passed to ``_hash``. Token data is not included.

    Args:
        example (Example): Example to hash

    Returns:
        int: Example hash
    """
    fields = [example.text]
    for annotation in example.spans:
        # Four values per span, appended in a fixed order.
        fields.extend(
            (
                annotation.start,
                annotation.end,
                annotation.label,
                annotation.text,
            )
        )
    return _hash(tuple(fields))

prediction_error_hash(prediction_error)

Hash of PredictionError

Parameters:

Name Type Description Default
prediction_error PredictionError

PredictionError to hash

required

Returns:

Type Description
int

PredictionError hash

Source code in recon/hashing.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def prediction_error_hash(prediction_error: "PredictionError") -> int:
    """Hash of PredictionError

    Combines the error's ``text``, ``true_label`` and ``pred_label``
    (in that order) into a tuple and passes it to ``_hash``.

    Args:
        prediction_error (PredictionError): PredictionError to hash

    Returns:
        int: PredictionError hash
    """
    text = prediction_error.text
    expected_label = prediction_error.true_label
    predicted_label = prediction_error.pred_label
    return _hash((text, expected_label, predicted_label))

span_hash(span)

Hash of Span type

Parameters:

Name Type Description Default
span Span

Span to hash

required

Returns:

Type Description
int

Span hash

Source code in recon/hashing.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def span_hash(span: "Span") -> int:
    """Hash of Span type

    Uses the span's ``start``, ``end``, ``label``, ``text``,
    ``token_start`` and ``token_end``. A falsy token offset
    (e.g. ``None``) is hashed as ``0``.

    Args:
        span (Span): Span to hash

    Returns:
        int: Span hash
    """
    start, end, label, text = span.start, span.end, span.label, span.text
    # Falsy token offsets collapse to 0 before hashing.
    token_start = span.token_start or 0
    token_end = span.token_end or 0
    return _hash((start, end, label, text, token_start, token_end))

token_hash(token)

Hash of Token type

Parameters:

Name Type Description Default
token Token

Token to hash

required

Returns:

Type Description
int

Token hash

Source code in recon/hashing.py
10
11
12
13
14
15
16
17
18
19
def token_hash(token: "Token") -> int:
    """Hash of Token type

    Passes the tuple ``(text, start, end, id)`` of the token to ``_hash``.

    Args:
        token (Token): Token to hash

    Returns:
        int: Token hash
    """
    token_fields = (token.text, token.start, token.end, token.id)
    return _hash(token_fields)

tokenized_example_hash(example)

Hash of Example type including token data

Parameters:

Name Type Description Default
example Example

Example to hash

required

Returns:

Type Description
int

Example hash

Source code in recon/hashing.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
def tokenized_example_hash(example: "Example") -> int:
    """Hash of Example type including token data

    Flattens, in order: the example text; for each span its ``start``,
    ``end``, ``label``, ``text``, ``token_start`` and ``token_end``
    (falsy token offsets are hashed as ``0``); and for each token its
    ``text``, ``start``, ``end`` and ``id``. The resulting tuple is
    passed to ``_hash``.

    Args:
        example (Example): Example to hash

    Returns:
        int: Example hash
    """
    # A falsy ``example.tokens`` (e.g. None) contributes no token data.
    token_seq = example.tokens or []
    parts = [example.text]
    for annotation in example.spans:
        parts.extend(
            (
                annotation.start,
                annotation.end,
                annotation.label,
                annotation.text,
                annotation.token_start or 0,
                annotation.token_end or 0,
            )
        )
    for tok in token_seq:
        parts.extend((tok.text, tok.start, tok.end, tok.id))

    return _hash(tuple(parts))