init working commit
- README.md +0 -12
- app.py +132 -0
- evaluation_metrics.py +49 -0
- predefined_example.py +58 -0
- requirements.txt +2 -0
- span_dataclass_converters.py +30 -0
- token_level_output.py +77 -0
README.md
CHANGED
@@ -1,12 +0,0 @@
- ---
- title: Ner Evaluation Metrics
- emoji: 👁
- colorFrom: purple
- colorTo: green
- sdk: streamlit
- sdk_version: 1.36.0
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,132 @@
import pandas as pd
import streamlit as st
from annotated_text import annotated_text
from annotated_text.util import get_annotated_html
from streamlit_annotation_tools import text_labeler

from evaluation_metrics import EVALUATION_METRICS, get_evaluation_metric
from predefined_example import EXAMPLES
from span_dataclass_converters import (
    get_highlight_spans_from_ner_spans,
    get_ner_spans_from_annotations,
)


@st.cache_resource
def get_examples_attributes(selected_example):
    "Return example attributes so that they are not refreshed on every interaction"
    return (
        selected_example.text,
        selected_example.gt_labels,
        selected_example.gt_spans,
        selected_example.predictions,
    )


if __name__ == "__main__":
    st.set_page_config(layout="wide")
    st.title("NER Evaluation Metrics Comparison")

    st.write(
        "Evaluation for the NER task requires a ground truth and a prediction that will be evaluated. "
        "The ground truth is shown below; add predictions in the next section to compare the evaluation metrics."
    )

    # with st.container():
    st.subheader("Ground Truth")  # , divider='rainbow')

    selected_example = st.selectbox(
        "Select an example text from the drop down below",
        [example for example in EXAMPLES],
        format_func=lambda ex: ex.text,
    )

    text, gt_labels, gt_spans, predictions = get_examples_attributes(selected_example)

    # gt_spans is already sorted by span start, which the highlight converter expects
    annotated_text(get_highlight_spans_from_ner_spans(gt_spans, text))

    annotated_predictions = [
        get_annotated_html(get_highlight_spans_from_ner_spans(ner_span, text))
        for ner_span in predictions
    ]
    predictions_df = pd.DataFrame(
        {
            # "ID": [f"Prediction_{index}" for index in range(len(predictions))],
            "Prediction": annotated_predictions,
            "ner_spans": predictions,
        },
        index=[f"Prediction_{index}" for index in range(len(predictions))],
    )

    st.subheader("Predictions")  # , divider='rainbow')

    with st.expander("Click to Add Predictions"):
        st.subheader("Adding predictions")
        st.markdown(
            """
Add predictions to the list of predictions on which the evaluation metrics will be calculated.
- Select the entity type/label name and then highlight the span in the text below.
- To remove a span, double click on the highlighted text.
- Once you have your desired prediction, click on the 'Add' button. (The prediction created is shown as JSON below.)
            """
        )
        st.write(
            "Note: Only the spans of the selected label name are shown at a given instance.",
        )
        labels = text_labeler(text, gt_labels)
        st.json(labels, expanded=False)

        # if st.button("Add Prediction"):
        #     labels = text_labeler(text)
        if st.button("Add!"):
            spans = get_ner_spans_from_annotations(labels)
            spans = sorted(spans, key=lambda span: span["start"])
            predictions.append(spans)
            annotated_predictions.append(
                get_annotated_html(get_highlight_spans_from_ner_spans(spans, text))
            )
            predictions_df = pd.DataFrame(
                {
                    # "ID": [f"Prediction_{index}" for index in range(len(predictions))],
                    "Prediction": annotated_predictions,
                    "ner_spans": predictions,
                },
                index=[f"Prediction_{index}" for index in range(len(predictions))],
            )
            print("added")

    highlighted_predictions_df = predictions_df[["Prediction"]]
    st.write(highlighted_predictions_df.to_html(escape=False), unsafe_allow_html=True)
    st.divider()

    ### EVALUATION METRICS COMPARISON ###

    st.subheader("Evaluation Metrics Comparison")  # , divider='rainbow')
    st.markdown("""
The different evaluation metrics we have for the NER task are
- Span Based Evaluation with Partial Overlap
- Token Based Evaluation with Micro Avg
- Token Based Evaluation with Macro Avg
    """)

    with st.expander("View Predictions Details"):
        st.write(predictions_df.to_html(escape=False), unsafe_allow_html=True)

    if st.button("Get Metrics!"):
        for evaluation_metric_type in EVALUATION_METRICS:
            predictions_df[evaluation_metric_type] = predictions_df.ner_spans.apply(
                lambda ner_spans: get_evaluation_metric(
                    metric_type=evaluation_metric_type,
                    gt_ner_span=gt_spans,
                    pred_ner_span=ner_spans,
                    text=text,
                )
            )

        metrics_df = predictions_df.drop(["ner_spans"], axis=1)

        st.write(metrics_df.to_html(escape=False), unsafe_allow_html=True)
        print("compared")
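Outside the Streamlit widgets, app.py boils down to a small pandas pipeline: pick an example, treat its ground-truth spans as the first prediction, and apply each metric as a DataFrame column. The sketch below is illustrative only; it assumes the modules from this commit are importable from the working directory, and the variable names are not part of the app itself.

# Minimal non-UI sketch of the "Get Metrics!" flow (illustrative, not part of the app).
import pandas as pd

from evaluation_metrics import EVALUATION_METRICS, get_evaluation_metric
from predefined_example import EXAMPLES

example = EXAMPLES[0]                      # small_example
text, gt_spans = example.text, example.gt_spans
predictions = example.predictions          # defaults to [gt_spans]

predictions_df = pd.DataFrame(
    {"ner_spans": predictions},
    index=[f"Prediction_{i}" for i in range(len(predictions))],
)

for metric in EVALUATION_METRICS:
    predictions_df[metric] = predictions_df.ner_spans.apply(
        lambda spans: get_evaluation_metric(
            metric_type=metric, gt_ner_span=gt_spans, pred_ner_span=spans, text=text
        )
    )

# All scores should be 1.0 here, since the only prediction is the ground truth itself.
print(predictions_df.drop(columns=["ner_spans"]))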
evaluation_metrics.py
ADDED
@@ -0,0 +1,49 @@
from nervaluate import Evaluator
from sklearn.metrics import classification_report

from token_level_output import get_token_output_labels

EVALUATION_METRICS = [
    "Span Based Evaluation with Partial Overlap",
    "Token Based Evaluation with Micro Avg",
    "Token Based Evaluation with Macro Avg",
]


def get_span_eval(gt_ner_span, pred_ner_span, text):
    evaluator = Evaluator([gt_ner_span], [pred_ner_span], tags=["Disease", "Drug"])
    return round(evaluator.evaluate()[0]["ent_type"]["f1"], 2)


def get_token_micro_eval(gt_ner_span, pred_ner_span, text):
    return round(
        classification_report(
            get_token_output_labels(gt_ner_span, text),
            get_token_output_labels(pred_ner_span, text),
            labels=["Disease", "Drug"],
            output_dict=True,
        )["micro avg"]["f1-score"],
        2,
    )


def get_token_macro_eval(gt_ner_span, pred_ner_span, text):
    return round(
        classification_report(
            get_token_output_labels(gt_ner_span, text),
            get_token_output_labels(pred_ner_span, text),
            labels=["Disease", "Drug"],
            output_dict=True,
        )["macro avg"]["f1-score"],
        2,
    )


def get_evaluation_metric(metric_type, gt_ner_span, pred_ner_span, text):
    match metric_type:
        case "Span Based Evaluation with Partial Overlap":
            return get_span_eval(gt_ner_span, pred_ner_span, text)
        case "Token Based Evaluation with Micro Avg":
            return get_token_micro_eval(gt_ner_span, pred_ner_span, text)
        case "Token Based Evaluation with Macro Avg":
            return get_token_macro_eval(gt_ner_span, pred_ner_span, text)
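The three metrics can also be probed directly, without the app, by calling get_evaluation_metric with hand-built spans. The sentence and spans below are hypothetical, chosen so the prediction only partially overlaps the Disease span and misses the Drug span entirely; offsets apply to this sentence only.

# Hypothetical spans for illustration; the dict keys match the converters in this repo.
text = "acute bronchitis treated with mucolytic"
gt = [
    {"start": 0, "end": 16, "label": "Disease"},   # "acute bronchitis"
    {"start": 30, "end": 39, "label": "Drug"},     # "mucolytic"
]
pred = [
    {"start": 6, "end": 16, "label": "Disease"},   # only "bronchitis" found
]

# The span-based score credits the partial Disease overlap, while the token-based
# scores also penalize the tokens that were missed.
for metric in EVALUATION_METRICS:
    print(metric, get_evaluation_metric(metric, gt, pred, text))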
predefined_example.py
ADDED
@@ -0,0 +1,58 @@
from dataclasses import dataclass

from span_dataclass_converters import get_ner_spans_from_annotations


@dataclass
class PredefinedExample:
    text: str
    gt_labels: dict
    # gt_spans: list
    # predictions: list

    @property
    def gt_spans(self):
        return sorted(
            get_ner_spans_from_annotations(self.gt_labels),
            key=lambda span: span["start"],
        )

    @property
    def predictions(self):
        return [self.gt_spans]


small_example = PredefinedExample(
    text="The patient was diagnosed with bronchitis and was prescribed a mucolytic",
    gt_labels={
        "Disease": [
            {"start": 31, "end": 41, "label": "bronchitis"},
        ],
        "Drug": [
            {"start": 63, "end": 72, "label": "mucolytic"},
        ],
    },
)

big_example = PredefinedExample(
    text=(
        "The patient was experiencing stomach pain and flu like symptoms for 3 days. "
        "Upon investigation, the chest xray revealed acute bronchitis disease. "
        "The patient was asked to take rest for a week and was prescribed a mucolytic along with paracetamol for body pains."
    ),
    gt_labels={
        "Disease": [
            {"start": 120, "end": 144, "label": "acute bronchitis disease"},
        ],
        "Drug": [
            {"start": 213, "end": 222, "label": "mucolytic"},
            {"start": 234, "end": 245, "label": "paracetamol"},
        ],
        "Symptoms": [
            {"start": 29, "end": 41, "label": "stomach pain"},
            {"start": 46, "end": 63, "label": "flu like symptoms"},
        ],
    },
)

EXAMPLES = [small_example, big_example]
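As a quick check of the derived properties, importing the module and printing small_example's span view should give the output below (shown as comments; this assumes the module is importable and is illustrative only).

from predefined_example import small_example

print(small_example.gt_spans)
# Expected:
# [{'start': 31, 'end': 41, 'label': 'Disease', 'span_text': 'bronchitis'},
#  {'start': 63, 'end': 72, 'label': 'Drug', 'span_text': 'mucolytic'}]

print(small_example.predictions)  # the ground truth doubles as the first prediction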
requirements.txt
ADDED
@@ -0,0 +1,2 @@
streamlit_annotation_tools
annotated_text
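The code in this commit also imports streamlit, pandas, nervaluate, and scikit-learn; the Space's streamlit SDK setting presumably supplies streamlit itself, but the commit does not say where the others come from. A fuller requirements list might look like the purely illustrative sketch below.

# illustrative only -- not part of this commit
streamlit_annotation_tools
annotated_text
pandas
nervaluate
scikit-learn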
span_dataclass_converters.py
ADDED
@@ -0,0 +1,30 @@
def get_ner_spans_from_annotations(annotated_labels):
    spans = []
    for entity_type, spans_list in annotated_labels.items():
        for spans_dict in spans_list:
            ner_span_dict = {
                **spans_dict,
                "label": entity_type,
                "span_text": spans_dict["label"],
            }
            spans.append(ner_span_dict)
    return spans


def get_highlight_spans_from_ner_spans(ner_spans, parent_text):
    if not ner_spans:
        return [parent_text]

    output_list = []
    prev_span_end = 0
    # output_list = [parent_text[ner_spans[0]["start"]]]
    for span in ner_spans:
        output_list.append(parent_text[prev_span_end : span["start"]])
        tup = (span["span_text"], span["label"])
        output_list.append(tup)
        prev_span_end = span["end"]

    if prev_span_end != len(parent_text):
        output_list.append(parent_text[prev_span_end:])

    return output_list
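To see what the two converters produce together, here is a small round trip over the same sentence used by the predefined small example. The expected output is shown as comments; this assumes the module is importable and is illustrative only.

from span_dataclass_converters import (
    get_highlight_spans_from_ner_spans,
    get_ner_spans_from_annotations,
)

text = "The patient was diagnosed with bronchitis and was prescribed a mucolytic"
annotations = {
    "Disease": [{"start": 31, "end": 41, "label": "bronchitis"}],
    "Drug": [{"start": 63, "end": 72, "label": "mucolytic"}],
}

# Sort by start offset, as the highlight converter assumes ordered, non-overlapping spans.
ner_spans = sorted(
    get_ner_spans_from_annotations(annotations), key=lambda span: span["start"]
)
print(get_highlight_spans_from_ner_spans(ner_spans, text))
# Expected:
# ['The patient was diagnosed with ', ('bronchitis', 'Disease'),
#  ' and was prescribed a ', ('mucolytic', 'Drug')]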
token_level_output.py
ADDED
@@ -0,0 +1,77 @@
import re


class WhitespaceTokenSplitter:
    def __init__(self):
        self.whitespace_pattern = re.compile(r"\w+(?:[-_]\w+)*|\S")

    def __call__(self, text):
        for match in self.whitespace_pattern.finditer(text):
            yield match.group(), match.start(), match.end()


tokenizer = WhitespaceTokenSplitter()


def get_char_label_map(ner_spans: list):
    """return a dict with char indices (int) as keys and the label they belong to as values
    example -- {1: 'label1', 2: 'label1', 5: 'label2', 6: 'label2'}
    note: the char indices that do not belong to a span do not exist in the map
    """
    char_label_map = {}
    for span in ner_spans:
        char_label_map = {
            **char_label_map,
            **{
                char_index: span["label"]
                for char_index in range(span["start"], span["end"])
            },
        }
    return char_label_map


def get_tokens(text: str) -> list[str]:
    tokens_with_offsets = list(tokenizer(text))
    return [token for token, start, end in tokens_with_offsets]


def get_token_offsets(text: str) -> list[tuple[int, int]]:
    tokens_with_offsets = list(tokenizer(text))
    return [(start, end) for token, start, end in tokens_with_offsets]


def get_list_of_token_label_tuples(
    tokens: list[str],
    token_spans: list[tuple[int, int]],
    char_label_map: dict[int, str],
) -> list[tuple[str, str]]:
    """
    returns a list of tuples with first element as token and second element as the label
    example - [('a', 'O'), ('cat', 'ANIMAL'), ('sits', 'O')]
    note: the label of a token is decided based on the max chars in the token belonging to a span
    """
    token_labels = []
    for token, offsets in zip(tokens, token_spans):
        if offsets[0] == offsets[1]:
            token_labels.append((token, "O"))
            continue
        char_labels = [
            char_label_map.get(char_index, "O") for char_index in range(*offsets)
        ]
        token_label = max(set(char_labels), key=char_labels.count)
        token_labels.append((token, token_label))
    return token_labels


def get_token_outputs(ner_spans, parent_text):
    char_label_map = get_char_label_map(ner_spans)

    token_offsets = get_token_offsets(parent_text)
    tokens = get_tokens(parent_text)

    return get_list_of_token_label_tuples(tokens, token_offsets, char_label_map)


def get_token_output_labels(ner_spans, parent_text):
    token_output = get_token_outputs(ner_spans, parent_text)
    return [label for token, label in token_output]
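These helpers produce the per-token label sequences consumed by classification_report in evaluation_metrics.py. A tiny illustration with a single hypothetical Drug span follows; the offsets apply to this sentence only, and the expected output is shown as comments.

from token_level_output import get_token_outputs, get_token_output_labels

text = "prescribed a mucolytic"
spans = [{"start": 13, "end": 22, "label": "Drug"}]  # hypothetical span over "mucolytic"

print(get_token_outputs(spans, text))
# Expected: [('prescribed', 'O'), ('a', 'O'), ('mucolytic', 'Drug')]
print(get_token_output_labels(spans, text))
# Expected: ['O', 'O', 'Drug']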