Usage
import numpy as np
import onnxruntime as ort
from transformers import AutoTokenizer
from huggingface_hub import hf_hub_download
import time
class SaudiEOU:
    """End-of-utterance (end-of-turn) detector backed by the Saudi-EOU ONNX model.

    The ONNX graph and its tokenizer are fetched from the Hugging Face Hub
    and inference is executed through ONNX Runtime.
    """

    def __init__(self, repo_id="mohamedsamyy/Saudi-EOU", providers=None):
        """Download the model and tokenizer and open an inference session.

        Args:
            repo_id: Hub repository that holds ``Saudi_EOU.onnx`` and the
                tokenizer files.
            providers: Optional explicit list of ONNX Runtime execution
                providers. When omitted, CUDA is preferred and CPU is used
                as the fallback, restricted to what this ONNX Runtime build
                actually offers.
        """
        print(f"Loading model from repo: {repo_id}")
        model_path = hf_hub_download(repo_id=repo_id, filename="Saudi_EOU.onnx")
        self.tokenizer = AutoTokenizer.from_pretrained(repo_id)
        if providers is None:
            # Listing only the CUDA provider leaves CPU-only machines relying
            # on an implicit fallback; request both, in priority order, and
            # keep only those that are installed.
            available = ort.get_available_providers()
            providers = [
                name
                for name in ("CUDAExecutionProvider", "CPUExecutionProvider")
                if name in available
            ]
        self.session = ort.InferenceSession(model_path, providers=providers)
        # Inputs longer than this many tokens are truncated in predict().
        self.max_length = 128
        print("✅ Model and tokenizer loaded successfully.")

    def predict(self, text: str) -> tuple:
        """Classify ``text`` as end-of-turn or not.

        Args:
            text: The utterance to classify.

        Returns:
            ``(predicted_label, confidence)``. ``confidence`` is the sigmoid
            of the first logit and ``predicted_label`` is 1 when
            ``confidence >= 0.5``, otherwise 0.

        Side effects:
            Prints the latency of the inference call (session run plus
            sigmoid; tokenization is not included).
        """
        inputs = self.tokenizer(
            text,
            truncation=True,
            max_length=self.max_length,
            return_tensors="np",
        )
        feed_dict = {
            "input_ids": inputs["input_ids"],
            "attention_mask": inputs["attention_mask"],
        }
        start = time.perf_counter()
        outputs = self.session.run(None, feed_dict)
        # First output, first row of the batch (a single text was passed).
        # Assumes the first output is laid out as (batch, logits) - TODO confirm.
        logits = outputs[0][0]
        confidence = self._sigmoid(logits[0])
        end = time.perf_counter()
        print(f"'{text}' -> latency: {end - start:.4f}s")
        predicted_label = 1 if confidence >= 0.5 else 0
        return predicted_label, confidence

    def _sigmoid(self, x):
        """Return the logistic sigmoid of ``x`` without overflowing.

        ``1 / (1 + exp(-x))`` overflows in ``exp`` for large negative ``x``.
        Using ``exp(-|x|)``, which always lies in ``[0, 1]``, and choosing
        the algebraically equivalent form per sign avoids that.

        Args:
            x: A scalar or NumPy array of logits.

        Returns:
            A NumPy scalar for scalar input, or an array of the same shape
            for array input.
        """
        x = np.asarray(x)
        z = np.exp(-np.abs(x))
        result = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
        # Indexing with an empty tuple turns a 0-d array back into a scalar
        # and leaves n-d arrays untouched.
        return result[()]
# Example usage
detector = SaudiEOU()
# Sample utterances to classify.
# NOTE(review): these Arabic literals were restored from mis-decoded UTF-8
# (the page text had been read with the wrong code page and split across
# lines); confirm them against the published model card.
sentences = ["حياك الله", "ممم", "اهلا", "يا هلا ", "السلام عليكم"]
for sentence in sentences:
    predicted_label, confidence = detector.predict(sentence)
    result = "End of Turn" if predicted_label == 1 else "Not End of Turn"
    print(f"'{sentence}' -> {result} (confidence: {confidence:.3f})")
This example shows how to load the SaudiEOU ONNX model from the Hugging Face Hub and predict whether a sentence is an end-of-turn utterance. The session requests the CUDA execution provider, so inference runs on the GPU when one is available, and the inference latency is printed for each sentence.
Model tree for mohamedsamyy/Saudi-EOU
Base model
aubmindlab/bert-base-arabertv02