adamnsandle
2023-04-28 11:48:01 +00:00
parent c3c67cdcb8
commit a9d2b591de
2 changed files with 4 additions and 5 deletions

Binary file not shown.

View File

@@ -13,11 +13,11 @@ class OnnxWrapper():
import numpy as np import numpy as np
global np global np
import onnxruntime import onnxruntime
opts = onnxruntime.SessionOptions() opts = onnxruntime.SessionOptions()
opts.inter_op_num_threads = 1 opts.inter_op_num_threads = 1
opts.intra_op_num_threads = 1 opts.intra_op_num_threads = 1
if force_onnx_cpu and 'CPUExecutionProvider' in onnxruntime.get_available_providers(): if force_onnx_cpu and 'CPUExecutionProvider' in onnxruntime.get_available_providers():
self.session = onnxruntime.InferenceSession(path, providers=['CPUExecutionProvider'], sess_options=opts) self.session = onnxruntime.InferenceSession(path, providers=['CPUExecutionProvider'], sess_options=opts)
else: else:
@@ -34,7 +34,7 @@ class OnnxWrapper():
if sr != 16000 and (sr % 16000 == 0): if sr != 16000 and (sr % 16000 == 0):
step = sr // 16000 step = sr // 16000
x = x[::step] x = x[:, ::step]
sr = 16000 sr = 16000
if sr not in self.sample_rates: if sr not in self.sample_rates:
@@ -291,7 +291,7 @@ def get_speech_timestamps(audio: torch.Tensor,
triggered = True triggered = True
current_speech['start'] = window_size_samples * i current_speech['start'] = window_size_samples * i
continue continue
if triggered and (window_size_samples * i) - current_speech['start'] > max_speech_samples: if triggered and (window_size_samples * i) - current_speech['start'] > max_speech_samples:
if prev_end: if prev_end:
current_speech['end'] = prev_end current_speech['end'] = prev_end
@@ -309,7 +309,6 @@ def get_speech_timestamps(audio: torch.Tensor,
prev_end = next_start = temp_end = 0 prev_end = next_start = temp_end = 0
triggered = False triggered = False
continue continue
if (speech_prob < neg_threshold) and triggered: if (speech_prob < neg_threshold) and triggered:
if not temp_end: if not temp_end: