mirror of
https://github.com/snakers4/silero-vad.git
synced 2026-02-05 18:09:22 +08:00
修复CalculateProb方法计算句子EndOffset的bug
修改语法提示
This commit is contained in:
@@ -21,7 +21,7 @@ class Program
|
|||||||
MIN_SPEECH_DURATION_MS, MAX_SPEECH_DURATION_SECONDS, MIN_SILENCE_DURATION_MS, SPEECH_PAD_MS);
|
MIN_SPEECH_DURATION_MS, MAX_SPEECH_DURATION_SECONDS, MIN_SILENCE_DURATION_MS, SPEECH_PAD_MS);
|
||||||
List<SileroSpeechSegment> speechTimeList = vadDetector.GetSpeechSegmentList(new FileInfo(EXAMPLE_WAV_FILE));
|
List<SileroSpeechSegment> speechTimeList = vadDetector.GetSpeechSegmentList(new FileInfo(EXAMPLE_WAV_FILE));
|
||||||
//Console.WriteLine(speechTimeList.ToJson());
|
//Console.WriteLine(speechTimeList.ToJson());
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new();
|
||||||
foreach (var speechSegment in speechTimeList)
|
foreach (var speechSegment in speechTimeList)
|
||||||
{
|
{
|
||||||
sb.Append($"start second: {speechSegment.StartSecond}, end second: {speechSegment.EndSecond}\n");
|
sb.Append($"start second: {speechSegment.StartSecond}, end second: {speechSegment.EndSecond}\n");
|
||||||
|
|||||||
@@ -53,28 +53,26 @@ public class SileroVadDetector
|
|||||||
{
|
{
|
||||||
Reset();
|
Reset();
|
||||||
|
|
||||||
using (var audioFile = new AudioFileReader(wavFile.FullName))
|
using var audioFile = new AudioFileReader(wavFile.FullName);
|
||||||
{
|
List<float> speechProbList = [];
|
||||||
List<float> speechProbList = new List<float>();
|
|
||||||
this._audioLengthSamples = (int)(audioFile.Length / 2);
|
this._audioLengthSamples = (int)(audioFile.Length / 2);
|
||||||
float[] buffer = new float[this._windowSizeSample];
|
float[] buffer = new float[this._windowSizeSample];
|
||||||
|
|
||||||
while (audioFile.Read(buffer, 0, buffer.Length) > 0)
|
while (audioFile.Read(buffer, 0, buffer.Length) > 0)
|
||||||
{
|
{
|
||||||
float speechProb = _model.Call(new[] { buffer }, _samplingRate)[0];
|
float speechProb = _model.Call([buffer], _samplingRate)[0];
|
||||||
speechProbList.Add(speechProb);
|
speechProbList.Add(speechProb);
|
||||||
}
|
}
|
||||||
|
|
||||||
return CalculateProb(speechProbList);
|
return CalculateProb(speechProbList);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
private List<SileroSpeechSegment> CalculateProb(List<float> speechProbList)
|
private List<SileroSpeechSegment> CalculateProb(List<float> speechProbList)
|
||||||
{
|
{
|
||||||
List<SileroSpeechSegment> result = new List<SileroSpeechSegment>();
|
List<SileroSpeechSegment> result = [];
|
||||||
bool triggered = false;
|
bool triggered = false;
|
||||||
int tempEnd = 0, prevEnd = 0, nextStart = 0;
|
int tempEnd = 0, prevEnd = 0, nextStart = 0;
|
||||||
SileroSpeechSegment segment = new SileroSpeechSegment();
|
SileroSpeechSegment segment = new();
|
||||||
|
|
||||||
for (int i = 0; i < speechProbList.Count; i++)
|
for (int i = 0; i < speechProbList.Count; i++)
|
||||||
{
|
{
|
||||||
@@ -164,7 +162,8 @@ public class SileroVadDetector
|
|||||||
|
|
||||||
if (segment.StartOffset != null && (_audioLengthSamples - segment.StartOffset) > _minSpeechSamples)
|
if (segment.StartOffset != null && (_audioLengthSamples - segment.StartOffset) > _minSpeechSamples)
|
||||||
{
|
{
|
||||||
segment.EndOffset = _audioLengthSamples;
|
//segment.EndOffset = _audioLengthSamples;
|
||||||
|
segment.EndOffset = speechProbList.Count * _windowSizeSample;
|
||||||
result.Add(segment);
|
result.Add(segment);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -182,7 +181,7 @@ public class SileroVadDetector
|
|||||||
int silenceDuration = nextItem.StartOffset.Value - item.EndOffset.Value;
|
int silenceDuration = nextItem.StartOffset.Value - item.EndOffset.Value;
|
||||||
if (silenceDuration < 2 * _speechPadSamples)
|
if (silenceDuration < 2 * _speechPadSamples)
|
||||||
{
|
{
|
||||||
item.EndOffset = item.EndOffset + (silenceDuration / 2);
|
item.EndOffset += (silenceDuration / 2);
|
||||||
nextItem.StartOffset = Math.Max(0, nextItem.StartOffset.Value - (silenceDuration / 2));
|
nextItem.StartOffset = Math.Max(0, nextItem.StartOffset.Value - (silenceDuration / 2));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@@ -200,9 +199,9 @@ public class SileroVadDetector
|
|||||||
return MergeListAndCalculateSecond(result, _samplingRate);
|
return MergeListAndCalculateSecond(result, _samplingRate);
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<SileroSpeechSegment> MergeListAndCalculateSecond(List<SileroSpeechSegment> original, int samplingRate)
|
private static List<SileroSpeechSegment> MergeListAndCalculateSecond(List<SileroSpeechSegment> original, int samplingRate)
|
||||||
{
|
{
|
||||||
List<SileroSpeechSegment> result = new List<SileroSpeechSegment>();
|
List<SileroSpeechSegment> result = [];
|
||||||
if (original == null || original.Count == 0)
|
if (original == null || original.Count == 0)
|
||||||
{
|
{
|
||||||
return result;
|
return result;
|
||||||
@@ -216,7 +215,10 @@ public class SileroVadDetector
|
|||||||
for (int i = 1; i < original.Count; i++)
|
for (int i = 1; i < original.Count; i++)
|
||||||
{
|
{
|
||||||
SileroSpeechSegment segment = original[i];
|
SileroSpeechSegment segment = original[i];
|
||||||
|
if (i == 235)
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
if (segment.StartOffset > right)
|
if (segment.StartOffset > right)
|
||||||
{
|
{
|
||||||
result.Add(new SileroSpeechSegment(left, right,
|
result.Add(new SileroSpeechSegment(left, right,
|
||||||
@@ -242,7 +244,7 @@ public class SileroVadDetector
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private float CalculateSecondByOffset(int offset, int samplingRate)
|
private static float CalculateSecondByOffset(int offset, int samplingRate)
|
||||||
{
|
{
|
||||||
float secondValue = offset * 1.0f / samplingRate;
|
float secondValue = offset * 1.0f / samplingRate;
|
||||||
return (float)Math.Floor(secondValue * 1000.0f) / 1000.0f;
|
return (float)Math.Floor(secondValue * 1000.0f) / 1000.0f;
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
using Microsoft.ML.OnnxRuntime;
|
using Microsoft.ML.OnnxRuntime;
|
||||||
using Microsoft.ML.OnnxRuntime.Tensors;
|
using Microsoft.ML.OnnxRuntime.Tensors;
|
||||||
|
|
||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
@@ -7,21 +8,23 @@ using System.Linq;
|
|||||||
namespace VADdotnet;
|
namespace VADdotnet;
|
||||||
|
|
||||||
|
|
||||||
public class SileroVadOnnxModel : IDisposable
|
public class SileroVadOnnxModel : IDisposable
|
||||||
{
|
{
|
||||||
private readonly InferenceSession session;
|
private readonly InferenceSession session;
|
||||||
private float[][][] state;
|
private float[][][] state;
|
||||||
private float[][] context;
|
private float[][] context;
|
||||||
private int lastSr = 0;
|
private int lastSr = 0;
|
||||||
private int lastBatchSize = 0;
|
private int lastBatchSize = 0;
|
||||||
private static readonly List<int> SAMPLE_RATES = new List<int> { 8000, 16000 };
|
private static readonly List<int> SAMPLE_RATES = [8000, 16000];
|
||||||
|
|
||||||
public SileroVadOnnxModel(string modelPath)
|
public SileroVadOnnxModel(string modelPath)
|
||||||
{
|
{
|
||||||
var sessionOptions = new SessionOptions();
|
var sessionOptions = new SessionOptions
|
||||||
sessionOptions.InterOpNumThreads = 1;
|
{
|
||||||
sessionOptions.IntraOpNumThreads = 1;
|
InterOpNumThreads = 1,
|
||||||
sessionOptions.EnableCpuMemArena = true;
|
IntraOpNumThreads = 1,
|
||||||
|
EnableCpuMemArena = true
|
||||||
|
};
|
||||||
|
|
||||||
session = new InferenceSession(modelPath, sessionOptions);
|
session = new InferenceSession(modelPath, sessionOptions);
|
||||||
ResetStates();
|
ResetStates();
|
||||||
@@ -34,33 +37,27 @@ namespace VADdotnet;
|
|||||||
state[1] = new float[1][];
|
state[1] = new float[1][];
|
||||||
state[0][0] = new float[128];
|
state[0][0] = new float[128];
|
||||||
state[1][0] = new float[128];
|
state[1][0] = new float[128];
|
||||||
context = Array.Empty<float[]>();
|
context = [];
|
||||||
lastSr = 0;
|
lastSr = 0;
|
||||||
lastBatchSize = 0;
|
lastBatchSize = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
/// Releases the inference session owned by this model and tells the GC that
/// no finalizer needs to run for this instance.
/// </summary>
/// <remarks>
/// Calling only <see cref="GC.SuppressFinalize(object)"/> does not dispose anything:
/// the session must be disposed explicitly here, otherwise it stays alive until the
/// process exits or the runtime finalizes it on its own schedule.
/// </remarks>
public void Dispose()
{
    // Dispose the owned session first; the null-conditional is kept from the
    // earlier revision of this method.
    session?.Dispose();

    // Suppress finalization only after the owned resource has been released.
    GC.SuppressFinalize(this);
}
|
||||||
|
|
||||||
/// <summary>
/// Carries the audio batch and sampling rate returned by input validation.
/// Both values are exposed through read-only properties.
/// </summary>
public class ValidationResult
{
    /// <summary>
    /// Creates a result holding the given batch and sampling rate.
    /// </summary>
    /// <param name="x">
    /// Jagged array of samples. The array reference is stored as-is, not copied.
    /// Presumably one inner array per batch item — confirm against the caller.
    /// </param>
    /// <param name="sr">Sampling rate associated with <paramref name="x"/>.</param>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="x"/> is <c>null</c>.
    /// </exception>
    public ValidationResult(float[][] x, int sr)
    {
        // Fail at construction rather than later when the batch is flattened into a tensor.
        X = x ?? throw new ArgumentNullException(nameof(x));
        Sr = sr;
    }

    /// <summary>The sample batch passed to the constructor.</summary>
    public float[][] X { get; }

    /// <summary>The sampling rate passed to the constructor.</summary>
    public int Sr { get; }
}
|
||||||
|
|
||||||
private ValidationResult ValidateInput(float[][] x, int sr)
|
private static ValidationResult ValidateInput(float[][] x, int sr)
|
||||||
{
|
{
|
||||||
if (x.Length == 1)
|
if (x.Length == 1)
|
||||||
{
|
{
|
||||||
x = new float[][] { x[0] };
|
x = [x[0]];
|
||||||
}
|
}
|
||||||
if (x.Length > 2)
|
if (x.Length > 2)
|
||||||
{
|
{
|
||||||
@@ -186,13 +183,12 @@ namespace VADdotnet;
|
|||||||
|
|
||||||
var inputs = new List<NamedOnnxValue>
|
var inputs = new List<NamedOnnxValue>
|
||||||
{
|
{
|
||||||
NamedOnnxValue.CreateFromTensor("input", new DenseTensor<float>(x.SelectMany(a => a).ToArray(), new[] { x.Length, x[0].Length })),
|
NamedOnnxValue.CreateFromTensor("input", new DenseTensor<float>(x.SelectMany(a => a).ToArray(), [x.Length, x[0].Length])),
|
||||||
NamedOnnxValue.CreateFromTensor("sr", new DenseTensor<long>(new[] { (long)sr }, new[] { 1 })),
|
NamedOnnxValue.CreateFromTensor("sr", new DenseTensor<long>(new[] { (long)sr }, [1])),
|
||||||
NamedOnnxValue.CreateFromTensor("state", new DenseTensor<float>(state.SelectMany(a => a.SelectMany(b => b)).ToArray(), new[] { state.Length, state[0].Length, state[0][0].Length }))
|
NamedOnnxValue.CreateFromTensor("state", new DenseTensor<float>(state.SelectMany(a => a.SelectMany(b => b)).ToArray(), [state.Length, state[0].Length, state[0][0].Length]))
|
||||||
};
|
};
|
||||||
|
|
||||||
using (var outputs = session.Run(inputs))
|
using var outputs = session.Run(inputs);
|
||||||
{
|
|
||||||
var output = outputs.First(o => o.Name == "output").AsTensor<float>();
|
var output = outputs.First(o => o.Name == "output").AsTensor<float>();
|
||||||
var newState = outputs.First(o => o.Name == "stateN").AsTensor<float>();
|
var newState = outputs.First(o => o.Name == "stateN").AsTensor<float>();
|
||||||
|
|
||||||
@@ -214,7 +210,6 @@ namespace VADdotnet;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return output.ToArray();
|
return [.. output];
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|||||||
25
examples/csharp/VadDotNet.sln
Normal file
25
examples/csharp/VadDotNet.sln
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
|
||||||
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
|
# Visual Studio Version 17
|
||||||
|
VisualStudioVersion = 17.14.36616.10 d17.14
|
||||||
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
|
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "VadDotNet", "VadDotNet.csproj", "{F36E1741-EDDB-90C7-7501-4911058F8996}"
|
||||||
|
EndProject
|
||||||
|
Global
|
||||||
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
Debug|Any CPU = Debug|Any CPU
|
||||||
|
Release|Any CPU = Release|Any CPU
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
|
{F36E1741-EDDB-90C7-7501-4911058F8996}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{F36E1741-EDDB-90C7-7501-4911058F8996}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{F36E1741-EDDB-90C7-7501-4911058F8996}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{F36E1741-EDDB-90C7-7501-4911058F8996}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
|
HideSolutionNode = FALSE
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||||
|
SolutionGuid = {DFC4CEE8-1034-46B4-A5F4-D1649B3543E6}
|
||||||
|
EndGlobalSection
|
||||||
|
EndGlobal
|
||||||
Reference in New Issue
Block a user