fix: floor (#293)

This commit is contained in:
Chenghao Mou
2025-04-04 06:04:56 +01:00
committed by GitHub
parent 23d88dcfb9
commit e636166b85

View File

@@ -1,12 +1,13 @@
-import os
 import math
+import os
import librosa import librosa
import numpy as np import numpy as np
import torch import torch
from einops import rearrange from einops import rearrange
from transformers import AutoFeatureExtractor from transformers import AutoFeatureExtractor
class AudioProcessor: class AudioProcessor:
def __init__(self, feature_extractor_path="openai/whisper-tiny/"): def __init__(self, feature_extractor_path="openai/whisper-tiny/"):
self.feature_extractor = AutoFeatureExtractor.from_pretrained(feature_extractor_path) self.feature_extractor = AutoFeatureExtractor.from_pretrained(feature_extractor_path)
@@ -58,8 +59,8 @@ class AudioProcessor:
audio_fps = 50 audio_fps = 50
fps = int(fps) fps = int(fps)
whisper_idx_multiplier = audio_fps / fps whisper_idx_multiplier = audio_fps / fps
-num_frames = math.floor((librosa_length / sr)) * fps
+num_frames = math.floor((librosa_length / sr) * fps)
-actual_length = math.floor((librosa_length / sr)) * audio_fps
+actual_length = math.floor((librosa_length / sr) * audio_fps)
whisper_feature = whisper_feature[:,:actual_length,...] whisper_feature = whisper_feature[:,:actual_length,...]
# Calculate padding amount # Calculate padding amount
@@ -98,4 +99,3 @@ if __name__ == "__main__":
print("Audio Feature shape:", audio_feature.shape) print("Audio Feature shape:", audio_feature.shape)
print("librosa_feature_length:", librosa_feature_length) print("librosa_feature_length:", librosa_feature_length)