<enhance>: modified inference codes

1. bbox_shift can now be set in configs/inference/test.yaml.
2. whisper no longer needs to be installed with pip; it is now imported as a local package (`from .whisper import load_model`).
This commit is contained in:
zkangchen
2024-04-03 14:35:55 +08:00
parent dde2ee49ef
commit bc1379abad
18 changed files with 28 additions and 96 deletions

View File

@@ -52,7 +52,6 @@ def get_image(image,face,face_box,upper_boundary_ratio = 0.5,expand=1.2):
blur_kernel_size = int(0.1 * ori_shape[0] // 2 * 2) + 1
mask_array = cv2.GaussianBlur(np.array(modified_mask_image), (blur_kernel_size, blur_kernel_size), 0)
mask_image = Image.fromarray(mask_array)
mask_image.save("./debug_mask.png")
face_large.paste(face, (x-x_s, y-y_s, x1-x_s, y1-y_s))
body.paste(face_large, crop_box[:2], mask_image)

View File

@@ -1,7 +1,5 @@
import os
#import whisper
from whisper import load_model
#import whisper.whispher as whiisper
from .whisper import load_model
import soundfile as sf
import numpy as np
import time
@@ -9,11 +7,12 @@ import sys
sys.path.append("..")
class Audio2Feature():
def __init__(self, whisper_model_type="tiny",model_path="./checkpoints/wisper_tiny.pt"):
def __init__(self,
whisper_model_type="tiny",
model_path="./models/whisper/tiny.pt"):
self.whisper_model_type = whisper_model_type
self.model = load_model(model_path) #
def get_sliced_feature(self,feature_array, vid_idx, audio_feat_length= [2,2],fps = 25):
"""
Get sliced features based on a given index

View File

@@ -1,6 +0,0 @@
numpy
torch
tqdm
more-itertools
transformers>=4.19.0
ffmpeg-python==0.2.0

View File

@@ -1,24 +0,0 @@
import os
import pkg_resources
from setuptools import setup, find_packages
# Packaging script for the ``whisper`` package.

# Read the dependency list stored next to this script.  The context manager
# closes the file as soon as parsing is done (the original left the handle
# open), and the list is materialised inside the ``with`` body because
# ``pkg_resources.parse_requirements`` yields its results lazily.
requirements_path = os.path.join(os.path.dirname(__file__), "requirements.txt")
with open(requirements_path) as requirements_file:
    install_requires = [
        str(requirement)
        for requirement in pkg_resources.parse_requirements(requirements_file)
    ]

setup(
    name="whisper",
    py_modules=["whisper"],
    version="1.0",
    description="",
    author="OpenAI",
    packages=find_packages(exclude=["tests*"]),
    install_requires=install_requires,
    entry_points={
        # Registers a ``whisper`` console command bound to whisper.transcribe:cli.
        "console_scripts": ["whisper=whisper.transcribe:cli"],
    },
    include_package_data=True,
    extras_require={"dev": ["pytest"]},
)

View File

@@ -1,5 +0,0 @@
Metadata-Version: 2.1
Name: whisper
Version: 1.0
Author: OpenAI
Provides-Extra: dev

View File

@@ -1,18 +0,0 @@
setup.py
whisper/__init__.py
whisper/__main__.py
whisper/audio.py
whisper/decoding.py
whisper/model.py
whisper/tokenizer.py
whisper/transcribe.py
whisper/utils.py
whisper.egg-info/PKG-INFO
whisper.egg-info/SOURCES.txt
whisper.egg-info/dependency_links.txt
whisper.egg-info/entry_points.txt
whisper.egg-info/requires.txt
whisper.egg-info/top_level.txt
whisper/normalizers/__init__.py
whisper/normalizers/basic.py
whisper/normalizers/english.py

View File

@@ -1,2 +0,0 @@
[console_scripts]
whisper = whisper.transcribe:cli

View File

@@ -1,9 +0,0 @@
numpy
torch
tqdm
more-itertools
transformers>=4.19.0
ffmpeg-python==0.2.0
[dev]
pytest

View File

@@ -1 +0,0 @@
whisper