#!/bin/bash
#
# Split each input video into fixed-length sections (video + audio), run
# `python -m scripts.data` on every section, and finally write a JSON list of
# the processed videos to train.json or test.json in the current directory.
#
# Usage: <script> <split> <output_dir> <input_video>...
#   split        "train" or "test"; selects the JSON file that is written.
#   output_dir   Scratch directory for the sections. It is created on demand
#                and DELETED (rm -rf) after each input video is processed.
#   input_video  One or more video files (a ".mp4" suffix is stripped to form
#                the base name).
#
# Side effects: overwrites ./config.yaml for every section.

set -u

# Length of each extracted section, in seconds (3 minutes).
readonly CHUNK_SIZE=180

#######################################
# Print the duration of a media file in whole seconds.
# Arguments: $1 - path to the media file
# Outputs:   integer number of seconds on stdout
# Returns:   0 on success, 1 if ffmpeg reported no parsable duration
#######################################
probe_duration_seconds() {
  local media=$1
  local duration

  # `ffmpeg -i` without an output file prints stream info on stderr; take the
  # first "Duration: HH:MM:SS.xx," line only.
  duration=$(ffmpeg -i "$media" 2>&1 \
    | awk '$1 == "Duration:" { gsub(/,/, "", $2); print $2; exit }')

  # Rejects empty output (unreadable file) and values such as "N/A".
  if [[ ! "$duration" =~ ^([0-9]+):([0-9]+):([0-9]+)(\.[0-9]+)?$ ]]; then
    return 1
  fi

  # 10# forces base 10 so zero-padded fields like "08" are not read as octal.
  printf '%s\n' "$((10#${BASH_REMATCH[1]} * 3600 \
    + 10#${BASH_REMATCH[2]} * 60 \
    + 10#${BASH_REMATCH[3]}))"
}

#######################################
# Extract video and audio sections from one input video and run the Python
# script on each section.
# Arguments: $1 - input video path
#            $2 - output directory for the sections (removed when done)
#            $3 - split name (accepted for interface compatibility; unused)
# Outputs:   writes ./config.yaml for every section; diagnostics on stderr
# Returns:   0 on completion, 1 if the video duration cannot be determined
#######################################
extract_sections() {
  local input_video=$1
  local output_dir=$2
  # shellcheck disable=SC2034 # kept so existing three-argument callers still work
  local split=$3

  local base_name total_seconds
  base_name=$(basename -- "$input_video" .mp4)

  if ! total_seconds=$(probe_duration_seconds "$input_video"); then
    printf 'Could not determine duration of %s\n' "$input_video" >&2
    return 1
  fi

  mkdir -p -- "$output_dir"

  local index=0
  local start_time section_video section_audio
  for ((start_time = 0; start_time < total_seconds; start_time += CHUNK_SIZE)); do
    section_video="${output_dir}/${base_name}_part${index}.mp4"
    section_audio="${output_dir}/${base_name}_part${index}.mp3"

    # Stream-copy the video section, then extract the audio track as MP3.
    ffmpeg -i "$input_video" -ss "$start_time" -t "$CHUNK_SIZE" \
      -c copy "$section_video" \
      || printf 'ffmpeg failed to write %s\n' "$section_video" >&2
    ffmpeg -i "$input_video" -ss "$start_time" -t "$CHUNK_SIZE" \
      -q:a 0 -map a "$section_audio" \
      || printf 'ffmpeg failed to write %s\n' "$section_audio" >&2

    # Create and update the config.yaml file
    {
      printf '%s\n' "task_0:"
      printf '%s\n' " video_path: \"$section_video\""
      printf '%s\n' " audio_path: \"$section_audio\""
    } > config.yaml

    # Run the Python script with the current config.yaml
    python -m scripts.data --inference_config config.yaml \
      --folder_name "$base_name"

    index=$((index + 1))
  done

  # Clean up save folder; :? aborts instead of expanding to an empty path.
  rm -rf -- "${output_dir:?}"
}

#######################################
# Entry point: validate arguments, process every video, write the JSON list.
# Arguments: $1 - split ("train" or "test"), $2 - output dir, $3... - videos
# Outputs:   train.json or test.json in the current directory
# Returns:   exits 1 on bad usage/split or if any video could not be processed
#######################################
main() {
  if (($# < 3)); then
    printf 'Usage: %s <split> <output_dir> <input_video>...\n' "$0" >&2
    exit 1
  fi

  local split=$1
  local output_dir=$2
  shift 2
  local -a input_videos=("$@")

  # Validate before doing any expensive work, not after it.
  case "$split" in
    train | test) ;;
    *)
      echo "Invalid split: $split. Must be 'train' or 'test'." >&2
      exit 1
      ;;
  esac

  local -a entries=()
  local failures=0
  local input_video base_name escaped
  for input_video in "${input_videos[@]}"; do
    base_name=$(basename -- "$input_video" .mp4)

    # Extract sections and run the Python script for each section
    if ! extract_sections "$input_video" "$output_dir" "$split"; then
      printf 'Skipping %s\n' "$input_video" >&2
      failures=$((failures + 1))
      continue
    fi

    # Escape backslashes and double quotes so the entry stays valid JSON.
    escaped=${base_name//\\/\\\\}
    escaped=${escaped//\"/\\\"}
    entries+=("\"../data/images/${escaped}\"")
  done

  # Join the entries with commas inside a subshell so IFS is not changed here.
  local json_array
  json_array=$(
    IFS=,
    printf '[%s]' "${entries[*]-}"
  )

  # Write JSON array to the file matching the split (train.json / test.json)
  printf '%s\n' "$json_array" > "${split}.json"

  echo "Processing complete."

  if ((failures > 0)); then
    printf '%d input video(s) could not be processed.\n' "$failures" >&2
    exit 1
  fi
}

main "$@"