mirror of
https://github.com/FunAudioLLM/CosyVoice.git
synced 2026-02-05 18:09:24 +08:00
Merge pull request #639 from FunAudioLLM/dev/lyuxiang.lx
use stream read to save memory
This commit is contained in:
@@ -40,7 +40,8 @@ def parquet_opener(data, mode='train', tts_data={}):
 assert 'src' in sample
 url = sample['src']
 try:
-df = pq.read_table(url).to_pandas()
+for df in pq.ParquetFile(url).iter_batches(batch_size=64):
+df = df.to_pandas()
 for i in range(len(df)):
 if mode == 'inference' and df.loc[i, 'utt'] not in tts_data:
 continue
Reference in New Issue
Block a user