Add interpolation methods and refactor

parent 20c57790
from magenta.models.nsynth import utils
from magenta.models.nsynth.wavenet import fastgen
import numpy as np
def load_encoding(fname, sample_length=None, sr=16000, ckpt='wavenet-ckpt/model.ckpt-200000'):
    """Load an audio file and compute its encoding with ``fastgen.encode``.

    Args:
        fname: Path of the audio file, passed to ``utils.load_audio``.
        sample_length: Forwarded unchanged to both ``utils.load_audio`` and
            ``fastgen.encode``.
            NOTE(review): the default ``None`` is passed straight through;
            confirm that ``fastgen.encode`` accepts it.
        sr: Sample rate handed to ``utils.load_audio``.
        ckpt: Checkpoint path handed to ``fastgen.encode``.

    Returns:
        A ``(audio, encoding)`` tuple: the loaded audio and the result of
        encoding it.
    """
    waveform = utils.load_audio(fname, sample_length=sample_length, sr=sr)
    return waveform, fastgen.encode(waveform, ckpt, sample_length)
def synthesize():
    """Load and encode the hard-coded drum sample; the result is discarded.

    NOTE(review): this module defines ``synthesize`` a second time further
    down (with parameters). That later definition rebinds the name, so this
    zero-argument version is not reachable once the module has been loaded.
    """
    # wav file from https://commons.wikimedia.org/wiki/File:Drum_(avk).wav
    _audio, _encoding = load_encoding(
        'Drum_(avk).wav',
        sample_length=100000,
        ckpt='wavenet-ckpt/model.ckpt-200000',
    )
def fade(encoding, mode='in'):
    """Apply a raised-cosine fade along axis 1 of ``encoding``.

    Adapted from the NSynth demo notebook:
    https://github.com/magenta/magenta-demos/blob/main/jupyter-notebooks/NSynth.ipynb

    Args:
        encoding: Array whose axis 1 is the axis to fade over. The ramp is
            reshaped to ``(1, length, 1)`` before multiplying, so it
            broadcasts against a 3-D array.
            (Presumably ``(batch, time, channels)`` -- confirm with caller.)
        mode: ``'in'`` scales by the rising ramp; any other value scales by
            its complement ``1 - ramp`` (a fade-out).

    Returns:
        The element-wise product of ``encoding`` and the chosen ramp.
    """
    length = encoding.shape[1]
    # Rising half-cosine: 0 at index 0, exactly 0.5 at the midpoint, and
    # approaching (but not reaching) 1 at the last index. np.pi is used
    # instead of a truncated literal so the curve is not skewed.
    ramp = 0.5 * (1.0 - np.cos(np.pi * np.arange(length) / float(length)))
    ramp = ramp.reshape(1, -1, 1)
    if mode == 'in':
        return ramp * encoding
    return (1.0 - ramp) * encoding
def cross_fade(encoding1, encoding2):
    """Blend two encodings: fade ``encoding1`` out while fading ``encoding2`` in.

    Returns the element-wise sum of ``fade(encoding1, 'out')`` and
    ``fade(encoding2, 'in')``.
    """
    outgoing = fade(encoding1, 'out')
    incoming = fade(encoding2, 'in')
    return outgoing + incoming
def mashup(fname1, fname2, sample_length=100000, ckpt='wavenet-ckpt/model.ckpt-200000'):
    """Encode two audio files and cross-fade their encodings.

    Args:
        fname1: Path of the file whose encoding is faded out.
        fname2: Path of the file whose encoding is faded in.
        sample_length: Forwarded to ``load_encoding`` for both files. With
            ``load_encoding``'s default 16 kHz sample rate, the default of
            100000 samples corresponds to 6.25 seconds of audio.
        ckpt: Checkpoint path forwarded to ``load_encoding``, so the caller
            can encode with the same checkpoint it later synthesizes with.

    Returns:
        The result of ``cross_fade(encoding1, encoding2)``.
    """
    print('mashing up two files')
    # Only the encodings are needed here; the raw audio is discarded.
    _, encoding1 = load_encoding(fname1, sample_length=sample_length, ckpt=ckpt)
    _, encoding2 = load_encoding(fname2, sample_length=sample_length, ckpt=ckpt)
    return cross_fade(encoding1, encoding2)
def synthesize(encoding, new_fname, ckpt='wavenet-ckpt/model.ckpt-200000', sample_length=100000):
    """Synthesize audio from ``encoding`` and save it as ``'gen_' + new_fname``.

    Args:
        encoding: Encoding passed to ``fastgen.synthesize``.
        new_fname: File name that the ``'gen_'`` prefix is prepended to in
            order to form the single save path.
        ckpt: Passed as ``checkpoint_path``.
        sample_length: Passed as ``samples_per_save``.
    """
    print('lets synthesize!!!')
    # Synthesize exactly once, to the path derived from new_fname. A second
    # pass to a fixed 'new-audio.wav' would repeat the whole (slow)
    # generation and ignore the requested name.
    fastgen.synthesize(
        encoding,
        save_paths=['gen_' + new_fname],
        checkpoint_path=ckpt,
        samples_per_save=sample_length,
    )
if __name__ == "__main__":
    import os.path

    fname1 = "snd/Fårlock_-_SMV_-_SVA_BB_5335_45.wav"
    fname2 = "snd/Intervju_om_lekar_-_SMV_-_SVA_BB_5325_29.wav"
    # create interpolated encodings
    encoding = mashup(fname1, fname2)
    # Merge the two file names (directory part stripped) for the new audio
    # file. basename() also copes with bare file names and nested
    # directories, where indexing the result of split('/') would fail or
    # pick a directory component.
    new_fname = os.path.basename(fname1) + os.path.basename(fname2)
    # synthesize
    synthesize(encoding, new_fname)
