mirror of
https://github.com/ParisNeo/lollms-webui.git
synced 2025-01-04 19:44:09 +00:00
126 lines
3.1 KiB
Python
126 lines
3.1 KiB
Python
import os
|
|
import tkinter as tk
|
|
|
|
import customtkinter as ctk
|
|
import torch
|
|
import torchaudio
|
|
import vlc
|
|
from tkVideoPlayer import TkinterVideo
|
|
from tortoise.api import TextToSpeech
|
|
from tortoise.utils.audio import load_voice
|
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
|
# Setup the app
|
|
app = tk.Tk()
|
|
app.geometry("600x550")
|
|
app.title("Rap God v2.0")
|
|
ctk.set_appearance_mode("dark")
|
|
|
|
promptFrame = tk.Frame()
|
|
promptFrame.pack(padx=10, pady=10)
|
|
buttonFrame = tk.Frame()
|
|
buttonFrame.pack()
|
|
|
|
prompt = ctk.CTkEntry(
|
|
promptFrame, height=40, width=300, text_color="black", fg_color="white"
|
|
)
|
|
prompt.pack(side="left", padx=10)
|
|
lyrics = ctk.CTkEntry(None, height=240, width=500, text_color="black", fg_color="white")
|
|
lyrics.pack()
|
|
|
|
|
|
def generateText():
|
|
model = AutoModelForCausalLM.from_pretrained("stormzy").to("cuda")
|
|
tokenizer = AutoTokenizer.from_pretrained("distilgpt2", use_fast=True)
|
|
tokens = tokenizer.encode(prompt.get(), return_tensors="pt")
|
|
tokens = tokens.to("cuda")
|
|
attn_mask = torch.ones_like(tokens)
|
|
out = model.generate(
|
|
tokens,
|
|
attention_mask=attn_mask,
|
|
num_beams=5,
|
|
early_stopping=True,
|
|
max_length=200,
|
|
no_repeat_ngram_size=2,
|
|
)
|
|
rap = tokenizer.decode(out[0])
|
|
lyrics.delete(0, tk.END)
|
|
lyrics.insert(0, rap)
|
|
|
|
|
|
def generateAudio():
|
|
voice_samples, conditioning_latents = load_voice(
|
|
"stormzy", extra_voice_dirs=["stormzy_samples"]
|
|
)
|
|
tts = TextToSpeech()
|
|
gen = tts.tts_with_preset(
|
|
lyrics.get(),
|
|
voice_samples=voice_samples,
|
|
conditioning_latents=conditioning_latents,
|
|
preset="ultra_fast",
|
|
)
|
|
torchaudio.save("generated.wav", gen.squeeze(0).cpu(), 24000)
|
|
|
|
|
|
def playAudio():
|
|
if os.path.exists("generated.wav"):
|
|
p = vlc.MediaPlayer("file:///generated.wav")
|
|
p.play()
|
|
|
|
|
|
videoplayer = TkinterVideo(master=app, scaled=True, keep_aspect=True)
|
|
|
|
|
|
def generateVideo():
|
|
os.system("xcopy /y generated.wav .\MakeItTalk\examples")
|
|
os.system("cd MakeItTalk & python generate.py")
|
|
|
|
if os.path.exists("generated.wav"):
|
|
p = vlc.MediaPlayer("file:///generated.wav")
|
|
p.play()
|
|
|
|
videoplayer.load("MakeItTalk\examples\stormzy_pred_fls_generated_audio_embed.mp4")
|
|
videoplayer.pack(fill="both", expand=True)
|
|
videoplayer.play()
|
|
|
|
|
|
genTextButton = ctk.CTkButton(
|
|
promptFrame,
|
|
height=40,
|
|
width=120,
|
|
text_color="black",
|
|
text="Generate",
|
|
command=generateText,
|
|
)
|
|
genTextButton.pack(side="right")
|
|
genAudioButton = ctk.CTkButton(
|
|
buttonFrame,
|
|
height=40,
|
|
width=120,
|
|
text_color="black",
|
|
text="Syn Audio",
|
|
command=generateAudio,
|
|
)
|
|
genAudioButton.pack(side="left", padx=10)
|
|
playAudioButton = ctk.CTkButton(
|
|
buttonFrame,
|
|
height=40,
|
|
width=120,
|
|
text_color="black",
|
|
text="Play Rap",
|
|
command=playAudio,
|
|
)
|
|
playAudioButton.pack(side="left", padx=10)
|
|
genVideoButton = ctk.CTkButton(
|
|
buttonFrame,
|
|
height=40,
|
|
width=120,
|
|
text_color="black",
|
|
text="Syn Video",
|
|
command=generateVideo,
|
|
)
|
|
genVideoButton.pack(side="left", padx=10)
|
|
|
|
# Run the app
|
|
app.mainloop()
|