Commit e883733a authored by Edward Tang's avatar Edward Tang

first scraper done

parent fd219f3e
Showing with 370 additions and 63 deletions
File added
@@ -2,6 +2,7 @@ import os
import random
import gdown
import numpy as np
import moviepy
from moviepy.editor import *
from moviepy.editor import concatenate_videoclips
# from melo.api import TTS
@@ -11,69 +12,95 @@ from moviepy.editor import VideoFileClip, ImageClip, CompositeVideoClip, concate
from PIL import Image, ImageFilter
import numpy as np
from flask import Flask, flash, request, redirect, url_for, jsonify, Response
import subprocess
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse, unquote
from scraper import Scraper
import openai
app = Flask(__name__)
def generate_speech(text, output_wav_path):
# Initialize TTS model
tts = TTS(language='EN', device="cuda" if torch.cuda.is_available() else "cpu")
speaker_ids = tts.hps.data.spk2id
tts.tts_to_file(text, speaker_ids['EN-US'], output_wav_path, 1.0)
print(f"Generated speech saved at {output_wav_path}")
def lip_sync_video_retalking(input_video_path, audio_path):
# Check if the necessary directories exist
print(f"Input video path: {input_video_path}")
print(f"Audio path: {audio_path}")
# Run the VideoReTalking inference script via subprocess (notebook magics are not valid in a module)
os.chdir('/content/video-retalking')
command = [
'python', 'inference.py',
'--face', input_video_path,
'--audio', audio_path,
'--outfile', '/content/output_high_qual.mp4'
]
subprocess.run(command, check=True)
def create_slideshow(image_urls, duration_per_image=3, width=1080, height=1920):
def blur_image(image_path, blur_radius=20):
img = Image.open(image_path)
blurred_img = img.filter(ImageFilter.GaussianBlur(blur_radius))
return np.array(blurred_img)
clips = []
original_images = [] # Store the original image clips for bottom-left placement
for img_url in image_urls:
# Load and resize the original foreground image
original_image = ImageClip(img_url).resize(height=height * 0.5).set_duration(duration_per_image)
original_images.append(original_image) # Save the original image clip
# Create a blurred background
blurred_bg_array = blur_image(img_url)
background = ImageClip(blurred_bg_array).resize((width, height)).set_duration(duration_per_image)
# Center the original image on the blurred background
foreground = original_image.set_position('center')
combined = CompositeVideoClip([background, foreground])
clips.append(combined)
# Concatenate all clips to form the slideshow
slideshow = concatenate_videoclips(clips, method="compose")
return slideshow, original_images
def process_pipeline(text, video_path, image_urls):
# Step 1: Generate speech from text
wav_file = "/content/output_audio.mp3"
generate_speech(text, wav_file)
# # Step 2: Perform lip-sync
# lip_synced_video_path = "/content/Wav2Lip/results/result_voice.mp4"
# lip_sync_video(video_path, wav_file)
lip_synced_video_path = '/content/output_high_qual.mp4'
lip_sync_video_retalking(video_path, wav_file)
# Step 3: Create a background slideshow from images
slideshow_video, original_images = create_slideshow(image_urls, duration_per_image=7)
# # Step 4: Combine everything into one video
# # combine_video_and_background(lip_synced_video_path, slideshow_video, '/content/Wav2Lip/results/result_video_final2.mp4')
combine_video_and_background2('/content/green_screen_video.mp4', lip_synced_video_path, slideshow_video, original_images, '/content/result_video_final2.mp4')
@app.route('/scrape', methods=['POST'])
def scrape():
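# Scrape the requested URL plus its same-domain links one level down, returning
# the aggregated paragraph text and a de-duplicated list of image URLs.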
scraper = Scraper()
url = request.json['url']
if not url:
return jsonify(status="error", message="No URL provided."), 400
# Decode the URL if it's percent-encoded
url = unquote(url)
# Scrape the homepage
homepage_data = scraper.scrape_page(url)
if "error" in homepage_data:
return jsonify(status="error", message=homepage_data["error"]), 500
# Scrape 1-level-down links
all_text_content = homepage_data["text"]
all_images = homepage_data["images"]
for link in homepage_data["links"]:
link_data = scraper.scrape_page(link)
if "text" in link_data and "images" in link_data:
all_text_content += " " + link_data["text"]
all_images.extend(link_data["images"])
# Remove duplicates from all_images while preserving order
all_images = list(dict.fromkeys(all_images))
return jsonify(
{"text": all_text_content, "images": all_images}
)
# Initialize OpenAI API key from environment variables (never hard-code secrets in the repo)
openai.api_key = os.environ.get("OPENAI_API_KEY")
@app.route('/generate_summary', methods=['POST'])
def generate_summary():
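# Generate an advertisement-script narration for the supplied 'prompt' via the OpenAI chat API.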
if request.method != 'POST':
return jsonify(error=f"Method {request.method} Not Allowed"), 405
# Extract 'prompt' from the request body
data = request.get_json()
prompt = data.get('prompt')
if not prompt:
return jsonify(error="Prompt is required."), 400
try:
# Call the OpenAI chat API (openai>=1.0 module-level client) to generate a chat completion
response = openai.chat.completions.create(
model='gpt-4o-mini', # Use the appropriate model available to you
messages=[
{"role": "system", "content": "You are a creative assistant specializing in advertisement scripts. Follow the prompt and tone exactly. Only generate the content that will be spoken out by the narrator."},
{"role": "user", "content": prompt},
],
max_tokens=1500, # Limit the response size
temperature=0.7, # Adjust for creativity
)
# Extract the content of the response
summary = (response.choices[0].message.content or "").strip()
if not summary:
raise ValueError("No content generated.")
return jsonify(summary=summary), 200
except Exception as e:
# Log the error and return a 500 Internal Server Error
print(f"Error generating summary: {e}")
return jsonify(error="Failed to generate summary."), 500
@app.route('/generate-video', methods=['POST'])
def generate_video():
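# TODO: wire the lip-sync + slideshow pipeline into this endpoint; it is currently a stub.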
return jsonify(error="Not implemented yet."), 501
if __name__ == '__main__':
app.run(host="0.0.0.0", port=3333, debug=True, threaded=True)
\ No newline at end of file
import os
import random
import gdown
import numpy as np
from moviepy.editor import *
from moviepy.editor import concatenate_videoclips
from melo.api import TTS
import torch
import cv2
from moviepy.editor import VideoFileClip, CompositeVideoClip, concatenate_videoclips, ImageClip, AudioFileClip, CompositeAudioClip
from PIL import Image, ImageFilter
from flask import Flask, flash, request, redirect, url_for, jsonify, Response
import subprocess
import moviepy
import moviepy.editor as mp
class LipSyncGenerator:
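# End-to-end generator: text-to-speech, VideoReTalking lip-sync, image slideshow,
# and green-screen compositing into a single vertical (1080x1920) video.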
def __init__(self):
pass
def generate_speech(self, text, output_wav_path):
# Initialize TTS model
tts = TTS(language='EN', device="cuda" if torch.cuda.is_available() else "cpu")
speaker_ids = tts.hps.data.spk2id
tts.tts_to_file(text, speaker_ids['EN-US'], output_wav_path, 1.0)
print(f"Generated speech saved at {output_wav_path}")
def lip_sync_video_retalking(self, input_video_path, audio_path, output_path):
# Print the paths for debugging
print(f"Input video path: {input_video_path}")
print(f"Audio path: {audio_path}")
# Change directory to /content/video-retalking (or any appropriate directory on your local machine)
os.chdir('/path/to/video-retalking') # Replace with the actual path on your machine
# Run the inference script using subprocess
command = [
'python', 'inference.py',
'--face', input_video_path,
'--audio', audio_path,
'--outfile', output_path
]
subprocess.run(command, check=True)
def create_slideshow(self, image_urls, duration_per_image=3, width=1080, height=1920):
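# Build a vertical slideshow: each image is centered over a blurred, full-frame copy of itself.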
def blur_image(image_path, blur_radius=20):
img = Image.open(image_path)
blurred_img = img.filter(ImageFilter.GaussianBlur(blur_radius))
return np.array(blurred_img)
clips = []
original_images = [] # Store the original image clips for bottom-left placement
for img_url in image_urls:
# Load and resize the original foreground image
original_image = ImageClip(img_url).resize(height=height * 0.5).set_duration(duration_per_image)
original_images.append(original_image) # Save the original image clip
# Create a blurred background
blurred_bg_array = blur_image(img_url)
background = ImageClip(blurred_bg_array).resize((width, height)).set_duration(duration_per_image)
# Center the original image on the blurred background
foreground = original_image.set_position('center')
combined = CompositeVideoClip([background, foreground])
clips.append(combined)
# Concatenate all clips to form the slideshow
slideshow = concatenate_videoclips(clips, method="compose")
return slideshow, original_images
# Chroma Keying (Green Screen Replacement) with dynamic background
def chroma_key(self, clip, background_clip, color_key=(129, 204, 56), threshold=100, softness=10):
# Apply the mask_color effect for chroma key (green screen removal)
masked_clip = clip.fx(mp.vfx.mask_color, color=color_key, thr=threshold, s=softness)
# Ensure the mask clip is resized properly (in case it's smaller or larger than background)
masked_clip = masked_clip.set_pos(('left', 'bottom'))
# Composite the video with the background
final_clip = CompositeVideoClip([background_clip, masked_clip])
return final_clip
def combine_video_and_background2(self, lip_synced_video_path, green_screen_video_path, slideshow_video, original_images, output_video_path, output_audio, bgMusic, switch_interval=7):
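# Alternate layouts every `switch_interval` seconds: even segments chroma-key the green-screen
# clip over the slideshow; odd segments show the original video with an image in the bottom-left.
# The voice-over is then mixed with quieter background music and written out with H.264/AAC.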
# Load the lip-synced video (original with intact background)
lip_synced_video = VideoFileClip(lip_synced_video_path)
# Load the green screen version of the video
green_screen_video = VideoFileClip(green_screen_video_path)
# Calculate the number of switches based on the shorter duration
min_duration = min(lip_synced_video.duration, slideshow_video.duration)
num_switches = int(min_duration // switch_interval)
width = 1080
height = 1920
clips = []
for i in range(num_switches):
start_time = i * switch_interval
end_time = start_time + switch_interval
if i % 2 == 0:
# Layout 1: Use green screen video with slideshow as the background
background = slideshow_video.subclip(start_time, end_time)
green_screen_with_background = self.chroma_key(
green_screen_video.subclip(start_time, end_time), background
)
layout_clip = CompositeVideoClip(
[green_screen_with_background],
size=(width, height)
)
else:
# Layout 2: Use original video with slideshow image in bottom-left
original_image_clip = original_images[i % len(original_images)].resize(width=width * 0.4)
original_image_clip = original_image_clip.set_position((30, height - original_image_clip.h - 50))
# original_image_clip = original_images[i % len(original_images)].resize(width=width * 0.4).set_position((30, height - original_image_clip.h - 50))
layout_clip = CompositeVideoClip(
[lip_synced_video.subclip(start_time, end_time).resize((width, height)),
original_image_clip],
size=(width, height)
)
clips.append(layout_clip)
# Concatenate all layout clips
final_video = concatenate_videoclips(clips, method="compose")
# Load the audio files
voice_audio = AudioFileClip(output_audio) # or .wav
bg_audio = AudioFileClip(bgMusic) # or .wav
# Debug: Print the original volume levels of both tracks
print(f"Original Voice Audio Volume: {voice_audio.max_volume()}")
print(f"Original Background Audio Volume: {bg_audio.max_volume()}")
# Adjust the volume levels
voice_audio = voice_audio.volumex(1.0) # Normal volume for voice
bg_audio = bg_audio.volumex(0.3) # Reduced volume for background music
# Debug: Print the new volume levels after applying volumex
print(f"Adjusted Voice Audio Volume: {voice_audio.max_volume()}")
print(f"Adjusted Background Audio Volume: {bg_audio.max_volume()}")
# Ensure the audio matches the video duration
final_video_duration = final_video.duration
# Trim or loop the audio to fit the video duration
voice_audio = voice_audio.subclip(0, final_video_duration) # Trim to video duration
bg_audio = bg_audio.subclip(0, final_video_duration) # Trim to video duration
# Combine both audio tracks
combined_audio = CompositeAudioClip([voice_audio, bg_audio])
# Set the combined audio to the final video
final_video = final_video.set_audio(combined_audio)
# Specify output path for video
out_path = output_video_path
# Write the final video to file with audio and specified parameters
final_video.write_videofile(
out_path,
codec='libx264', # Use H.264 codec for video compression
audio_codec='aac', # Use AAC codec for audio
temp_audiofile='temp-audio.m4a', # Temporary audio file during processing
remove_temp=True # Remove temporary audio file after video processing
)
print(f"Final video saved at {out_path}")
def process_pipeline(self, text, video_path, image_urls):
# Step 1: Generate speech from text
wav_file = "/content/output_audio.mp3"
self.generate_speech(text, wav_file)
# # Step 2: Perform lip-sync
# lip_synced_video_path = "/content/Wav2Lip/results/result_voice.mp4"
# lip_sync_video(video_path, wav_file)
lip_synced_video_path = '/content/output_high_qual.mp4'
self.lip_sync_video_retalking(video_path, wav_file, lip_synced_video_path)
# Step 3: Create a background slideshow from images
slideshow_video, original_images = self.create_slideshow(image_urls, duration_per_image=7)
# # Step 4: Combine everything into one video
# # combine_video_and_background(lip_synced_video_path, slideshow_video, '/content/Wav2Lip/results/result_video_final2.mp4')
# NOTE: the background-music path below is a placeholder; the original call omitted the output_audio and bgMusic arguments
self.combine_video_and_background2(lip_synced_video_path, '/content/green_screen_video.mp4', slideshow_video, original_images, '/content/result_video_final2.mp4', wav_file, '/content/bg_music.mp3')
annotated-types==0.7.0
anyio==4.8.0
beautifulsoup4==4.12.3
blinker==1.9.0
bs4==0.0.2
certifi==2024.12.14
charset-normalizer==3.4.1
click==8.1.8
decorator==4.4.2
distro==1.9.0
exceptiongroup==1.2.2
filelock==3.16.1
Flask==3.1.0
fsspec==2024.12.0
gdown==5.2.0
h11==0.14.0
httpcore==1.0.7
httpx==0.28.1
idna==3.10
imageio==2.36.1
imageio-ffmpeg==0.5.1
itsdangerous==2.2.0
Jinja2==3.1.5
jiter==0.8.2
MarkupSafe==3.0.2
moviepy==1.0.3
mpmath==1.3.0
networkx==3.4.2
numpy==2.2.1
openai==1.59.3
opencv-python==4.10.0.84
pillow==10.4.0
proglog==0.1.10
pydantic==2.10.4
pydantic_core==2.27.2
PySocks==1.7.1
python-dotenv==1.0.1
requests==2.32.3
sniffio==1.3.1
soupsieve==2.6
sympy==1.13.1
torch==2.5.1
tqdm==4.67.1
typing_extensions==4.12.2
urllib3==2.3.0
Werkzeug==3.1.3
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse, unquote
from http.server import BaseHTTPRequestHandler
import json
class Scraper:
def __init__(self):
pass
def scrape_page(self, url):
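# Fetch a single page and return {"text", "images", "links"}, or {"error": ...} on failure.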
try:
response = requests.get(url)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
# Extract all links
base_url_parsed = urlparse(url)
base_url = f"{base_url_parsed.scheme}://{base_url_parsed.netloc}"
links = [
urljoin(url, link['href'])
for link in soup.find_all('a', href=True)
if urljoin(url, link['href']).startswith(base_url) and urljoin(url, link['href']) != url
]
# Extract text content
text_content = ' '.join([p.get_text() for p in soup.find_all('p')])
# Extract image URLs and remove duplicates while preserving order
images = list(dict.fromkeys(
urljoin(url, img['src']) for img in soup.find_all('img', src=True)
))
return {"text": text_content, "images": images, "links": links}
except requests.RequestException as e:
return {"error": f"Error scraping {url}: {str(e)}"}
if __name__ == "__main__":
scraper = Scraper()
homepage_data = scraper.scrape_page("https://www.smartzliving.ai")
print("homepage_data ---->", homepage_data)
\ No newline at end of file
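A quick way to exercise the new /scrape endpoint once the Flask app is running (a minimal sketch, assuming the service is reachable on localhost:3333 as configured in app.run and that the target site permits scraping):

import requests

# Hypothetical local test of the scraper route added in this commit
resp = requests.post(
    "http://localhost:3333/scrape",
    json={"url": "https://www.smartzliving.ai"},
)
data = resp.json()
print(len(data["text"]), "characters of text,", len(data["images"]), "image URLs")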