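# app.py for the JarvisIR Gradio Space.
#
# Startup sequence: install the CUDA toolkit, build the bundled restoration
# tools, patch a stale basicsr import, download checkpoints from the Hugging
# Face Hub, then launch a Gradio UI in which a fine-tuned LLaVA model analyzes
# image degradation and drives the restoration toolkit.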
import subprocess
import os
from pathlib import Path

def install_cuda_toolkit():
    CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run"
    CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
    subprocess.call(["wget", "-q", CUDA_TOOLKIT_URL, "-O", CUDA_TOOLKIT_FILE])
    subprocess.call(["chmod", "+x", CUDA_TOOLKIT_FILE])
    subprocess.call([CUDA_TOOLKIT_FILE, "--silent", "--toolkit"])
    os.environ["CUDA_HOME"] = "/usr/local/cuda"
    os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
    os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
        os.environ["CUDA_HOME"],
        "" if "LD_LIBRARY_PATH" not in os.environ else os.environ["LD_LIBRARY_PATH"],
    )

# Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
os.environ["BASICSR_JIT"] = "False"

install_cuda_toolkit()
BASE_DIR = Path("/home/user/app/JarvisIR")

commands = [
    ("python -V", BASE_DIR),
    ("pip install -r requirements.txt", BASE_DIR),
    ("python setup.py develop --no_cuda_ext", BASE_DIR / "package/agent_tools/Retinexformer"),
    ("pip install basicsr_ridcp-0.0.0-cp310-cp310-linux_x86_64.whl", BASE_DIR),
    ("pip install -e .", BASE_DIR / "package"),
]
def run_command(cmd, cwd=None):
    try:
        result = subprocess.run(
            cmd,  # Note: no shlex.split() here; the command string is passed to the shell as-is
            cwd=str(cwd) if cwd else None,
            shell=True,  # shell=True is needed so operators such as && work
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        print(f"[SUCCESS] {cmd}")
        if result.stdout:
            print(result.stdout)
        return True
    except subprocess.CalledProcessError as e:
        print(f"[FAILED] {cmd}")
        print(f"Error: {e.stderr}")
        return False

for cmd, cwd in commands:
    run_command(cmd, cwd)
import sys
sys.path.append("/home/user/app/JarvisIR/package")
sys.path.append("/home/user/app/JarvisIR/package/agent_tools/Retinexformer")
# sys.path.append("/home/user/app/JarvisIR/package/agent_tools/RIDCP")

# Patch a stale import in basicsr: newer torchvision releases removed the
# private transforms.functional_tensor module, so rgb_to_grayscale must be
# imported from transforms.functional instead.
from pathlib import Path

# 1. Define the file path
file_path = Path("/usr/local/lib/python3.10/site-packages/basicsr/data/degradations.py")  # replace with the actual path

# 2. Read the file contents
try:
    content = file_path.read_text(encoding='utf-8')
    # 3. Perform the replacement
    new_content = content.replace(
        'from torchvision.transforms.functional_tensor import rgb_to_grayscale',
        'from torchvision.transforms.functional import rgb_to_grayscale'
    )
    # 4. Write the result back
    if content != new_content:
        file_path.write_text(new_content, encoding='utf-8')
        print("Patched basicsr import")
    else:
        print("No changes needed")
except FileNotFoundError:
    print(f"{file_path} does not exist")
except Exception as e:
    print(f"Error: {str(e)}")
import spaces
import re
import random
import gradio as gr
import torch
from PIL import Image
from transformers import AutoProcessor, LlavaForConditionalGeneration, TextIteratorStreamer
from threading import Thread
def download_tools_ckpts(target_dir, url):
    from huggingface_hub import snapshot_download
    import os
    import shutil
    tmp_dir = "hf_temp_download"
    os.makedirs(tmp_dir, exist_ok=True)
    snapshot_download(
        repo_id="LYL1015/JarvisIR",
        repo_type="model",
        local_dir=tmp_dir,
        allow_patterns=os.path.join(url, "**"),
        local_dir_use_symlinks=False,
    )
    src_dir = os.path.join(tmp_dir, url)
    shutil.copytree(src_dir, target_dir)
    shutil.rmtree(tmp_dir)

target_dir = "JarvisIR/checkpoints/agent_tools"
if not os.path.exists(target_dir):
    download_tools_ckpts(target_dir, "agent_tools/checkpoints")

llm_target_dir = "JarvisIR/checkpoints/pretrained_preview"
if not os.path.exists(llm_target_dir):
    download_tools_ckpts(llm_target_dir, "pretrained/mrrhf")
# Model configuration
# XXX: Path to the fine-tuned LLaVA model
model_id = llm_target_dir

# Available image restoration tasks and their corresponding models
all_tasks = ("{denoise: [scunet, restormer], lighten: [retinexformer_fivek, hvicidnet, lightdiff], "
             "derain: [idt, turbo_rain, s2former], defog: [ridcp, kanet], "
             "desnow: [turbo_snow, snowmaster], super_resolution: [real_esrgan]}")
# Various prompt templates for querying the LLM about image degradation and restoration tasks
prompts_query2 = [
    f"Considering the image's degradation, suggest the required tasks with explanations, and identify suitable tools for each task. Options for tasks and tools include: {all_tasks}.",
    f"Given the image's degradation, outline the essential tasks along with justifications, and choose the appropriate tools for each task from the following options: {all_tasks}.",
    f"Please specify the tasks required due to the image's degradation, explain the reasons, and select relevant tools for each task from the provided options: {all_tasks}.",
    f"Based on the image degradation, determine the necessary tasks and their reasons, along with the appropriate tools for each task. Choose from these options: {all_tasks}.",
    f"Identify the tasks required to address the image's degradation, including the reasons for each, and select tools from the options: {all_tasks}.",
    f"Considering the degradation observed, list the tasks needed and their justifications, then pick the most suitable tools for each task from these options: {all_tasks}.",
    f"Evaluate the image degradation, and based on that, provide the necessary tasks and reasons, along with tools chosen from the options: {all_tasks}.",
    f"With respect to the image degradation, outline the tasks needed and explain why, selecting tools from the following list: {all_tasks}.",
    f"Given the level of degradation in the image, specify tasks to address it, include reasons, and select tools for each task from: {all_tasks}.",
    f"Examine the image's degradation, propose relevant tasks and their explanations, and identify tools from the options provided: {all_tasks}.",
    f"Based on observed degradation, detail the tasks required, explain your choices, and select tools from these options: {all_tasks}.",
    f"Using the image's degradation as a guide, list the necessary tasks, include explanations, and pick tools from the provided choices: {all_tasks}.",
    f"Assess the image degradation, provide the essential tasks and reasons, and select the appropriate tools for each task from the options: {all_tasks}.",
    f"According to the image's degradation, determine which tasks are necessary and why, choosing tools for each task from: {all_tasks}.",
    f"Observe the degradation in the image, specify the needed tasks with justifications, and select appropriate tools from: {all_tasks}.",
    f"Taking the image degradation into account, specify tasks needed, provide reasons, and choose tools from the following: {all_tasks}.",
    f"Consider the image's degradation level, outline the tasks necessary, provide reasoning, and select suitable tools from: {all_tasks}.",
    f"Evaluate the degradation in the image, identify tasks required, explain your choices, and pick tools from: {all_tasks}.",
    f"Analyze the image degradation and suggest tasks with justifications, choosing the best tools from these options: {all_tasks}.",
    f"Review the image degradation, and based on it, specify tasks needed, provide reasons, and select tools for each task from: {all_tasks}."
]
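# Judging from the parsers below, the fine-tuned model is expected to reply in
# a tagged format along these lines (illustrative example, not actual output):
#   <reason>Heavy rain streaks and low contrast degrade the scene ...</reason>
#   <answer>[type:derain]:(model:idt), [type:denoise]:(model:scunet)</answer>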
# Initialize models
print("Loading tool engine...")
# Initialize the image restoration toolkit
from agent_tools import RestorationToolkit
tool_engine = RestorationToolkit(score_weight=[0, 0, 0, 0, 0])

print("Loading LLM model...")
# Load the LLaVA model in half precision to reduce memory usage
model = LlavaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    low_cpu_mem_usage=True
)
processor = AutoProcessor.from_pretrained(model_id)
def parse_llm_response(response):
    """
    Parse the LLM response to extract the reason and answer sections.

    Args:
        response (str): The raw response from the LLM

    Returns:
        tuple: (reason, answer) extracted from the response
    """
    reason_match = re.search(r'<reason>(.*?)</reason>', response, re.DOTALL)
    answer_match = re.search(r'<answer>(.*?)</answer>', response, re.DOTALL)
    reason = reason_match.group(1).strip() if reason_match else "No reasoning provided"
    answer = answer_match.group(1).strip() if answer_match else "No answer provided"
    return reason, answer
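# Example with a hypothetical response string:
#   parse_llm_response("<reason>rain streaks</reason><answer>[type:derain]:(model:idt)</answer>")
#   -> ("rain streaks", "[type:derain]:(model:idt)")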
def extract_models_from_answer(answer):
    """
    Extract model names from the answer string using a regex.

    Args:
        answer (str): The answer string containing model recommendations

    Returns:
        list: List of extracted model names
    """
    # Pattern to match [type:xxx]:(model:xxx)
    pattern = r'\[type:[^\]]+\]:\(model:([^)]+)\)'
    models = re.findall(pattern, answer)
    return models
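# Example with a hypothetical answer string:
#   extract_models_from_answer("[type:derain]:(model:idt), [type:denoise]:(model:scunet)")
#   -> ['idt', 'scunet']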
def beautify_recommended_actions(answer, models):
    """
    Format the LLM's recommendations in a more visually appealing way.

    Args:
        answer (str): The raw answer from the LLM
        models (list): List of extracted model names

    Returns:
        str: Beautified display of recommendations
    """
    # Task type to emoji mapping for visual enhancement
    task_icons = {
        'denoise': '🧹',
        'lighten': '💡',
        'derain': '🌧️',
        'defog': '🌫️',
        'desnow': '❄️',
        'super_resolution': '🔍'
    }
    # Parse the answer to extract tasks and models
    pattern = r'\[type:([^\]]+)\]:\(model:([^)]+)\)'
    matches = re.findall(pattern, answer)
    if not matches:
        return f"**🎯 Recommended Actions:**\n\n{answer}\n\n**Extracted Models:** {', '.join(models) if models else 'None'}"
    # Create the beautified display
    beautified = "**🎯 Recommended Actions:**\n"
    beautified += "> "
    # Create a horizontal flow of actions
    action_parts = []
    for task_type, model_name in matches:
        task_type = task_type.strip()
        model_name = model_name.strip()
        # Get the icon for the task type
        icon = task_icons.get(task_type, '🔧')
        # Format the task name (capitalize and replace underscores)
        task_display = task_type.title().replace('_', ' ')
        # Create the action part: icon + task + model
        action_part = f"{icon} {task_display}:`{model_name}`"
        action_parts.append(action_part)
    # Join with arrows to show the sequence
    beautified += " ➡ ".join(action_parts) + "\n\n"
    # Add summary information
    beautified += f"**📋 Processing Pipeline:** {len(matches)} steps\n"
    beautified += f"**🛠️ Models to use:** {' → '.join(models)}"
    return beautified
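# Example with a hypothetical answer string:
#   beautify_recommended_actions("[type:derain]:(model:idt)", ['idt'])
#   produces markdown roughly like:
#     **🎯 Recommended Actions:**
#     > 🌧️ Derain:`idt`
#     **📋 Processing Pipeline:** 1 steps
#     **🛠️ Models to use:** idt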
def resize_image_to_original(processed_image_path, original_size):
    """
    Resize the processed image back to its original dimensions.

    Args:
        processed_image_path (str): Path to the processed image
        original_size (tuple): Original image dimensions (width, height)

    Returns:
        str: Path to the resized image
    """
    if processed_image_path and os.path.exists(processed_image_path):
        img = Image.open(processed_image_path)
        img_resized = img.resize(original_size, Image.Resampling.LANCZOS)
        # Save the resized image
        output_path = os.path.join('temp_outputs', 'final_result.png')
        img_resized.save(output_path)
        return output_path
    return processed_image_path
def get_llm_response_streaming(image_path):
    """
    Get a streaming response from the LLM for image analysis.

    Args:
        image_path (str): Path to the input image

    Returns:
        TextIteratorStreamer: A streamer object that yields tokens
    """
    # Select a random prompt from the templates
    instruction = random.choice(prompts_query2)
    # Format the prompt with the image placeholder for multimodal input
    prompt = (f"<|start_header_id|>user<|end_header_id|>\n\n<image>\n{instruction}<|eot_id|>"
              "<|start_header_id|>assistant<|end_header_id|>\n\n")
    # Load and preprocess the image
    raw_image = Image.open(image_path)
    inputs = processor(prompt, raw_image, return_tensors='pt').to(0, torch.float16)
    # Set up streaming for token-by-token generation
    streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
    # Generate the response in a separate thread to avoid blocking
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=400,
        do_sample=False
    )
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    return streamer
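# The Thread + TextIteratorStreamer pairing is the usual transformers recipe
# for token streaming: model.generate() runs in a background thread while the
# caller iterates over the streamer as tokens arrive.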
def process_image_with_tools(image_path, models, original_size):
    """
    Process the image with the tool engine and restore it to its original size.

    Args:
        image_path (str): Path to the input image
        models (list): List of models to apply
        original_size (tuple): Original image dimensions

    Returns:
        str: Path to the final processed image
    """
    if not models:
        return None
    # Create the output directory
    os.makedirs('temp_outputs', exist_ok=True)
    # Process the image with the selected models
    print("---------using tools---------")
    print("CUDA runtime version:", torch.version.cuda)
    res = tool_engine.process_image(models, image_path, 'temp_outputs')
    print("---------resizing back---------")
    # Resize back to the original dimensions
    final_result = resize_image_to_original(res['output_path'], original_size)
    return final_result
def process_full_pipeline(image):
    """
    Main processing pipeline with streaming UI updates.

    Args:
        image (str): Path to the input image

    Yields:
        tuple: (chat_history, processed_image) for Gradio UI updates
    """
    if image is None:
        # This is a generator, so the empty state must be yielded, not returned
        yield [], None
        return
    # Get the original image size so the result can be restored later
    original_img = Image.open(image)
    original_size = original_img.size
    # Initialize the chat history for the UI
    chat_history = [("Image uploaded for analysis", None)]
    # Step 1: Get the streaming LLM response
    streamer = get_llm_response_streaming(image)
    # Stream the response to the UI with real-time updates
    full_response = ""
    in_reason = False
    in_answer = False
    reason_displayed = False
    answer_displayed = False
    reasoning_added = False  # Track whether the reasoning entry was added
    for new_text in streamer:
        full_response += new_text
        # Entering the reason section, or starting to show content before the tag arrives
        if ('<reason>' in full_response and not in_reason and not reason_displayed) or (not reasoning_added and not in_reason and not reason_displayed):
            in_reason = True
            reasoning_added = True
            if '<reason>' in full_response:
                # Extract the content after <reason>
                reason_start = full_response.find('<reason>') + len('<reason>')
                reason_content = full_response[reason_start:].strip()
            else:
                # Show all content as reasoning if no tag has appeared yet
                reason_content = full_response.strip()
            # Add the reasoning to the chat history
            chat_history.append((None, f"**🤔 Analysis & Reasoning:**\n\n{reason_content}"))
            yield chat_history, None
        # While in the reason section, keep updating its content
        elif in_reason and not reason_displayed:
            # Check whether the reason section is complete
            if '</reason>' in full_response:
                # Extract the complete reason content
                reason_start = full_response.find('<reason>') + len('<reason>')
                reason_end = full_response.find('</reason>')
                reason_content = full_response[reason_start:reason_end].strip()
                # Update the chat history with the complete reason
                chat_history[1] = (None, f"**🤔 Analysis & Reasoning:**\n\n{reason_content}")
                reason_displayed = True
                in_reason = False
                yield chat_history, None
            else:
                # Continue streaming the reason content
                if '<reason>' in full_response:
                    reason_start = full_response.find('<reason>') + len('<reason>')
                    reason_content = full_response[reason_start:].strip()
                else:
                    reason_content = full_response.strip()
                # Update the chat history with the partial reason
                chat_history[1] = (None, f"**🤔 Analysis & Reasoning:**\n\n{reason_content}")
                yield chat_history, None
        # Entering the answer section
        elif '<answer>' in full_response and not in_answer and not answer_displayed and reason_displayed:
            in_answer = True
            # Extract the content after <answer>
            answer_start = full_response.find('<answer>') + len('<answer>')
            answer_content = full_response[answer_start:]
            # Add the partial answer to the chat history
            models = extract_models_from_answer(answer_content)
            beautified = beautify_recommended_actions(answer_content, models)
            chat_history.append((None, beautified))
            yield chat_history, None
        # While in the answer section, keep updating its content
        elif in_answer and not answer_displayed:
            # Check whether the answer section is complete
            if '</answer>' in full_response:
                # Extract the complete answer content
                answer_start = full_response.find('<answer>') + len('<answer>')
                answer_end = full_response.find('</answer>')
                answer_content = full_response[answer_start:answer_end].strip()
                # Parse and process the final answer
                models = extract_models_from_answer(answer_content)
                beautified = beautify_recommended_actions(answer_content, models)
                chat_history[2] = (None, beautified)
                answer_displayed = True
                in_answer = False
                yield chat_history, None
                # Process the image with the recommended tools
                if models:
                    chat_history.append((None, "**🔄 Processing image...**"))
                    yield chat_history, None
                    processed_image = process_image_with_tools(image, models, original_size)
                    chat_history[-1] = (None, "**✅ Processing Complete!**")
                    yield chat_history, processed_image
                    return
                else:
                    chat_history.append((None, "**❌ No valid models found in the response**"))
                    yield chat_history, None
                    return
            else:
                # Continue streaming the answer content
                answer_start = full_response.find('<answer>') + len('<answer>')
                answer_content = full_response[answer_start:].strip()
                # Update the chat history with the partial answer
                models = extract_models_from_answer(answer_content)
                beautified = beautify_recommended_actions(answer_content, models)
                chat_history[2] = (None, beautified)
                yield chat_history, None
    # Fallback if streaming completes without well-formed tags
    if not answer_displayed:
        reason, answer = parse_llm_response(full_response)
        models = extract_models_from_answer(answer)
        chat_history = [
            ("Image uploaded for analysis", None),
            (None, f"**🤔 Analysis & Reasoning:**\n\n{reason}"),
            (None, beautify_recommended_actions(answer, models))
        ]
        if models:
            chat_history.append((None, "**🔄 Processing image...**"))
            yield chat_history, None
            processed_image = process_image_with_tools(image, models, original_size)
            chat_history[-1] = (None, "**✅ Processing Complete!**")
            yield chat_history, processed_image
        else:
            chat_history.append((None, "**❌ No valid models found in the response**"))
            yield chat_history, None
# Create the Gradio interface
def create_interface():
    """
    Create and configure the Gradio web interface.

    Returns:
        gr.Blocks: Configured Gradio interface
    """
    with gr.Blocks(title="JarvisIR: Elevating Autonomous Driving Perception with Intelligent Image Restoration", theme=gr.themes.Soft()) as demo:
        # Header with logo and title
        gr.Markdown("""
        # <img src="https://cvpr2025-jarvisir.github.io/imgs/icon.png" width="32" height="32" style="display: inline-block; vertical-align: middle; transform: translateY(-2px); margin-right: 1px;"/> JarvisIR: Elevating Autonomous Driving Perception with Intelligent Image Restoration

        Upload an image and let JarvisIR analyze its degradation and recommend the best restoration tools!
        """)
        with gr.Row():
            with gr.Column(scale=1):
                # Input image upload component
                input_image = gr.Image(
                    type="filepath",
                    label="📸 Upload Your Image",
                    height=400
                )
                # Process button
                process_btn = gr.Button(
                    "🚀 Analyze & Process",
                    variant="primary",
                    size="lg"
                )
            with gr.Column(scale=1):
                # Chat interface showing the analysis
                chatbot = gr.Chatbot(
                    label="💬 AI Analysis Chat",
                    height=400,
                    show_label=True,
                    bubble_full_width=False
                )
        with gr.Row():
            # Output image display
            output_image = gr.Image(
                label="✨ Processed Result",
                height=300
            )
        # Connect the event handler for the process button
        process_btn.click(
            fn=process_full_pipeline,
            inputs=[input_image],
            outputs=[chatbot, output_image]
        )
        # Instructions section
        gr.Markdown("### 📝 Instructions:")
        gr.Markdown("""
        1. **Upload an image** that needs restoration (blurry, dark, noisy, etc.)
        2. **Or click on a sample image** below to try a pre-loaded example
        3. **Click 'Analyze & Process'** to let JarvisIR analyze the image
        4. **View the chat** to follow JarvisIR's reasoning and recommendations in real time
        5. **Check the result** - the processed image is restored to its original dimensions
        """)
        # Examples section at the bottom
        gr.Examples(
            examples=[
                ["example/img4.jpg"],
                ["example/img10.png"],
                ["example/img12.png"],
                ["example/img13.jpg"]
            ],
            inputs=input_image,
            label="🖼️ Example Images - Click to Try!"
        )
    return demo
if __name__ == "__main__":
    print("Starting Image Restoration Assistant...")
    demo = create_interface()
    # Launch the Gradio app, binding to all network interfaces on the default port
    demo.launch(
        server_name="0.0.0.0",
        share=False
    )