onkar-waghmode committed on
Commit
ed463e7
·
1 Parent(s): 123d301
Files changed (1) hide show
  1. app.py +41 -190
app.py CHANGED
@@ -3,11 +3,8 @@ import torch
3
  from transformers import (
4
  AutoTokenizer,
5
  AutoModelForSeq2SeqLM,
6
- T5ForConditionalGeneration,
7
- T5Tokenizer
8
  )
9
  from sentence_transformers import SentenceTransformer, util
10
- import numpy as np
11
  from typing import List, Tuple, Dict
12
  import re
13
  import difflib
@@ -50,7 +47,6 @@ def load_model(model_name: str, model_path: str):
50
 
51
  def chunk_text(text: str, max_sentences: int = 4) -> List[str]:
52
  """Split text into chunks based on number of sentences"""
53
- import re
54
  sentences = re.split(r'(?<=[.!?]) +', text.strip())
55
  chunks = [' '.join(sentences[i:i+max_sentences]) for i in range(0, len(sentences), max_sentences)]
56
  return [chunk for chunk in chunks if chunk.strip()]
@@ -79,9 +75,12 @@ def calculate_max_length(input_text: str, mode: str, base_max_length: int) -> in
79
 
80
  def calculate_similarity(text1: str, text2: str) -> float:
81
  """Calculate cosine similarity between two texts"""
 
 
82
  embeddings = similarity_model.encode([text1, text2], convert_to_tensor=True)
83
- similarity = util.cos_sim(embeddings[0], embeddings[1])
84
- return similarity.item()
 
85
 
86
  def highlight_differences(original: str, generated: str) -> Tuple[str, str, Dict]:
87
  """
@@ -387,184 +386,6 @@ def process_text(
387
  return error_msg, "Error occurred", 0.0, "", "", ""
388
 
389
  # Create Gradio interface
390
- # with gr.Blocks(title="Text Paraphraser & Expander", theme=gr.themes.Soft()) as demo:
391
- # gr.Markdown(
392
- # """
393
- # # πŸ“ Text Paraphraser & Expander
394
- # Transform your text with AI-powered paraphrasing and expansion capabilities.
395
- # """
396
- # )
397
-
398
- # with gr.Row():
399
- # with gr.Column(scale=1):
400
- # mode = gr.Radio(
401
- # choices=["Paraphrase", "Expand"],
402
- # value="Paraphrase",
403
- # label="Mode",
404
- # info="Choose to paraphrase or expand your text"
405
- # )
406
-
407
- # model_dropdown = gr.Dropdown(
408
- # choices=list(PARAPHRASE_MODELS.keys()),
409
- # value=list(PARAPHRASE_MODELS.keys())[0],
410
- # label="Model Selection",
411
- # info="Choose the model for processing"
412
- # )
413
-
414
- # gr.Markdown("### βš™οΈ Parameters")
415
-
416
- # temperature = gr.Slider(
417
- # minimum=0.0,
418
- # maximum=2.0,
419
- # value=0.7,
420
- # step=0.1,
421
- # label="Temperature",
422
- # info="Higher = more creative, Lower = more focused"
423
- # )
424
-
425
- # top_p = gr.Slider(
426
- # minimum=0.1,
427
- # maximum=1.0,
428
- # value=0.9,
429
- # step=0.05,
430
- # label="Top-p (Nucleus Sampling)",
431
- # info="Probability threshold for token selection"
432
- # )
433
-
434
- # max_length = gr.Slider(
435
- # minimum=128,
436
- # maximum=1024,
437
- # value=512,
438
- # step=32,
439
- # label="Max Length (tokens)",
440
- # info="Maximum length of generated text per chunk"
441
- # )
442
-
443
- # num_beams = gr.Slider(
444
- # minimum=1,
445
- # maximum=10,
446
- # value=4,
447
- # step=1,
448
- # label="Number of Beams",
449
- # info="Higher = better quality but slower"
450
- # )
451
-
452
- # max_sentences = gr.Slider(
453
- # minimum=1,
454
- # maximum=10,
455
- # value=4,
456
- # step=1,
457
- # label="Sentences per Chunk",
458
- # info="Number of sentences to process together"
459
- # )
460
-
461
- # target_words = gr.Number(
462
- # value=300,
463
- # label="Target Word Count (Expand mode)",
464
- # info="Approximate number of words for expansion",
465
- # visible=False
466
- # )
467
-
468
- # with gr.Row():
469
- # with gr.Column(scale=1):
470
- # gr.Markdown("### πŸ“₯ Input Text")
471
- # input_text = gr.Textbox(
472
- # lines=10,
473
- # placeholder="Enter your text here...",
474
- # label="Original Text",
475
- # show_copy_button=True
476
- # )
477
-
478
- # with gr.Column(scale=1):
479
- # gr.Markdown("### πŸ“€ Generated Text")
480
- # output_text = gr.Textbox(
481
- # lines=10,
482
- # label="Processed Text",
483
- # show_copy_button=True
484
- # )
485
-
486
- # with gr.Row():
487
- # process_btn = gr.Button("πŸš€ Generate", variant="primary", size="lg")
488
- # clear_btn = gr.ClearButton([input_text, output_text], value="πŸ—‘οΈ Clear")
489
-
490
- # stats_display = gr.Markdown()
491
-
492
- # similarity_display = gr.Number(
493
- # label="Cosine Similarity Score",
494
- # precision=4,
495
- # interactive=False
496
- # )
497
-
498
- # # Highlighted comparison section
499
- # gr.Markdown("---")
500
- # gr.Markdown("## πŸ” Visual Comparison - See What Changed")
501
-
502
- # with gr.Row():
503
- # with gr.Column(scale=1):
504
- # gr.Markdown("### πŸ“„ Original Text (with changes highlighted)")
505
- # highlighted_original = gr.HTML(
506
- # label="Original with Changes",
507
- # show_label=False
508
- # )
509
-
510
- # with gr.Column(scale=1):
511
- # gr.Markdown("### ✨ Generated Text (with changes highlighted)")
512
- # highlighted_generated = gr.HTML(
513
- # label="Generated with Changes",
514
- # show_label=False
515
- # )
516
-
517
- # change_stats = gr.HTML(label="Change Statistics")
518
-
519
- # # Event handlers
520
- # mode.change(
521
- # fn=update_model_choices,
522
- # inputs=[mode],
523
- # outputs=[model_dropdown]
524
- # )
525
-
526
- # mode.change(
527
- # fn=update_parameters_visibility,
528
- # inputs=[mode],
529
- # outputs=[target_words]
530
- # )
531
-
532
- # process_btn.click(
533
- # fn=process_text,
534
- # inputs=[
535
- # input_text,
536
- # mode,
537
- # model_dropdown,
538
- # temperature,
539
- # top_p,
540
- # max_length,
541
- # num_beams,
542
- # max_sentences,
543
- # target_words
544
- # ],
545
- # outputs=[
546
- # output_text,
547
- # stats_display,
548
- # similarity_display,
549
- # highlighted_original,
550
- # highlighted_generated,
551
- # change_stats
552
- # ]
553
- # )
554
-
555
- # gr.Markdown(
556
- # """
557
- # ---
558
- # ### πŸ’‘ Tips:
559
- # - **Paraphrase Mode**: Rewrites text while preserving meaning
560
- # - **Expand Mode**: Adds details and elaboration to make text longer
561
- # - **Sentences per Chunk**: Controls how many sentences are processed together (4 recommended)
562
- # - Adjust temperature for creativity (0.7-1.0 for paraphrase, 1.0-1.5 for expansion)
563
- # - Higher beam count = better quality but slower processing
564
- # - Max length is automatically calculated based on input, but can be overridden
565
- # - Output chunks are separated by double newlines for readability
566
- # """
567
- # )
568
 
569
  with gr.Blocks(title="Text Paraphraser & Expander", theme=gr.themes.Soft()) as demo:
570
  gr.Markdown(
@@ -663,13 +484,13 @@ with gr.Blocks(title="Text Paraphraser & Expander", theme=gr.themes.Soft()) as d
663
 
664
  with gr.Row():
665
  process_btn = gr.Button("πŸš€ Generate", variant="primary", size="lg")
666
- clear_btn = gr.ClearButton([input_text, output_text], value="πŸ—‘οΈ Clear")
667
 
668
  stats_display = gr.Markdown()
669
 
670
  similarity_display = gr.Number(
671
- label="Cosine Similarity Score",
672
- precision=4,
673
  interactive=False
674
  )
675
 
@@ -682,17 +503,34 @@ with gr.Blocks(title="Text Paraphraser & Expander", theme=gr.themes.Soft()) as d
682
  gr.Markdown("### πŸ“„ Original Text (with changes highlighted)")
683
  highlighted_original = gr.HTML(
684
  label="Original with Changes",
685
- show_label=False
 
686
  )
687
 
688
  with gr.Column(scale=1):
689
  gr.Markdown("### ✨ Generated Text (with changes highlighted)")
690
  highlighted_generated = gr.HTML(
691
  label="Generated with Changes",
692
- show_label=False
 
693
  )
694
 
695
- change_stats = gr.HTML(label="Change Statistics")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696
 
697
  # Event handlers
698
  mode.change(
@@ -730,6 +568,19 @@ with gr.Blocks(title="Text Paraphraser & Expander", theme=gr.themes.Soft()) as d
730
  ]
731
  )
732
 
 
 
 
 
 
 
 
 
 
 
 
 
 
733
  gr.Markdown(
734
  """
735
  ---
 
3
  from transformers import (
4
  AutoTokenizer,
5
  AutoModelForSeq2SeqLM,
 
 
6
  )
7
  from sentence_transformers import SentenceTransformer, util
 
8
  from typing import List, Tuple, Dict
9
  import re
10
  import difflib
 
47
 
48
  def chunk_text(text: str, max_sentences: int = 4) -> List[str]:
49
  """Split text into chunks based on number of sentences"""
 
50
  sentences = re.split(r'(?<=[.!?]) +', text.strip())
51
  chunks = [' '.join(sentences[i:i+max_sentences]) for i in range(0, len(sentences), max_sentences)]
52
  return [chunk for chunk in chunks if chunk.strip()]
 
75
 
76
  def calculate_similarity(text1: str, text2: str) -> float:
77
  """Calculate cosine similarity between two texts"""
78
+ if not text1.strip() or not text2.strip():
79
+ return 0.0
80
  embeddings = similarity_model.encode([text1, text2], convert_to_tensor=True)
81
+ similarity = util.cos_sim(embeddings[0], embeddings[1]).item()
82
+ similarity = round(similarity*100,2)
83
+ return similarity
84
 
85
  def highlight_differences(original: str, generated: str) -> Tuple[str, str, Dict]:
86
  """
 
386
  return error_msg, "Error occurred", 0.0, "", "", ""
387
 
388
  # Create Gradio interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
 
390
  with gr.Blocks(title="Text Paraphraser & Expander", theme=gr.themes.Soft()) as demo:
391
  gr.Markdown(
 
484
 
485
  with gr.Row():
486
  process_btn = gr.Button("πŸš€ Generate", variant="primary", size="lg")
487
+ clear_btn = gr.Button("πŸ—‘οΈ Clear",size="lg")
488
 
489
  stats_display = gr.Markdown()
490
 
491
  similarity_display = gr.Number(
492
+ label="Content Similarity (%)",
493
+ precision=2,
494
  interactive=False
495
  )
496
 
 
503
  gr.Markdown("### πŸ“„ Original Text (with changes highlighted)")
504
  highlighted_original = gr.HTML(
505
  label="Original with Changes",
506
+ show_label=False,
507
+ elem_id="highlighted_original"
508
  )
509
 
510
  with gr.Column(scale=1):
511
  gr.Markdown("### ✨ Generated Text (with changes highlighted)")
512
  highlighted_generated = gr.HTML(
513
  label="Generated with Changes",
514
+ show_label=False,
515
+ elem_id="highlighted_original"
516
  )
517
 
518
+ change_stats = gr.HTML(label="Change Statistics",elem_id="change_stats")
519
+
520
+ gr.HTML("""
521
+ <style>
522
+ #highlighted_original > div {
523
+ overflow-y: auto;
524
+ max-height: 400px;
525
+ }
526
+ #highlighted_original > div:empty {
527
+ overflow: hidden;
528
+ }
529
+ #change_stats > div:empty {
530
+ overflow: hidden;
531
+ }
532
+ </style>
533
+ """)
534
 
535
  # Event handlers
536
  mode.change(
 
568
  ]
569
  )
570
 
571
+ clear_btn.click(
572
+ fn=lambda: ("", "", 0.0, "", "", ""),
573
+ inputs=[],
574
+ outputs=[
575
+ input_text,
576
+ output_text,
577
+ similarity_display,
578
+ highlighted_original,
579
+ highlighted_generated,
580
+ change_stats
581
+ ]
582
+ )
583
+
584
  gr.Markdown(
585
  """
586
  ---