Source code for m3sgg.language.summarization.summarize

from m3sgg.language.summarization.wrappers import (
    PegasusCustomConfig,
    PegasusSeparateLoader,
    PegasusSummarizationWrapper,
    T5SummarizationWrapper,
)


def linearize_triples(triples, mode="flat"):
    """Convert scene graph triples into natural language sentences.

    Transforms subject-predicate-object triples into human-readable sentences
    using predefined relationship patterns for visual attention, spatial
    relationships, and physical interactions.

    :param triples: List of (subject, predicate, object) tuples
    :type triples: list
    :param mode: Linearization mode (flat, majority, time)
    :type mode: str
    :return: List of natural language sentences
    :rtype: list
    """
    sentences = []

    relationship_patterns = {
        # Visual attention relationships
        "lookingat": "is looking at",
        "notlookingat": "is not looking at",
        "unsure": "may be looking at",
        # Spatial relationships
        "above": "is above",
        "beneath": "is beneath",
        "infrontof": "is in front of",
        "behind": "is behind",
        "onthesideof": "is on the side of",
        "in": "is in",
        # Physical interactions
        "carrying": "is carrying",
        "coveredby": "is covered by",
        "drinkingfrom": "is drinking from",
        "eating": "is eating",
        "haveitontheback": "has it on the back",
        "holding": "is holding",
        "leaningon": "is leaning on",
        "lyingon": "is lying on",
        "notcontacting": "is not contacting",
        "otherrelationship": "has some relationship with",
        "sittingon": "is sitting on",
        "standingon": "is standing on",
        "touching": "is touching",
        "twisting": "is twisting",
        "wearing": "is wearing",
        "wiping": "is wiping",
        "writingon": "is writing on",
    }

    if mode == "flat":
        # Simple flat linearization - all triples as separate sentences
        for triple in triples:
            subject, predicate, object_item = triple
            if predicate in relationship_patterns:
                pattern = relationship_patterns[predicate]
                sentence = f"The {subject} {pattern} the {object_item}."
            else:
                # Fallback for unknown relationships
                sentence = f"The {subject} is {predicate} the {object_item}."
            sentences.append(sentence)

    elif mode == "majority":
        # Majority voting - group by object and use most common relationship
        from collections import Counter, defaultdict

        object_relationships = defaultdict(list)
        for triple in triples:
            subject, predicate, object_item = triple
            object_relationships[object_item].append((subject, predicate))

        for object_item, rels in object_relationships.items():
            # Count relationships for this object
            rel_counts = Counter([rel for _, rel in rels])
            most_common_rel = rel_counts.most_common(1)[0][0]
            # Get all subjects for this relationship
            subjects = [subj for subj, rel in rels if rel == most_common_rel]
            subject_text = " and ".join(subjects) if len(subjects) > 1 else subjects[0]
            if most_common_rel in relationship_patterns:
                pattern = relationship_patterns[most_common_rel]
                sentence = f"The {subject_text} {pattern} the {object_item}."
            else:
                sentence = f"The {subject_text} is {most_common_rel} the {object_item}."
            sentences.append(sentence)

    elif mode == "time":
        # Time-aware linearization - group by temporal patterns
        # For now, just use flat linearization with temporal markers
        for i, triple in enumerate(triples):
            subject, predicate, object_item = triple
            if predicate in relationship_patterns:
                pattern = relationship_patterns[predicate]
                sentence = f"At time {i+1}: The {subject} {pattern} the {object_item}."
            else:
                sentence = (
                    f"At time {i+1}: The {subject} is {predicate} the {object_item}."
                )
            sentences.append(sentence)

    else:
        # Default to flat mode
        for triple in triples:
            subject, predicate, object_item = triple
            if predicate in relationship_patterns:
                pattern = relationship_patterns[predicate]
                sentence = f"The {subject} {pattern} the {object_item}."
            else:
                sentence = f"The {subject} is {predicate} the {object_item}."
            sentences.append(sentence)

    return sentences
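

# Illustrative example (derived from the patterns above): for Action Genome-style
# predicates written without spaces,
#
#   linearize_triples([("person", "holding", "cup"), ("person", "sittingon", "chair")])
#   -> ["The person is holding the cup.", "The person is sitting on the chair."]
#
# With mode="time", each sentence is prefixed with its position, e.g.
# "At time 1: The person is holding the cup.". Unknown predicates fall back to the
# generic "The <subject> is <predicate> the <object>." pattern.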


def summarize_sentences(sentences, model_name="google-t5/t5-base", model_type="t5"):
    """Summarize a list of sentences with a T5 or Pegasus wrapper.

    :param sentences: Sentences to summarize
    :type sentences: list
    :param model_name: Hugging Face model identifier
    :type model_name: str
    :param model_type: Backend to use ("t5" or "pegasus")
    :type model_type: str
    :return: Summary produced by the selected wrapper
    :rtype: str
    """
    combined_text = " ".join(sentences)

    if model_type.lower() == "t5":
        wrapper = T5SummarizationWrapper(model_name)
        summary = wrapper.summarize(combined_text)
    elif model_type.lower() == "pegasus":
        wrapper = PegasusSummarizationWrapper(model_name)
        summary = wrapper.summarize(combined_text)
    else:
        raise ValueError(f"Unsupported model type: {model_type}")

    return summary
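

# Usage sketch (assumes the wrapper classes imported above load the named
# Hugging Face checkpoint when constructed):
#
#   sentences = linearize_triples(triples)
#   summary = summarize_sentences(
#       sentences, model_name="google/pegasus-xsum", model_type="pegasus"
#   )
#
# Any other model_type value raises ValueError.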


def summarize_with_pegasus_separate(sentences, model_name="google/pegasus-xsum"):
    """Summarize sentences with Pegasus, loading tokenizer and model separately.

    :param sentences: Sentences to summarize
    :type sentences: list
    :param model_name: Hugging Face Pegasus checkpoint to load
    :type model_name: str
    :return: Summary produced by the separate loader
    :rtype: str
    """
    combined_text = " ".join(sentences)

    separate_loader = PegasusSeparateLoader(model_name)
    separate_loader.load_tokenizer()
    separate_loader.load_model()
    summary = separate_loader.summarize(combined_text)

    return summary


def summarize_with_pegasus_custom(
    sentences, model_name="google/pegasus-xsum", **kwargs
):
    """Summarize sentences with Pegasus using a custom configuration.

    Additional keyword arguments are forwarded to the wrapper's summarize method.

    :param sentences: Sentences to summarize
    :type sentences: list
    :param model_name: Hugging Face Pegasus checkpoint to load
    :type model_name: str
    :return: Summary produced by the custom-config wrapper
    :rtype: str
    """
    combined_text = " ".join(sentences)

    custom_config = PegasusCustomConfig(model_name)
    custom_config.load_with_config()
    summary = custom_config.summarize(combined_text, **kwargs)

    return summary
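

# The custom-config path forwards **kwargs to PegasusCustomConfig.summarize, so
# generation options can be tuned per call. A sketch, assuming the wrapper accepts
# standard Hugging Face generation arguments (as passed in main() below):
#
#   summary = summarize_with_pegasus_custom(sentences, max_length=80, num_beams=6)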


def main():
    """Demonstrate linearization and the available summarization helpers."""
    triples = [
        ("man", "riding", "bicycle"),
        ("woman", "holding", "umbrella"),
        ("child", "playing with", "dog"),
        ("girl", "wearing", "red dress"),
        ("boy", "sitting on", "bench"),
    ]

    sentences = linearize_triples(triples)
    print("Original sentences:")
    for sentence in sentences:
        print(f" - {sentence}")
    print()

    # Test different summarization approaches
    print("=" * 60)
    print("T5 Summarization:")
    print("=" * 60)
    summary_t5 = summarize_sentences(sentences, model_type="t5")
    print(f"Summary: {summary_t5}")
    print()

    print("=" * 60)
    print("Pegasus Summarization:")
    print("=" * 60)
    summary_pegasus = summarize_sentences(sentences, model_type="pegasus")
    print(f"Summary: {summary_pegasus}")
    print()

    print("=" * 60)
    print("Pegasus Separate Loader:")
    print("=" * 60)
    summary_separate = summarize_with_pegasus_separate(sentences)
    print(f"Summary: {summary_separate}")
    print()

    print("=" * 60)
    print("Pegasus Custom Config:")
    print("=" * 60)
    summary_custom = summarize_with_pegasus_custom(
        sentences, max_length=80, min_length=15, length_penalty=1.5, num_beams=6
    )
    print(f"Summary: {summary_custom}")


if __name__ == "__main__":
    main()