Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
5feffa1
Updated
vibhansa-msft Sep 17, 2025
e9078d2
Updated
vibhansa-msft Sep 17, 2025
d510bef
Updated
vibhansa-msft Sep 17, 2025
b0a8444
Updated
vibhansa-msft Sep 17, 2025
125366c
Updated
vibhansa-msft Sep 17, 2025
56aaa25
Updated
vibhansa-msft Sep 17, 2025
cb23eb6
Updated
vibhansa-msft Sep 17, 2025
21d1a8e
Updated
vibhansa-msft Sep 17, 2025
cbd4aa4
Updated
vibhansa-msft Sep 17, 2025
b0a023a
Updated
vibhansa-msft Sep 17, 2025
506e36f
Updated
vibhansa-msft Sep 17, 2025
1cc4bca
Updated
vibhansa-msft Sep 17, 2025
32aab92
Updated
vibhansa-msft Sep 18, 2025
2da022e
Add error handling
vibhansa-msft Sep 18, 2025
eb49069
Updated
vibhansa-msft Sep 18, 2025
1b9a595
Updated
vibhansa-msft Sep 18, 2025
fb4ec2f
Updated
vibhansa-msft Sep 18, 2025
b309734
Updated
vibhansa-msft Sep 18, 2025
d84a6a0
Updated
vibhansa-msft Sep 18, 2025
d4f82b0
Updated
vibhansa-msft Sep 18, 2025
c3c1002
Updated
vibhansa-msft Sep 18, 2025
40b72ab
Updated
vibhansa-msft Sep 18, 2025
10251aa
Updated
vibhansa-msft Sep 18, 2025
0d05c1b
Updated
vibhansa-msft Sep 18, 2025
76b1278
Updated
vibhansa-msft Sep 18, 2025
10ba7e8
Updated
vibhansa-msft Sep 18, 2025
f4e75f9
Updated
vibhansa-msft Sep 18, 2025
f60184c
Updated
vibhansa-msft Sep 18, 2025
7ebe10c
Updated
vibhansa-msft Sep 18, 2025
31851c1
Updated
vibhansa-msft Sep 18, 2025
85fe5ed
Finalizing filter pattern
vibhansa-msft Sep 18, 2025
b14403f
Updated
vibhansa-msft Sep 18, 2025
7e76bd1
Updated
vibhansa-msft Sep 22, 2025
3aa019c
Adding script to retrieve all issues and PRs and normalize the data
vibhansa-msft Sep 22, 2025
09f4aee
Updated
vibhansa-msft Sep 22, 2025
1de76bb
Adding workflow to dump list of issues and PRs every month
vibhansa-msft Sep 23, 2025
64dd3e5
Adding current issue and pr dump
vibhansa-msft Sep 23, 2025
cab85f6
Adding authentication
vibhansa-msft Sep 23, 2025
4d28648
Updated
vibhansa-msft Sep 23, 2025
e91a9f0
Updated
vibhansa-msft Sep 23, 2025
b22b513
Updated
vibhansa-msft Sep 23, 2025
21cf3ad
Updated
vibhansa-msft Sep 23, 2025
022f77c
Updated
vibhansa-msft Sep 23, 2025
a9df0bd
Updated
vibhansa-msft Sep 23, 2025
b1e5dde
Updated
vibhansa-msft Sep 23, 2025
1401521
Updated
vibhansa-msft Sep 23, 2025
8f5b20d
Updated
vibhansa-msft Sep 23, 2025
8a76787
Updated
vibhansa-msft Sep 23, 2025
792c3e0
Updated
vibhansa-msft Sep 23, 2025
b65ae36
Updated
vibhansa-msft Sep 23, 2025
fa2e9ca
Updated
vibhansa-msft Sep 23, 2025
896f5f9
Updated
vibhansa-msft Sep 23, 2025
3d798c2
Updated
vibhansa-msft Sep 23, 2025
f79e44d
Updated
vibhansa-msft Sep 23, 2025
2a65b0c
Updated
vibhansa-msft Sep 23, 2025
53d51d8
Updated
vibhansa-msft Sep 23, 2025
b161bb6
Updated
vibhansa-msft Sep 23, 2025
8bbd6da
Updated
vibhansa-msft Sep 23, 2025
2eef7c6
Updated
vibhansa-msft Sep 24, 2025
24280b5
Updated
vibhansa-msft Sep 24, 2025
c7ee541
Updated
vibhansa-msft Sep 25, 2025
e95a764
Merge remote-tracking branch 'upstream/main'
vibhansa-msft Sep 29, 2025
7b3c99d
Merge branch 'main' into main
vibhansa-msft Sep 29, 2025
abb8ffc
Merge branch 'Azure:main' into main
vibhansa-msft Oct 17, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 128 additions & 0 deletions .github/scripts/ai_summarize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import sys
import os
import subprocess
from transformers import pipeline, AutoTokenizer

# The maximum sequence length for this model. This is a fixed constraint:
# distilbart-cnn-12-6 cannot attend to more tokens than this in one pass,
# so longer inputs must be chunked (see refine_summary_from_chunks).
MAX_SEQUENCE_LENGTH = 1024

def summarize_text_with_llm(text, model_name="sshleifer/distilbart-cnn-12-6"):
    """
    Summarize *text* using a Hugging Face summarization model.

    Inputs longer than MAX_SEQUENCE_LENGTH tokens are split into chunks,
    summarized individually, and then refined in a second pass.

    Args:
        text: The raw text to summarize.
        model_name: Hugging Face model id used for both the pipeline and its
            tokenizer. Defaults to the previously hard-coded checkpoint, so
            existing callers are unaffected.

    Returns:
        The summary string, or a fixed fallback message when summarization
        fails for any reason.
    """
    try:
        # Pipeline and tokenizer must come from the same checkpoint so the
        # token count measured below matches what the model actually sees.
        summarizer = pipeline("summarization", model=model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # truncation=False so we measure the true length instead of the
        # tokenizer silently clipping to the model maximum.
        token_count = len(tokenizer.encode(text, truncation=False))

        # Short enough for a single pass: summarize directly.
        if token_count <= MAX_SEQUENCE_LENGTH:
            summary = summarizer(text, max_length=150, min_length=50, do_sample=False)
            return summary[0]['summary_text']

        # Too long for one pass: chunk, summarize each chunk, then refine.
        return refine_summary_from_chunks(text, summarizer, tokenizer)

    except Exception as e:
        print(f"Error summarizing with LLM: {e}", file=sys.stderr)
        # Fallback sentinel the caller embeds verbatim in its output.
        return "Could not generate AI summary. Full response below."

def refine_summary_from_chunks(text, summarizer, tokenizer, max_tokens=None):
    """
    Split *text* into token-limited chunks, summarize each, then refine.

    Args:
        text: The full text to summarize.
        summarizer: Summarization pipeline; called as
            summarizer(chunk, max_length=..., min_length=..., do_sample=False)
            and expected to return [{'summary_text': str}].
        tokenizer: Tokenizer whose encode(text, truncation=False) result is
            used to measure chunk sizes in tokens.
        max_tokens: Per-chunk token budget. Defaults to the module-level
            MAX_SEQUENCE_LENGTH (the model's fixed limit), preserving the
            old behavior for existing callers.

    Returns:
        The final refined summary string.
    """
    limit = MAX_SEQUENCE_LENGTH if max_tokens is None else max_tokens

    # Split on sentence boundaries to avoid cutting a sentence in half.
    sentences = text.split('. ')
    chunks = []
    current_chunk = ""

    for sentence in sentences:
        # Re-insert the '. ' separator that split() consumed.
        temp_chunk = current_chunk + ('. ' if current_chunk else '') + sentence
        if len(tokenizer.encode(temp_chunk, truncation=False)) <= limit:
            current_chunk = temp_chunk
        else:
            # BUG FIX: when the very first sentence alone exceeds the limit,
            # the old code appended an empty chunk, which was later fed to
            # the summarizer. Only flush non-empty chunks.
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = sentence

    # Flush the final partial chunk.
    if current_chunk:
        chunks.append(current_chunk)

    # First pass: summarize each chunk independently.
    intermediate_summaries = []
    for i, chunk in enumerate(chunks):
        print(f"Summarizing chunk {i+1} of {len(chunks)}...", file=sys.stderr)
        summary = summarizer(chunk, max_length=150, min_length=50, do_sample=False)
        intermediate_summaries.append(summary[0]['summary_text'])

    # Second pass: combine and refine to remove cross-chunk redundancy.
    combined_summary_text = " ".join(intermediate_summaries)
    print("Refining combined summaries into a final output...", file=sys.stderr)
    final_summary = summarizer(combined_summary_text, max_length=150, min_length=50, do_sample=False)

    return final_summary[0]['summary_text']


if __name__ == "__main__":
    # CLI: ai_summarize.py <input_file_path> <output_file_path>
    if len(sys.argv) < 3:
        print("Usage: python ai_summarize.py <input_file_path> <output_file_path>", file=sys.stderr)
        sys.exit(1)

    input_file_path = sys.argv[1]
    output_file_path = sys.argv[2]

    # Fail fast if the input file does not exist.
    if not os.path.exists(input_file_path):
        print(f"Error: Input file not found at {input_file_path}", file=sys.stderr)
        sys.exit(1)

    with open(input_file_path, 'r', encoding='utf-8') as f:
        full_text = f.read()

    # Use the LLM to summarize the text.
    summary = summarize_text_with_llm(full_text)
    # BUG FIX: the old check was `summary == ""`, which never fires because
    # the failure path of summarize_text_with_llm returns a non-empty
    # fallback message. Check falsiness instead, and route the error to
    # stderr for consistency with the other error paths above.
    if not summary:
        print("Error: Could not generate summary.", file=sys.stderr)
        sys.exit(1)

    # A very short "summary" is treated as a degenerate model output.
    summary_len = len(summary)
    if summary_len < 50:
        print("Error: Generated summary is too short to be meaningful.", file=sys.stderr)
        sys.exit(1)

    resp_len = len(full_text) + summary_len
    issue_comment_body = (
        "## Summary \n\n"
        f"{summary}\n\n"
    )

    # Only inline the full response when the comment stays under 65000
    # characters (presumably GitHub's ~65K comment size limit -- confirm).
    # Demote the response's headings so they nest under "Details".
    if resp_len < 65000:
        full_text = full_text.replace("## ", "### ")
        issue_comment_body += (
            "## Details \n\n"
            f"{full_text}\n\n"
        )

    # Add the disclaimer
    issue_comment_body += (
        "---\n"
        "**In case of issue, share mount command, config file and debug-logs to investigate further.**\n\n"
        "---\n"
        "*Disclaimer: This summary is AI-generated and may not be fully accurate.*"
    )

    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(issue_comment_body)
71 changes: 71 additions & 0 deletions .github/scripts/deepwiki_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import asyncio
import os
import sys
from contextlib import AsyncExitStack
from typing import Optional
import subprocess

import httpx
from mcp import ClientSession
from mcp.client.sse import sse_client
from dotenv import load_dotenv

load_dotenv()

# The URL for the DeepWiki MCP server's SSE transport.
# NOTE(review): despite the old "Example URL" comment, this is the actual
# endpoint main() connects to.
MCP_SSE_URL = "https://mcp.deepwiki.com/sse" # Example URL

class MCPClient:
    """Minimal MCP client that talks to a server over SSE transport."""

    def __init__(self):
        # BUG FIX: initialize all context attributes to None so cleanup()
        # is safe to call even when connect_to_sse_server() failed part-way
        # or was never called -- the old code only created these attributes
        # inside connect, so cleanup() raised AttributeError in that case.
        self.session = None
        self._streams_context = None
        self._session_context = None
        # NOTE(review): exit_stack is created but never used by this class.
        self.exit_stack = AsyncExitStack()

    async def connect_to_sse_server(self, server_url: str):
        """Open the SSE transport to *server_url* and initialize a session."""
        self._streams_context = sse_client(url=server_url)
        streams = await self._streams_context.__aenter__()
        self._session_context = ClientSession(*streams)
        self.session = await self._session_context.__aenter__()
        await self.session.initialize()

    async def cleanup(self):
        """Close the session and transport contexts; safe when unconnected."""
        if self._session_context is not None:
            await self._session_context.__aexit__(None, None, None)
        if self._streams_context is not None:
            await self._streams_context.__aexit__(None, None, None)

    async def ask_deepwiki(self, repo: str, question: str) -> str:
        """Send *question* about *repo* to the server's ask_question tool.

        Raises:
            RuntimeError: if connect_to_sse_server() has not been called.
        """
        if not self.session:
            raise RuntimeError("Client not connected.")

        # The MCP SDK handles the JSON-RPC call for you
        result = await self.session.call_tool(
            "ask_question", {"repoName": repo, "question": question}
        )

        # NOTE(review): this returns the raw content object/list, not a
        # joined string as the old comment claimed; downstream
        # extract_text.py parses its repr, so the raw form is kept.
        return result.content

async def main(repo, title, body):
    """Connect to the DeepWiki MCP server, ask one question, print the reply.

    Args:
        repo: Repository name to query (e.g. "owner/name").
        title: Question title; becomes the first part of the prompt.
        body: Question body; appended after a blank line.
    """
    mcp = MCPClient()
    try:
        await mcp.connect_to_sse_server(server_url=MCP_SSE_URL)
        answer = await mcp.ask_deepwiki(repo, f"{title}\n\n{body}")
        print(answer)
    finally:
        # Always release the SSE/session contexts, even on failure.
        await mcp.cleanup()

if __name__ == "__main__":
    # CLI: deepwiki_query.py <repo> <question title> <question body>
    if len(sys.argv) < 4:
        # BUG FIX: usage errors go to stderr so stdout stays reserved for
        # the DeepWiki response (consistent with ai_summarize.py).
        print("Usage: python deepwiki_query.py <repo> <question title> <question body>", file=sys.stderr)
        sys.exit(1)

    repo_arg = sys.argv[1]
    issue_title = sys.argv[2]
    issue_body = sys.argv[3]

    asyncio.run(main(repo_arg, issue_title, issue_body))
78 changes: 78 additions & 0 deletions .github/scripts/extract_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import re
import sys

def extract_text_field(text_content: str) -> str:
    """
    Extracts the 'text' field from a string representation of a TextContent object.

    Args:
        text_content: The input string containing the TextContent representation.

    Returns:
        The extracted text field as a string, or an empty string if a regex
        error occurs. Exits the process when no text field is present.
    """
    # The greedy `(.*)` with re.DOTALL captures all characters, including
    # newlines. The positive lookahead pins the match to the specific
    # trailing `, annotations=None, meta=None)]` suffix, so quotes inside
    # the text content itself cannot terminate the match early.
    try:
        pattern = ""
        if 'text="' in text_content:
            pattern = r"text=\"(.*)\"(?=, annotations=None, meta=None\)])"
        elif "text='" in text_content:
            pattern = r"text='(.*)'(?=, annotations=None, meta=None\)])"
        else:
            # BUG FIX: errors go to stderr -- stdout is the channel the
            # extracted text is printed on, so writing errors there would
            # corrupt the downstream consumer's capture.
            print("Error: The input text does not contain a recognizable text field.", file=sys.stderr)
            sys.exit(1)

        match = re.search(pattern, text_content, re.DOTALL)
        if match:
            # BUG FIX: the old `.encode('utf-8').decode('unicode_escape')`
            # mangled non-ASCII text (every UTF-8 byte was reinterpreted as
            # a Latin-1 char). latin-1 + backslashreplace round-trips code
            # points so only escape sequences (\n, \t, \uXXXX) are decoded.
            decoded_text = match.group(1).encode('latin-1', 'backslashreplace').decode('unicode_escape')

            # Remove everything after either "## Notes\n" or "Notes:\n" --
            # trailing boilerplate that should not reach the output.
            for delimiter in ["## Notes\n", "Notes:\n"]:
                if delimiter in decoded_text:
                    decoded_text = decoded_text.split(delimiter)[0]

            # Replace all double quotes with single quotes so the result is
            # safe to embed in quoted contexts downstream.
            decoded_text = decoded_text.replace('"', "'")

            return decoded_text.strip()
        else:
            print("No match found for the text field.", file=sys.stderr)
            sys.exit(1)

    except re.error as e:
        print(f"Regular expression error: {e}", file=sys.stderr)

    return ""

if __name__ == "__main__":
    # CLI: extract_text.py <path_to_input_file>
    if len(sys.argv) < 2:
        print("Usage: python extract_text.py <path_to_input_file>", file=sys.stderr)
        sys.exit(1)

    file_path = sys.argv[1]

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            input_text = f.read()

        # Reject trivially small inputs: they cannot hold a real
        # TextContent payload worth parsing.
        input_len = len(input_text)
        if input_len < 100:
            print("Input text is too short to process.", file=sys.stderr)
            sys.exit(1)

        extracted_content = extract_text_field(input_text)
        if extracted_content:
            # The extracted text is the sole stdout output, so callers can
            # capture it directly; all diagnostics go to stderr.
            print(extracted_content)
        else:
            print("Error: Could not extract text content.", file=sys.stderr)
            sys.exit(1)

    except FileNotFoundError:
        # BUG FIX: the old handler printed the error (to stdout) and fell
        # through, exiting 0 -- callers could not detect the failure.
        print(f"Error: The file '{file_path}' was not found.", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        # BUG FIX: likewise exit non-zero on any unexpected failure.
        print(f"An unexpected error occurred: {e}", file=sys.stderr)
        sys.exit(1)
Loading
Loading