Skip to content
This repository was archived by the owner on Mar 10, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ a graph representation of your source code, the graph name should be the same as
the name of the folder you've requested to analyze, for the example above a graph named:
"GraphRAG-SDK".

At the moment only the Python and C languages are supported, we do intend to support additional languages.
At the moment, the Python, Java, and C# languages are supported; we intend to support additional languages.

At this point you can explore and query your source code using various tools
Here are several options:
Expand Down
4 changes: 2 additions & 2 deletions api/analyzers/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def add_symbols(self, entity: Entity) -> None:
pass

@abstractmethod
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity:
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
"""
Resolve a symbol to an entity.

Expand All @@ -138,7 +138,7 @@ def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
symbol (Node): The symbol node.

Returns:
Entity: The entity.
list[Entity]: The resolved entities.
"""

pass
Expand Down
Empty file.
144 changes: 144 additions & 0 deletions api/analyzers/csharp/analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import subprocess
from pathlib import Path

from multilspy import SyncLanguageServer
from ...entities.entity import Entity
from ...entities.file import File
from typing import Optional
from ..analyzer import AbstractAnalyzer

import tree_sitter_c_sharp as tscsharp
from tree_sitter import Language, Node, QueryCursor

import logging
logger = logging.getLogger('code_graph')

class CSharpAnalyzer(AbstractAnalyzer):
def __init__(self) -> None:
    """Set up the analyzer with the tree-sitter grammar for C#."""
    csharp_language = Language(tscsharp.language())
    super().__init__(csharp_language)

def _captures(self, pattern: str, node: Node) -> dict:
    """Compile a tree-sitter query and collect its captures under ``node``.

    Args:
        pattern (str): Query source text, e.g. ``"(base_list (_) @base_type)"``.
        node (Node): Node the query cursor is run against.

    Returns:
        dict: The result of ``QueryCursor.captures``; callers in this class
        index it by capture name.
    """
    compiled_query = self.language.query(pattern)
    return QueryCursor(compiled_query).captures(node)

def add_dependencies(self, path: Path, files: list[Path]):
    """Restore the NuGet dependencies of a C# project, at most once.

    ``dotnet restore`` is executed in ``path`` when that directory directly
    contains a ``*.csproj`` or ``*.sln`` file. After a successful restore a
    ``temp_deps_cs`` marker directory is created inside ``path`` so that
    later calls return early instead of restoring again. Previously nothing
    created that directory, so the guard never fired.

    Args:
        path (Path): Root directory of the project being analyzed.
        files (list[Path]): Source files queued for analysis. It is left
            untouched here.
            NOTE(review): no dependency sources are appended for C#;
            confirm that this is intended.
    """
    project_root = Path(path)
    marker = project_root / "temp_deps_cs"
    if marker.is_dir():
        # Dependencies were already restored by an earlier call.
        return

    has_project_file = any(project_root.glob("*.csproj")) or any(project_root.glob("*.sln"))
    if not has_project_file:
        return

    result = subprocess.run(["dotnet", "restore"], cwd=str(path))
    if result.returncode == 0:
        # Only record success; a failed restore should be retried next time.
        marker.mkdir(exist_ok=True)
Comment on lines +26 to +30
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

add_dependencies idempotency guard never fires — dotnet restore runs on every invocation.

Path(f"{path}/temp_deps_cs").is_dir() is checked on line 27, but nothing in this function (or anywhere else) creates the temp_deps_cs directory. The guard is therefore permanently False and dotnet restore will run on every call. This contrasts with the Python analyzer (checks venv/ which it creates) and the Java analyzer (creates temp_deps/ before scanning it).

Additionally, unlike Java (files.extend(...)) and Python (files.extend(...)), files is never populated here, so no C# dependency source files are queued for analysis. Ruff also flags the files parameter as unused (ARG002).

🐛 Proposed fix — create the sentinel directory after a successful restore
 def add_dependencies(self, path: Path, files: list[Path]):
     if Path(f"{path}/temp_deps_cs").is_dir():
         return
     if any(Path(f"{path}").glob("*.csproj")) or any(Path(f"{path}").glob("*.sln")):
-        subprocess.run(["dotnet", "restore"], cwd=str(path))
+        result = subprocess.run(["dotnet", "restore"], cwd=str(path))
+        if result.returncode == 0:
+            Path(f"{path}/temp_deps_cs").mkdir(exist_ok=True)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def add_dependencies(self, path: Path, files: list[Path]):
if Path(f"{path}/temp_deps_cs").is_dir():
return
if any(Path(f"{path}").glob("*.csproj")) or any(Path(f"{path}").glob("*.sln")):
subprocess.run(["dotnet", "restore"], cwd=str(path))
def add_dependencies(self, path: Path, files: list[Path]):
if Path(f"{path}/temp_deps_cs").is_dir():
return
if any(Path(f"{path}").glob("*.csproj")) or any(Path(f"{path}").glob("*.sln")):
result = subprocess.run(["dotnet", "restore"], cwd=str(path))
if result.returncode == 0:
Path(f"{path}/temp_deps_cs").mkdir(exist_ok=True)
🧰 Tools
🪛 Ruff (0.15.1)

[warning] 26-26: Unused method argument: files

(ARG002)


[error] 30-30: Starting a process with a partial executable path

(S607)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@api/analyzers/csharp/analyzer.py` around lines 26 - 30, The add_dependencies
function never creates the sentinel directory and never populates the files
list; modify add_dependencies to run subprocess.run(["dotnet","restore"],
cwd=str(path), check=True) only when a .csproj or .sln exists, and after a
successful restore create the sentinel
Path(f"{path}/temp_deps_cs").mkdir(parents=True, exist_ok=True) so the
idempotency guard can short-circuit future calls; also populate the files
argument (e.g., extend files with Path(path).glob("**/*.cs") or other C# source
patterns) so C# source files are queued for analysis and the files parameter is
actually used.


def get_entity_label(self, node: Node) -> str:
    """Return the label string that corresponds to a C# declaration node.

    Args:
        node (Node): A declaration node; only its ``type`` is inspected.

    Returns:
        str: One of "Class", "Interface", "Enum", "Struct", "Method" or
        "Constructor".

    Raises:
        ValueError: If ``node.type`` is not a supported declaration type.
    """
    labels_by_type = {
        'class_declaration': "Class",
        'interface_declaration': "Interface",
        'enum_declaration': "Enum",
        'struct_declaration': "Struct",
        'method_declaration': "Method",
        'constructor_declaration': "Constructor",
    }
    label = labels_by_type.get(node.type)
    if label is None:
        raise ValueError(f"Unknown entity type: {node.type}")
    return label

def get_entity_name(self, node: Node) -> str:
    """Return the identifier of a supported C# declaration.

    Args:
        node (Node): A class, interface, enum, struct, method or
            constructor declaration node.

    Returns:
        str: The UTF-8 decoded text of the node's ``name`` field, or an
        empty string when the node has no such field.

    Raises:
        ValueError: If ``node.type`` is not a supported declaration type.
    """
    supported_types = ('class_declaration', 'interface_declaration', 'enum_declaration',
                       'struct_declaration', 'method_declaration', 'constructor_declaration')
    if node.type not in supported_types:
        raise ValueError(f"Unknown entity type: {node.type}")

    identifier = node.child_by_field_name('name')
    return '' if identifier is None else identifier.text.decode('utf-8')

def get_entity_docstring(self, node: Node) -> Optional[str]:
    """Collect the comment lines that directly precede a declaration.

    Args:
        node (Node): A class, interface, enum, struct, method or
            constructor declaration node.

    Returns:
        Optional[str]: The preceding contiguous comment nodes joined with
        newlines in source order, or ``None`` when there are none.

    Raises:
        ValueError: If ``node.type`` is not a supported declaration type.
    """
    supported_types = ('class_declaration', 'interface_declaration', 'enum_declaration',
                       'struct_declaration', 'method_declaration', 'constructor_declaration')
    if node.type not in supported_types:
        raise ValueError(f"Unknown entity type: {node.type}")

    # Each `///` line of an XML doc comment is its own node, so walk backwards
    # over the contiguous run of comment siblings (nearest first) ...
    nearest_first = []
    current = node.prev_sibling
    while current and current.type == "comment":
        nearest_first.append(current.text.decode('utf-8'))
        current = current.prev_sibling

    if not nearest_first:
        return None
    # ... and flip the result back into source order.
    nearest_first.reverse()
    return '\n'.join(nearest_first)

def get_entity_types(self) -> list[str]:
    """Return the tree-sitter node types this analyzer treats as entities.

    Returns:
        list[str]: The four type-declaration node types followed by the
        method and constructor declaration node types.
    """
    type_declarations = ['class_declaration', 'interface_declaration',
                         'enum_declaration', 'struct_declaration']
    member_declarations = ['method_declaration', 'constructor_declaration']
    return type_declarations + member_declarations

def add_symbols(self, entity: Entity) -> None:
    """Attach the unresolved symbols found inside a declaration to ``entity``.

    Depending on ``entity.node.type`` the following symbol keys are recorded
    through ``entity.add_symbol``:

    * class / struct: ``base_class`` and ``implement_interface``
    * interface: ``extend_interface``
    * method / constructor: ``call`` and ``parameters``, plus
      ``return_type`` for methods only

    Args:
        entity (Entity): The entity whose ``node`` is queried and which
            receives the symbols.
    """
    declaration = entity.node
    kind = declaration.type

    def captured(pattern: str, capture_name: str) -> list:
        # A capture name missing from the result means nothing matched.
        # NOTE(review): the query is run on the whole declaration node, so
        # matches inside nested declarations are presumably included - confirm.
        found = self._captures(pattern, declaration)
        return found[capture_name] if capture_name in found else []

    if kind in ('class_declaration', 'struct_declaration'):
        is_class = kind == 'class_declaration'
        for position, base_type in enumerate(captured("(base_list (_) @base_type)", 'base_type')):
            if position == 0 and is_class:
                # NOTE: Without semantic analysis a base class cannot be told
                # apart from an interface in a C# base_list. By convention the
                # base class comes first; for a class that only implements
                # interfaces this yields a spurious base_class edge that the
                # LSP resolution in second_pass can correct.
                entity.add_symbol("base_class", base_type)
            else:
                entity.add_symbol("implement_interface", base_type)
    elif kind == 'interface_declaration':
        for base_type in captured("(base_list (_) @base_type)", 'base_type'):
            entity.add_symbol("extend_interface", base_type)
    elif kind in ('method_declaration', 'constructor_declaration'):
        for invocation in captured("(invocation_expression) @reference.call", 'reference.call'):
            entity.add_symbol("call", invocation)
        for parameter_type in captured("(parameter_list (parameter type: (_) @parameter))", 'parameter'):
            entity.add_symbol("parameters", parameter_type)
        if kind == 'method_declaration':
            # Constructors are excluded here: only methods get a return_type symbol.
            return_type = declaration.child_by_field_name('type')
            if return_type:
                entity.add_symbol("return_type", return_type)

def is_dependency(self, file_path: str) -> bool:
    """Report whether ``file_path`` mentions the C# dependency directory.

    Args:
        file_path (str): Path of the file being classified.

    Returns:
        bool: ``True`` when the substring ``temp_deps_cs`` occurs anywhere
        in ``file_path``.
    """
    dependency_marker = "temp_deps_cs"
    return dependency_marker in file_path

def resolve_path(self, file_path: str, path: Path) -> str:
    """Return ``file_path`` as-is; no remapping is applied for C#.

    Args:
        file_path (str): Path to resolve.
        path (Path): Project root. It is not used by this implementation.

    Returns:
        str: The unchanged ``file_path``.
    """
    unchanged = file_path
    return unchanged

def resolve_type(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
    """Resolve a type reference to the entities of its declaring types.

    Every ``(file, node)`` pair produced by ``self.resolve`` is lifted to its
    enclosing class, interface, enum or struct declaration via
    ``self.find_parent``. The declaration is kept only when it is a key of
    that file's ``entities``.

    Args:
        files (dict[Path, File]): Analyzed files, forwarded to ``self.resolve``.
        lsp (SyncLanguageServer): Language server, forwarded to ``self.resolve``.
        file_path (Path): File containing ``node``.
        path (Path): Project root.
        node (Node): The type reference to resolve.

    Returns:
        list[Entity]: Matching entities, in the order they were resolved.
    """
    type_declarations = ['class_declaration', 'interface_declaration', 'enum_declaration', 'struct_declaration']
    entities = []
    for file, resolved_node in self.resolve(files, lsp, file_path, path, node):
        declaration = self.find_parent(resolved_node, type_declarations)
        if declaration not in file.entities:
            continue
        entities.append(file.entities[declaration])
    return entities

def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, node: Node) -> list[Entity]:
    """Resolve a call site to the entities of the methods it targets.

    For an ``invocation_expression`` the node handed to ``self.resolve`` is
    narrowed to its ``function`` child, and further to the ``name`` child
    when that function is a ``member_access_expression``. Each resolved node
    is lifted to its nearest method, constructor or type declaration.
    Results whose nearest declaration is a type are dropped.

    Args:
        files (dict[Path, File]): Analyzed files, forwarded to ``self.resolve``.
        lsp (SyncLanguageServer): Language server, forwarded to ``self.resolve``.
        file_path (Path): File containing ``node``.
        path (Path): Project root.
        node (Node): The call (or other reference) to resolve.

    Returns:
        list[Entity]: Matching method/constructor entities, in resolve order.
    """
    type_declarations = ['class_declaration', 'interface_declaration', 'enum_declaration', 'struct_declaration']
    enclosing_kinds = ['method_declaration', 'constructor_declaration'] + type_declarations

    target = node
    if node.type == 'invocation_expression':
        callee = node.child_by_field_name('function')
        if callee and callee.type == 'member_access_expression':
            # For `obj.Method(...)` resolve the member name, not the whole access.
            callee = callee.child_by_field_name('name')
        if callee:
            target = callee

    entities = []
    for file, resolved_node in self.resolve(files, lsp, file_path, path, target):
        declaration = self.find_parent(resolved_node, enclosing_kinds)
        if declaration and declaration.type in type_declarations:
            # Reached a type before any method: the target is not a method.
            continue
        if declaration in file.entities:
            entities.append(file.entities[declaration])
    return entities

def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
    """Resolve a recorded symbol to entities, dispatching on its key.

    Args:
        files (dict[Path, File]): Analyzed files.
        lsp (SyncLanguageServer): Language server used for resolution.
        file_path (Path): File containing ``symbol``.
        path (Path): Project root.
        key (str): Symbol kind. ``"call"`` is resolved as a method; the
            inheritance, parameter and return-type keys are resolved as types.
        symbol (Node): The symbol node.

    Returns:
        list[Entity]: The resolved entities.

    Raises:
        ValueError: If ``key`` is not one of the recognised symbol kinds.
    """
    type_keys = ("implement_interface", "base_class", "extend_interface", "parameters", "return_type")
    method_keys = ("call",)

    if key in type_keys:
        return self.resolve_type(files, lsp, file_path, path, symbol)
    if key in method_keys:
        return self.resolve_method(files, lsp, file_path, path, symbol)
    raise ValueError(f"Unknown key {key}")
2 changes: 1 addition & 1 deletion api/analyzers/java/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
res.append(file.entities[method_dec])
return res

def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity:
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
if key in ["implement_interface", "base_class", "extend_interface", "parameters", "return_type"]:
return self.resolve_type(files, lsp, file_path, path, symbol)
elif key in ["call"]:
Expand Down
2 changes: 1 addition & 1 deletion api/analyzers/python/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def resolve_method(self, files: dict[Path, File], lsp: SyncLanguageServer, file_
res.append(file.entities[method_dec])
return res

def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> Entity:
def resolve_symbol(self, files: dict[Path, File], lsp: SyncLanguageServer, file_path: Path, path: Path, key: str, symbol: Node) -> list[Entity]:
if key in ["base_class", "parameters", "return_type"]:
return self.resolve_type(files, lsp, file_path, path, symbol)
elif key in ["call"]:
Expand Down
14 changes: 11 additions & 3 deletions api/analyzers/source_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
# from .c.analyzer import CAnalyzer
from .java.analyzer import JavaAnalyzer
from .python.analyzer import PythonAnalyzer
from .csharp.analyzer import CSharpAnalyzer

from multilspy import SyncLanguageServer
from multilspy.multilspy_config import MultilspyConfig
Expand All @@ -24,7 +25,8 @@
# '.c': CAnalyzer(),
# '.h': CAnalyzer(),
'.py': PythonAnalyzer(),
'.java': JavaAnalyzer()}
'.java': JavaAnalyzer(),
'.cs': CSharpAnalyzer()}

class NullLanguageServer:
def start_server(self):
Expand Down Expand Up @@ -136,7 +138,12 @@
lsps[".py"] = SyncLanguageServer.create(config, logger, str(path))
else:
lsps[".py"] = NullLanguageServer()
with lsps[".java"].start_server(), lsps[".py"].start_server():
if any(path.rglob('*.cs')):

Check failure on line 141 in api/analyzers/source_analyzer.py

View check run for this annotation

GitHub Advanced Security / CodeQL

Uncontrolled data used in path expression

This path depends on a [user-provided value](1). This path depends on a [user-provided value](2).

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.
config = MultilspyConfig.from_dict({"code_language": "csharp"})
lsps[".cs"] = SyncLanguageServer.create(config, logger, str(path))
else:
lsps[".cs"] = NullLanguageServer()
Comment on lines +141 to +145
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

CodeQL: path traversal via user-controlled path in rglob.

path flows from the API request body into path.rglob('*.cs') without sanitization. A crafted path (e.g., containing ../) could cause the scanner to traverse outside the intended directory. CodeQL flagged this as a security failure. The pre-existing Java/Python blocks at lines 131 and 136 carry the same risk, but this new block adds another instance.

Resolve and validate path against an allowed base before using it in any rglob call. For example:

🔒 Suggested mitigation pattern
# In analyze_local_folder or analyze_sources, resolve and validate path early:
resolved = Path(path).resolve()
# Optionally assert it's within an allowed root
🧰 Tools
🪛 GitHub Check: CodeQL

[failure] 141-141: Uncontrolled data used in path expression
This path depends on a user-provided value.
This path depends on a user-provided value.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@api/analyzers/source_analyzer.py` around lines 141 - 145, The rglob call uses
the user-controlled variable path directly, allowing path traversal; before any
use of path.rglob (including the C# block that creates SyncLanguageServer via
SyncLanguageServer.create and MultilspyConfig.from_dict), resolve and validate
the incoming path in the entry routines (e.g.,
analyze_local_folder/analyze_sources): convert to a Path and call .resolve(),
then ensure the resolved path is within an allowed base/root (reject or
normalize paths that escape the root, e.g., via ..), and only then pass the safe
resolved path to path.rglob and to SyncLanguageServer.create; apply the same
validation for the Java/Python blocks that also use path.rglob.

with lsps[".java"].start_server(), lsps[".py"].start_server(), lsps[".cs"].start_server():
files_len = len(self.files)
for i, file_path in enumerate(files):
file = self.files[file_path]
Expand Down Expand Up @@ -166,7 +173,8 @@
self.second_pass(graph, files, path)

def analyze_sources(self, path: Path, ignore: list[str], graph: Graph) -> None:
files = list(path.rglob("*.java")) + list(path.rglob("*.py"))
path = path.resolve()

Check failure on line 176 in api/analyzers/source_analyzer.py

View check run for this annotation

GitHub Advanced Security / CodeQL

Uncontrolled data used in path expression

This path depends on a [user-provided value](1). This path depends on a [user-provided value](2).

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.
files = list(path.rglob("*.java")) + list(path.rglob("*.py")) + list(path.rglob("*.cs"))

Check failure on line 177 in api/analyzers/source_analyzer.py

View check run for this annotation

GitHub Advanced Security / CodeQL

Uncontrolled data used in path expression

This path depends on a [user-provided value](1). This path depends on a [user-provided value](2).

Check failure on line 177 in api/analyzers/source_analyzer.py

View check run for this annotation

GitHub Advanced Security / CodeQL

Uncontrolled data used in path expression

This path depends on a [user-provided value](1). This path depends on a [user-provided value](2).

Check failure on line 177 in api/analyzers/source_analyzer.py

View check run for this annotation

GitHub Advanced Security / CodeQL

Uncontrolled data used in path expression

This path depends on a [user-provided value](1). This path depends on a [user-provided value](2).

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.

Check failure

Code scanning / CodeQL

Uncontrolled data used in path expression High

This path depends on a
user-provided value
.
This path depends on a
user-provided value
.
# First pass analysis of the source code
self.first_pass(path, files, ignore, graph)

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies = [
"tree-sitter-c>=0.24.1,<0.25.0",
"tree-sitter-python>=0.25.0,<0.26.0",
"tree-sitter-java>=0.23.5,<0.24.0",
"tree-sitter-c-sharp>=0.23.1,<0.24.0",
"flask>=3.1.0,<4.0.0",
"python-dotenv>=1.0.1,<2.0.0",
"multilspy @ git+https://github.com/AviAvni/multilspy.git@python-init-params",
Expand Down
48 changes: 48 additions & 0 deletions tests/source_files/csharp/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
using System;

// Fixture parsed by tests/test_csharp_analyzer.py: it declares one interface,
// one class implementing that interface, and one class with a constructor and
// two methods.
namespace TestProject
{
public interface ILogger
{
void Log(string message);
}

public class ConsoleLogger : ILogger
{
public void Log(string message)
{
// Writes the message to standard output.
Console.WriteLine(message);
}
}

/// <summary>
/// Represents a task to be executed.
/// </summary>
public class Task
{
public string Name { get; set; }
public int Duration { get; set; }

private ILogger _logger;

public Task(string name, int duration, ILogger logger)
{
Name = name;
Duration = duration;
_logger = logger;
// Call through the interface-typed field from inside the constructor.
_logger.Log("Task created: " + name);
}

public bool Execute()
{
_logger.Log("Executing: " + Name);
// Always reports success.
return true;
}

public void Abort(float delay)
{
// `delay` is never read in this method.
_logger.Log("Aborting: " + Name);
// Call to a sibling method of the same class; its result is discarded.
Execute();
}
}
}
69 changes: 69 additions & 0 deletions tests/test_csharp_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import os
import unittest

from api import SourceAnalyzer, Graph


class Test_CSharp_Analyzer(unittest.TestCase):
    """End-to-end test: analyze the C# fixture folder and query the graph."""

    def setUp(self):
        # Each test works against a graph named "csharp".
        self.g = Graph("csharp")

    def tearDown(self):
        self.g.delete()

    def test_analyzer(self):
        """Analyze ``source_files/csharp`` and check the produced nodes and edges."""
        analyzer = SourceAnalyzer()

        # The fixture lives in 'source_files/csharp' next to this test module.
        tests_dir = os.path.dirname(os.path.abspath(__file__))
        path = os.path.join(tests_dir, 'source_files', 'csharp')

        analyzer.analyze_local_folder(path, self.g)

        # ILogger must be present as an Interface node
        interface_rows = self.g._query("MATCH (n:Interface {name: 'ILogger'}) RETURN n LIMIT 1").result_set
        self.assertEqual(len(interface_rows), 1)

        # ConsoleLogger must be present as a Class node
        logger_rows = self.g._query("MATCH (n:Class {name: 'ConsoleLogger'}) RETURN n LIMIT 1").result_set
        self.assertEqual(len(logger_rows), 1)

        # Task must be present as a Class node
        task_rows = self.g._query("MATCH (n:Class {name: 'Task'}) RETURN n LIMIT 1").result_set
        self.assertEqual(len(task_rows), 1)

        # Every method name must match at least one node (label not constrained)
        for method_name in ['Log', 'Execute', 'Abort']:
            method_rows = self.g._query("MATCH (n {name: $name}) RETURN n LIMIT 1", {'name': method_name}).result_set
            self.assertGreaterEqual(len(method_rows), 1, f"Method {method_name} not found")

        # The Task constructor must be present as a Constructor node
        ctor_rows = self.g._query("MATCH (n:Constructor {name: 'Task'}) RETURN n LIMIT 1").result_set
        self.assertEqual(len(ctor_rows), 1)

        # At least one File -[:DEFINES]-> node relationship must exist
        file_defines = self.g._query("MATCH (f:File)-[:DEFINES]->(n) RETURN count(n)").result_set
        self.assertGreater(file_defines[0][0], 0)

        # The Task class must define at least one member
        class_defines = self.g._query("MATCH (c:Class {name: 'Task'})-[:DEFINES]->(m) RETURN count(m)").result_set
        self.assertGreater(class_defines[0][0], 0)

        # ConsoleLogger must be linked to ILogger through IMPLEMENTS
        implements_rows = self.g._query(
            "MATCH (c:Class {name: 'ConsoleLogger'})-[:IMPLEMENTS]->(i:Interface {name: 'ILogger'}) RETURN c, i LIMIT 1"
        ).result_set
        self.assertEqual(len(implements_rows), 1)
Loading
Loading