# Listing metadata from the source page: 305 lines, 14 KiB, Python.
import os
|
|
import sys
|
|
from pathlib import Path
|
|
import xml.etree.ElementTree as ET
|
|
|
|
|
|
class XMLDocParser:
    """
    Parses and extracts docs from Doxygen-generated XML.

    The main entry point is extract_docstring(), which locates a C++ method in
    the Doxygen XML output and returns a plain-text docstring for it. The other
    public methods are the individual steps of that lookup.
    """

    def __init__(self):
        # Memory for overloaded functions with identical parameter name sets.
        # Maps "Class.method(arg1,arg2)" to the index of the overload that was
        # documented most recently.
        self._memory = {}
        # This is useful for investigating functions that cause problems for extract_docstring.
        # Set this to true to have useful information for debugging this class, as in the CLI
        # function at the bottom of this class.
        self._verbose = False

    def parse_xml(self, xml_file: str):
        """
        Get the ElementTree of an XML file given the file name.

        If an error occurs, prints a warning and returns None.

        Args:
            xml_file (str): Path of the XML file to parse.

        Returns:
            xml.etree.ElementTree.ElementTree or None: The parsed tree, or None
            if the file does not exist or is not well-formed XML.
        """
        try:
            return ET.parse(xml_file)
        except FileNotFoundError:
            print(f"Warning: XML file '{xml_file}' not found.")
            return None
        except ET.ParseError:
            print(f"Warning: Failed to parse XML file '{xml_file}'.")
            return None

    def extract_docstring(self, xml_folder: str, cpp_class: str,
                          cpp_method: str, method_args_names: 'list[str]'):
        """
        Extract the docstrings for a C++ class's method from the Doxygen-generated XML.

        Args:
            xml_folder (str): The path to the folder that contains all of the Doxygen-generated XML.
            cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted.
            cpp_method (str): The name of the C++ method whose docstring is to be extracted.
            method_args_names (list): A list of the names of the cpp_method's parameters.

        Returns:
            str: The formatted docstring, or an empty string if no matching
            member definition could be found.
        """
        self.print_if_verbose(f"Extracting docs for {cpp_class}.{cpp_method}")

        # Get all of the member definitions in cpp_class with name cpp_method
        maybe_member_defs = self.get_member_defs(xml_folder, cpp_class,
                                                 cpp_method)

        # Filter member definitions which don't match the given argument names
        member_defs, ignored_params = self.filter_member_defs(
            maybe_member_defs, method_args_names)

        # Nothing matched (missing files, unknown class/method, or different
        # arguments): there is nothing to document.
        if not member_defs:
            return ""

        # Find which member to get docs from, if there are multiple that match in name and args
        documenting_index = self.determine_documenting_index(
            cpp_class, cpp_method, method_args_names, member_defs)

        # The overload memory only ever counts upwards, so if the same
        # signature is requested more often than there are overloads, keep
        # using the last overload instead of indexing past the end.
        documenting_index = min(documenting_index, len(member_defs) - 1)

        # Extract the docs for the function that matches cpp_class.cpp_method(*method_args_names).
        return self.get_formatted_docstring(member_defs[documenting_index],
                                            ignored_params)

    def get_member_defs(self, xml_folder: str, cpp_class: str,
                        cpp_method: str):
        """Get all of the member definitions in cpp_class with name cpp_method.

        Args:
            xml_folder (str): The folder containing the Doxygen XML documentation.
            cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted.
            cpp_method (str): The name of the C++ method whose docstring is to be extracted.

        Returns:
            list: All of the member definitions in cpp_class with name cpp_method.
            Empty if the index file, the class entry, or the class file
            cannot be found or parsed.
        """
        xml_folder_path = Path(xml_folder)

        # Create the path to the Doxygen XML index file.
        xml_index_file = xml_folder_path / "index.xml"

        # Parse the index file
        index_tree = self.parse_xml(xml_index_file)
        if index_tree is None:
            self.print_if_verbose(f"Index file {xml_index_file} was empty.")
            return []

        index_root = index_tree.getroot()

        # Find the compound with name == cpp_class
        class_index = index_root.find(f"./*[name='{cpp_class}']")

        if class_index is None:
            self.print_if_verbose(
                f"Could not extract docs for {cpp_class}.{cpp_method}; class not found in index file."
            )
            return []

        refid = class_index.attrib.get("refid")
        if refid is None:
            self.print_if_verbose(
                f"Could not extract docs for {cpp_class}.{cpp_method}; index entry has no refid."
            )
            return []

        # Create the path to the file with the documentation for cpp_class.
        # The file name must be assembled before joining: "/" binds tighter
        # than "+", and a Path cannot be added to a str.
        xml_class_file = xml_folder_path / (refid + '.xml')

        # Parse the class file
        class_tree = self.parse_xml(xml_class_file)
        if class_tree is None:
            self.print_if_verbose(f"Class file {xml_class_file} was empty.")
            return []

        class_root = class_tree.getroot()

        # Find the member(s) in cpp_class with name == cpp_method
        return class_root.findall(
            f"compounddef/sectiondef//*[name='{cpp_method}']")

    @staticmethod
    def _find_param_name(param):
        """Return the element holding a <param>'s name, or None if it has none."""
        # declname is the tag that usually contains the param name
        name_element = param.find("declname")
        # If we couldn't find the declname, try the defname (used uncommonly)
        if name_element is None:
            name_element = param.find("defname")
        return name_element

    def filter_member_defs(self, maybe_member_defs: list,
                           method_args_names: list):
        """
        Remove member definitions which do not match the supplied argument names list.

        Args:
            maybe_member_defs (list): The list of all member definitions in the class which share the same name.
            method_args_names (list): The list of argument names in the definition of the function whose documentation is desired.
                Supplying the argument names allows for the filtering of overloaded functions with the same name but different arguments.

        Returns:
            tuple[list, list]: (the filtered member definitions, parameters which should be ignored because they are optional)
        """
        member_defs = []

        # Optional parameters we should ignore if we encounter them in the docstring
        ignored_params = []

        num_args = len(method_args_names)

        # Filter out the members which don't match the method_args_names
        for maybe_member_def in maybe_member_defs:
            self.print_if_verbose(
                f"Investigating member_def with argstring {maybe_member_def.findtext('argsstring')}"
            )
            # Find the number of required parameters and the number of total parameters from the
            # Doxygen XML for this member_def
            params = maybe_member_def.findall("param")
            num_tot_params = len(params)
            # Calculate required params by subtracting the number of optional params (params where defval is
            # set--defval means default value) from the number of total params
            num_req_params = num_tot_params - sum(
                1 for param in params if param.find("defval") is not None)

            # If the number of parameters in method_args_names matches neither number, eliminate this member_def
            # This is done because wrap generates a python wrapper function twice for every function with
            # optional parameters: one with none of the optional parameters, and one with all of the optional
            # parameters, required.
            if num_args not in (num_req_params, num_tot_params):
                self.print_if_verbose(
                    f"Wrong number of parameters: got {num_args}, expected required {num_req_params} or total {num_tot_params}."
                )
                continue

            # If the parameter names don't match, eliminate this member_def.
            # num_args <= num_tot_params here, so zip() pairs every argument
            # name with its parameter.
            names_match = True
            for arg_name, param in zip(method_args_names, params):
                param_name = self._find_param_name(param)
                # A parameter without a name can't be matched. This may be unreachable but Doxygen is
                # not well-documented enough to rely on a <declname> or a <defname> always being
                # defined inside a <param>.
                if param_name is None or arg_name != param_name.text:
                    names_match = False
                    break
            if not names_match:
                self.print_if_verbose("Names didn't match.")
                continue

            # At this point, this member_def can be assumed to be the desired function (or is indistinguishable
            # from it based on all of the reliable information we have--if this is the case, we need to rely on
            # the _memory to give the correct docs for each.)
            member_defs.append(maybe_member_def)
            self.print_if_verbose("Confirmed as correct function.")

            # Remember which parameters to ignore, if any. These are the
            # trailing optional parameters the caller did not list.
            for param in params[num_args:]:
                param_name = self._find_param_name(param)
                if param_name is not None and param_name.text is not None:
                    ignored_params.append(param_name.text)

        return member_defs, ignored_params

    def determine_documenting_index(self, cpp_class: str, cpp_method: str,
                                    method_args_names: list,
                                    member_defs: list):
        """
        Determine which member definition to retrieve documentation from, if there are multiple.

        Args:
            cpp_class (str): The name of the C++ class that contains the function whose docstring is to be extracted.
            cpp_method (str): The name of the C++ method whose docstring is to be extracted.
            method_args_names (list): A list of the names of the cpp_method's parameters.
            member_defs (list): All of the member definitions of cpp_class which match cpp_method in name
                and whose arguments have the same names as method_args_names.

        Returns:
            int: The index indicating which member definition to document.
        """
        # If there are multiple member defs that match the method args names,
        # remember how many we've encountered already so that we can return
        # the docs for the first one we haven't yet extracted.
        # This is only relevant if there are overloaded functions where the
        # parameter types are different but the parameter names are the same,
        # e.g. foo(int bar) and foo(string bar). The parameter types cannot be
        # relied on because they cannot be assumed to be the same between GTSAM
        # implementation and pybind11 generated wrapper, e.g. OptionalJacobian
        # in GTSAM becomes Eigen::Matrix in the pybind11 code.
        if len(member_defs) <= 1:
            return 0

        function_key = f"{cpp_class}.{cpp_method}({','.join(method_args_names or [])})"
        # First request for this signature yields 0, each later one the next index.
        documenting_index = self._memory.get(function_key, -1) + 1
        self._memory[function_key] = documenting_index
        return documenting_index

    def get_formatted_docstring(self,
                                member_def: 'xml.etree.ElementTree.Element',
                                ignored_params: list):
        """Gets the formatted docstring for the supplied XML element representing a member definition.

        Args:
            member_def (xml.etree.ElementTree.Element): The member definition to document.
            ignored_params (list): The optional parameters which should be ignored, if any.

        Returns:
            str: The formatted docstring.
        """
        docstring = ""

        brief_description = member_def.find(".//briefdescription")
        detailed_description = member_def.find(".//detaileddescription")

        # Add the brief description first, if it exists.
        if brief_description is not None:
            for para in brief_description.findall("para"):
                docstring += "".join(t for t in para.itertext() if t.strip())

        # Add the detailed description. This includes the parameter list and the return value.
        if detailed_description is not None:
            docstring += "\n"
            # Add non-parameter detailed description: top-level paragraphs
            # that do not directly contain a parameter list.
            for element in detailed_description:
                if element.tag == "para" and all(
                        child.tag != "parameterlist" for child in element):
                    docstring += "".join(
                        t for t in element.itertext() if t.strip()) + " "

            # Add parameter docs
            parameter_list = detailed_description.find(".//parameterlist")
            if parameter_list is not None:
                for i, parameter_item in enumerate(
                        parameter_list.findall(".//parameteritem")):
                    # findtext() yields None/"" instead of raising when the
                    # name or description element is absent or empty.
                    name = parameter_item.findtext(".//parametername")
                    desc = parameter_item.findtext(
                        ".//parameterdescription/para")
                    if name in ignored_params:
                        continue
                    shown_name = name.strip() if name else f"[Parameter {i}]"
                    shown_desc = desc.strip(
                    ) if desc else "No description provided"
                    docstring += f"{shown_name}: {shown_desc}\n"

            # Add return value docs. Select the simplesect by kind so that a
            # preceding note/see/etc. section does not hide the return docs.
            return_sect = detailed_description.find(
                ".//simplesect[@kind='return']")
            if return_sect is not None:
                return_text = return_sect.findtext("para")
                if return_text:
                    docstring += f"Returns: {return_text.strip()}"

        return docstring.strip()

    def print_if_verbose(self, text: str):
        """
        Print text if the parser is in verbose mode.

        Args:
            text (str): The message to print.
        """
        if self._verbose:
            print(text)
|
|
|
|
|
|
# Command-line entry point for debugging: prints the docstring extracted for a
# single C++ method, with the parser's verbose diagnostics switched on.
if __name__ == "__main__":
    if len(sys.argv) != 5:
        print(
            "Usage: python xml_parser.py <doxygen_xml_folder> <cpp_class> <cpp_method> <method_args_names (comma-separated)>"
        )
    else:
        parser = XMLDocParser()
        parser._verbose = True
        xml_folder = sys.argv[1]
        # "".split(",") yields [""], which would be treated as one parameter
        # with an empty name and so could never match a zero-parameter method.
        # Drop empty entries (and stray whitespace around names) so that an
        # empty argument means "no parameters".
        method_args_names = [
            name.strip() for name in sys.argv[4].split(",") if name.strip()
        ]
        extracted_doc = parser.extract_docstring(xml_folder, sys.argv[2],
                                                 sys.argv[3],
                                                 method_args_names)

        print()
        print(extracted_doc.strip())
|