diff options
author | 2019-12-20 23:36:03 -0500 | |
---|---|---|
committer | 2019-12-21 06:18:16 +0100 | |
commit | 1bfa9bda3f10627a9798edfc65472d59bc9ffeba (patch) | |
tree | bf711164c7a4ad4dbf18b353941b223af272e011 /bin | |
parent | Makefile: make build_search_documents.py a prerequisite for documents.js (diff) | |
download | devmanual-1bfa9bda3f10627a9798edfc65472d59bc9ffeba.tar.gz devmanual-1bfa9bda3f10627a9798edfc65472d59bc9ffeba.tar.bz2 devmanual-1bfa9bda3f10627a9798edfc65472d59bc9ffeba.zip |
bin/build_search_documents.py: fix aggressive whitespace stripping
In stringify_node(), we aggressively strip the whitespace around
child nodes. This results in something like
"<c>SLOT</c>, <c>:SLOT</c>" being parsed as "SLOT,:SLOT",
removing the whitespace between ',' and ':'.
Signed-off-by: Göktürk Yüksek <gokturk@gentoo.org>
Signed-off-by: Ulrich Müller <ulm@gentoo.org>
Diffstat (limited to 'bin')
-rwxr-xr-x | bin/build_search_documents.py | 18 |
1 files changed, 14 insertions, 4 deletions
diff --git a/bin/build_search_documents.py b/bin/build_search_documents.py index 3816fdb..e19dce6 100755 --- a/bin/build_search_documents.py +++ b/bin/build_search_documents.py @@ -18,22 +18,32 @@ def stringify_node(parent: ET.Element) -> str: parent -- the node to convert to a string """ + # We usually have something like: + # <p>\nText + # Left strip the whitespace. if parent.text: text = parent.text.lstrip() else: text = str() + # For each child, strip the tags and append to text + # along with the tail text following it. + # The tail may include '\n' if it spans multiple lines. + # We will worry about those on return, not now. for child in parent.getchildren(): # The '<d/>' tag is simply a fancier '-' character if child.tag == 'd': text += '-' if child.text: - text += child.text.lstrip() + text += child.text if child.tail: - text += child.tail.rstrip() + text += child.tail - text += parent.tail.rstrip() - return text.replace('\n', ' ') + # A paragraph typically ends with: + # Text\n</p> + # Right strip any spurious whitespace. + # Finally, get rid of any intermediate newlines. + return text.rstrip().replace('\n', ' ') def process_node(documents: list, node: ET.Element, name: str, url: str) -> None: |