diff options
Diffstat (limited to 'src/main/java/com/google/devtools/build/docgen/DocCheckerUtils.java')
-rw-r--r-- | src/main/java/com/google/devtools/build/docgen/DocCheckerUtils.java | 95 |
1 files changed, 95 insertions, 0 deletions
// Copyright 2014 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.docgen;

import com.google.common.collect.ImmutableSet;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A utility class to check the generated documentation for unbalanced HTML tags.
 */
public class DocCheckerUtils {

  // TODO(bazel-team): remove elements from this list and clean up the tested documentations.
  /** Lower-case names of tags that are excluded from the balance check. */
  private static final ImmutableSet<String> UNCHECKED_HTML_TAGS = ImmutableSet.<String>of(
      "br", "li", "ul", "p");

  /**
   * Matches one HTML tag. Group 1 is the tag name, prefixed with '/' for a closing tag;
   * group 2 is everything between the name and the terminating '>'.
   */
  private static final Pattern TAG_PATTERN = Pattern.compile(
      "<([/]?[a-z0-9_]+)"
      + "([^>]*)"
      + ">",
      Pattern.CASE_INSENSITIVE);

  /**
   * Matches an HTML comment. DOTALL is not set, so '.' does not match line terminators and
   * only comments that start and end on the same line are matched.
   */
  private static final Pattern COMMENT_PATTERN = Pattern.compile(
      "<!--.*?-->",
      Pattern.CASE_INSENSITIVE);

  /** Number of characters of context printed in front of an offending position. */
  private static final int HELP_CONTEXT_LENGTH = 200;

  /**
   * Returns the first unmatched html tag of {@code src}, or null if no such tag exists.
   * Some help is printed to stderr when an unmatched tag is found.
   *
   * <p>The check is not performed on br, ul, li and p tags, nor on self closing tags. Tag
   * names are compared case-insensitively. Comments contained in a single line are removed
   * before checking; tags inside comments that span several lines are still checked.
   *
   * <p>A tag is reported when it is closed by a different tag (the open tag is returned),
   * when a closing tag has no open tag at all (the closing tag's name is returned), or when
   * it is still open at the end of the input (the innermost open tag is returned).
   *
   * @param src the html source to check; must not be null
   * @return the name of the first unmatched tag as written in {@code src}, or null
   */
  public static String getFirstUnclosedTagAndPrintHelp(String src) {
    return getFirstUnclosedTag(src, true);
  }

  /**
   * Same check as {@link #getFirstUnclosedTagAndPrintHelp} but without printing anything.
   *
   * @param src the html source to check; must not be null
   * @return the name of the first unmatched tag as written in {@code src}, or null
   */
  static String getFirstUnclosedTag(String src) {
    return getFirstUnclosedTag(src, false);
  }

  // TODO(bazel-team): run this on the Skylark docs too.
  /**
   * Walks over all tags of {@code src} keeping a stack of the currently open ones.
   *
   * @param src the html source to check
   * @param printHelp whether to print diagnostics to stderr when an unmatched tag is found
   * @return the name of the first unmatched tag, or null if all checked tags are balanced
   */
  private static String getFirstUnclosedTag(String src, boolean printHelp) {
    src = COMMENT_PATTERN.matcher(src).replaceAll("");
    Matcher tagMatcher = TAG_PATTERN.matcher(src);
    Deque<String> tagStack = new ArrayDeque<>();
    while (tagMatcher.find()) {
      String rawTag = tagMatcher.group(1);
      String rest = tagMatcher.group(2);
      boolean closing = rawTag.startsWith("/");
      // Only a closing tag carries a leading '/'; stripping a character from an opening tag
      // would make e.g. "rp" look like the unchecked tag "p".
      String tag = closing ? rawTag.substring(1) : rawTag;

      // Ignoring self closing tags and unchecked tags.
      if (rest.endsWith("/") || UNCHECKED_HTML_TAGS.contains(tag.toLowerCase(Locale.ROOT))) {
        continue;
      }

      if (!closing) {
        // Starting tag.
        tagStack.addLast(tag);
        continue;
      }

      // Closing tag. pollLast() returns null on an empty stack instead of throwing.
      String lastTag = tagStack.pollLast();
      if (lastTag == null) {
        if (printHelp) {
          System.err.println(
              "Closing tag without a matching open tag: " + tag + "\n"
              + "Last " + HELP_CONTEXT_LENGTH + " characters:\n"
              + contextBefore(src, tagMatcher.start()));
        }
        return tag;
      }
      if (!lastTag.equalsIgnoreCase(tag)) {
        if (printHelp) {
          System.err.println(
              "Unclosed tag: " + lastTag + "\n"
              + "Trying to close with: " + tag + "\n"
              + "Stack of open tags: " + tagStack + "\n"
              + "Last " + HELP_CONTEXT_LENGTH + " characters:\n"
              + contextBefore(src, tagMatcher.start()));
        }
        return lastTag;
      }
    }

    // Anything still on the stack was opened but never closed.
    String unclosedTag = tagStack.peekLast();
    if (unclosedTag != null && printHelp) {
      System.err.println(
          "Unclosed tag at the end of the input: " + unclosedTag + "\n"
          + "Stack of open tags: " + tagStack + "\n"
          + "Last " + HELP_CONTEXT_LENGTH + " characters:\n"
          + contextBefore(src, src.length()));
    }
    return unclosedTag;
  }

  /** Returns at most {@link #HELP_CONTEXT_LENGTH} characters of src directly preceding end. */
  private static String contextBefore(String src, int end) {
    return src.substring(Math.max(end - HELP_CONTEXT_LENGTH, 0), end);
  }
}