aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools/build/docgen/DocCheckerUtils.java
blob: 0388897027c5b93e4e74475744836fb6fed4830a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.docgen;

import com.google.common.collect.ImmutableSet;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A utility class to check the generated documentation for unbalanced HTML tags.
 */
public class DocCheckerUtils {

  // TODO(bazel-team): remove elements from this list and clean up the tested documentations.
  /** Lower-case names of the tags that are exempt from the open/close balance check. */
  private static final ImmutableSet<String> UNCHECKED_HTML_TAGS = ImmutableSet.<String>of(
      "br", "li", "ul", "p");

  /**
   * Matches a single opening or closing tag. Group 1 is the tag name (prefixed with '/' for a
   * closing tag); group 2 is everything between the name and the terminating '>'.
   */
  private static final Pattern TAG_PATTERN = Pattern.compile(
        "<([/]?[a-z0-9_]+)"
      + "([^>]*)"
      + ">",
      Pattern.CASE_INSENSITIVE);

  /** Matches an HTML comment. DOTALL lets the pattern span comments that contain line breaks. */
  private static final Pattern COMMENT_PATTERN = Pattern.compile(
      "<!--.*?-->",
      Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

  /** Maximum number of characters of preceding text printed with a diagnostic. */
  private static final int HELP_CONTEXT_LENGTH = 200;

  /**
   * Returns the first unmatched html tag of {@code src}, or null if no such tag is found, and
   * prints some help to standard error when one is found.
   *
   * <p>HTML comments are removed before the check, so tags inside comments are ignored. The
   * check is not performed on br, ul, li and p tags, nor on self closing tags. Tag names are
   * compared case-insensitively.
   *
   * <p>Note that tags which are still open when the input ends are not reported.
   *
   * @param src the html text to check
   * @return the name of the open tag that a closing tag failed to match, or the closing tag
   *     itself (including its leading '/') if no tag was open at that point, or null if no
   *     mismatch was found
   */
  public static String getFirstUnclosedTagAndPrintHelp(String src) {
    return getFirstUnclosedTag(src, true);
  }

  /**
   * Same check as {@link #getFirstUnclosedTagAndPrintHelp(String)}, but without printing
   * anything.
   */
  static String getFirstUnclosedTag(String src) {
    return getFirstUnclosedTag(src, false);
  }

  // TODO(bazel-team): run this on the Skylark docs too.
  /**
   * Scans {@code src} tag by tag, keeping a stack of the currently open tags, and stops at the
   * first closing tag that does not match the innermost open tag.
   *
   * @param src the html text to check
   * @param printHelp whether to print a diagnostic to standard error when a mismatch is found
   * @return the offending tag, or null if every closing tag matched
   */
  private static String getFirstUnclosedTag(String src, boolean printHelp) {
    src = COMMENT_PATTERN.matcher(src).replaceAll("");
    Matcher tagMatcher = TAG_PATTERN.matcher(src);
    Deque<String> tagStack = new ArrayDeque<>();
    while (tagMatcher.find()) {
      String tag = tagMatcher.group(1);
      String rest = tagMatcher.group(2);
      boolean closing = tag.startsWith("/");
      // Only a closing tag carries a leading '/'; an opening tag's name is used as it is.
      String name = closing ? tag.substring(1) : tag;

      // Ignoring self closing tags and unchecked tags.
      if (rest.endsWith("/") || UNCHECKED_HTML_TAGS.contains(name.toLowerCase(Locale.ROOT))) {
        continue;
      }

      if (!closing) {
        // Starting tag.
        tagStack.addLast(name);
        continue;
      }

      // Closing tag. pollLast() returns null on an empty stack instead of throwing.
      String lastTag = tagStack.pollLast();
      if (lastTag == null) {
        if (printHelp) {
          System.err.println(
                "Closing tag without a matching open tag: " + tag + "\n"
              + "Last 200 characters:\n"
              + precedingText(src, tagMatcher.start()));
        }
        return tag;
      }
      if (!lastTag.equalsIgnoreCase(name)) {
        if (printHelp) {
          System.err.println(
                "Unclosed tag: " + lastTag + "\n"
              + "Trying to close with: " + name + "\n"
              + "Stack of open tags: " + tagStack + "\n"
              + "Last 200 characters:\n"
              + precedingText(src, tagMatcher.start()));
        }
        return lastTag;
      }
    }
    return null;
  }

  /** Returns up to {@link #HELP_CONTEXT_LENGTH} characters of {@code src} ending at {@code end}. */
  private static String precedingText(String src, int end) {
    return src.substring(Math.max(end - HELP_CONTEXT_LENGTH, 0), end);
  }
}