diff options
author | Lukacs Berki <lberki@google.com> | 2015-09-15 07:31:50 +0000 |
---|---|---|
committer | John Field <jfield@google.com> | 2015-09-15 20:26:34 +0000 |
commit | 73b6c795b179ffd95eb9bef030dc89585a3b394e (patch) | |
tree | cd329c27b4be73cb76a6de79b9c101a70b7117de /third_party | |
parent | c87357735b331b709232e84dcda415fc1f081053 (diff) |
Make ijar appropriately prune InnerClasses, at the cost of a lot of unholy hacks.
--
MOS_MIGRATED_REVID=103068956
Diffstat (limited to 'third_party')
-rw-r--r-- | third_party/ijar/classfile.cc | 319 | ||||
-rwxr-xr-x | third_party/ijar/test/ijar_test.sh | 121 |
2 files changed, 430 insertions, 10 deletions
diff --git a/third_party/ijar/classfile.cc b/third_party/ijar/classfile.cc index 39d24f9dd4..0a359d019b 100644 --- a/third_party/ijar/classfile.cc +++ b/third_party/ijar/classfile.cc @@ -34,6 +34,7 @@ #include <stdlib.h> #include <string.h> +#include <set> #include <string> #include <vector> @@ -99,6 +100,8 @@ struct Constant; // TODO(adonovan) these globals are unfortunate static std::vector<Constant*> const_pool_in; // input constant pool static std::vector<Constant*> const_pool_out; // output constant_pool +static std::set<std::string> used_class_names; +static Constant * class_name; // Returns the Constant object, given an index into the input constant pool. // Note: constant(0) == NULL; this invariant is exploited by the @@ -138,6 +141,10 @@ struct Constant { // calling slot() on them in turn. virtual void Keep() {} + bool Kept() { + return slot_ != 0; + } + // Returns the index of this constant in the output class's constant // pool, assigning a slot if not already done. u2 slot() { @@ -163,6 +170,13 @@ struct Constant { u1 tag_; }; +// Extracts class names from a signature and puts them into the global +// variable used_class_names. +// +// desc: the descriptor class names should be extracted from. +// p: the position where the extraction should tart. +void ExtractClassNames(const std::string& desc, size_t* p); + // See sec.4.4.1 of JVM spec. struct Constant_Class : Constant { @@ -397,6 +411,7 @@ struct Attribute { virtual ~Attribute() {} virtual void Write(u1 *&p) = 0; + virtual void ExtractClassNames() {} void WriteProlog(u1 *&p, u2 length) { put_u2be(p, attribute_name_->slot()); @@ -464,10 +479,49 @@ struct InnerClassesAttribute : Attribute { } void Write(u1 *&p) { - WriteProlog(p, 2 + entries_.size() * 8); - put_u2be(p, entries_.size()); - for (size_t ii = 0; ii < entries_.size(); ++ii) { - Entry *entry = entries_[ii]; + std::set<int> kept_entries; + // We keep an entry if the constant referring to the inner class is already + // kept. Then we mark its outer class and its class name as kept, too, then + // iterate until a fixed point is reached. + int entry_count; + int iteration = 0; + + do { + entry_count = kept_entries.size(); + for (int i_entry = 0; i_entry < entries_.size(); ++i_entry) { + Entry* entry = entries_[i_entry]; + if (entry->inner_class_info->Kept() || + used_class_names.find(entry->inner_class_info->Display()) + != used_class_names.end() || + entry->outer_class_info == class_name || + entry->outer_class_info == NULL || + entry->inner_name == NULL) { + kept_entries.insert(i_entry); + + // These are zero for anonymous inner classes + if (entry->outer_class_info != NULL) { + entry->outer_class_info->slot(); + } + + if (entry->inner_name != NULL) { + entry->inner_name->slot(); + } + } + } + iteration += 1; + } while (entry_count != kept_entries.size()); + + if (kept_entries.size() == 0) { + return; + } + + WriteProlog(p, 2 + kept_entries.size() * 8); + put_u2be(p, kept_entries.size()); + + for (std::set<int>::iterator it = kept_entries.begin(); + it != kept_entries.end(); + ++it) { + Entry *entry = entries_[*it]; put_u2be(p, entry->inner_class_info == NULL ? 0 : entry->inner_class_info->slot()); @@ -516,6 +570,7 @@ struct EnclosingMethodAttribute : Attribute { struct ElementValue { virtual ~ElementValue() {} virtual void Write(u1 *&p) = 0; + virtual void ExtractClassNames() {} static ElementValue* Read(const u1 *&p); u1 tag_; u4 length_; @@ -555,6 +610,12 @@ struct ClassTypeElementValue : ElementValue { put_u1(p, tag_); put_u2be(p, class_info_->slot()); } + + virtual void ExtractClassNames() { + size_t idx = 0; + devtools_ijar::ExtractClassNames(class_info_->Display(), &idx); + } + static ClassTypeElementValue *Read(const u1 *&p) { ClassTypeElementValue *value = new ClassTypeElementValue; value->class_info_ = constant(get_u2be(p)); @@ -570,6 +631,12 @@ struct ArrayTypeElementValue : ElementValue { } } + virtual void ExtractClassNames() { + for (int i = 0; i < values_.size(); i++) { + values_[i]->ExtractClassNames(); + } + } + void Write(u1 *&p) { put_u1(p, tag_); put_u2be(p, values_.size()); @@ -597,6 +664,12 @@ struct Annotation { } } + void ExtractClassNames() { + for (size_t i = 0; i < element_value_pairs_.size(); i++) { + element_value_pairs_[i]->element_value_->ExtractClassNames(); + } + } + void Write(u1 *&p) { put_u2be(p, type_->slot()); put_u2be(p, element_value_pairs_.size()); @@ -662,6 +735,10 @@ struct TypeAnnotation { delete annotation_; } + void ExtractClassNames() { + annotation_->ExtractClassNames(); + } + void Write(u1 *&p) { put_u1(p, target_type_); target_info_->Write(p); @@ -873,6 +950,10 @@ struct AnnotationDefaultAttribute : Attribute { default_value_->Write(p); } + virtual void ExtractClassNames() { + default_value_->ExtractClassNames(); + } + ElementValue *default_value_; }; @@ -913,6 +994,11 @@ struct SignatureAttribute : Attribute { put_u2be(p, signature_->slot()); } + virtual void ExtractClassNames() { + size_t signature_idx = 0; + devtools_ijar::ExtractClassNames(signature_->Display(), &signature_idx); + } + Constant *signature_; }; @@ -954,6 +1040,12 @@ struct AnnotationsAttribute : Attribute { return attr; } + virtual void ExtractClassNames() { + for (int i = 0; i < annotations_.size(); i++) { + annotations_[i]->ExtractClassNames(); + } + } + void Write(u1 *&p) { WriteProlog(p, -1); u1 *payload_start = p - 4; @@ -990,6 +1082,15 @@ struct ParameterAnnotationsAttribute : Attribute { return attr; } + virtual void ExtractClassNames() { + for (size_t i = 0; i < parameter_annotations_.size(); i++) { + const std::vector<Annotation*>& annotations = parameter_annotations_[i]; + for (size_t j = 0; j < annotations.size(); j++) { + annotations[j]->ExtractClassNames(); + } + } + } + void Write(u1 *&p) { WriteProlog(p, -1); u1 *payload_start = p - 4; @@ -1022,6 +1123,12 @@ struct TypeAnnotationsAttribute : Attribute { return attr; } + virtual void ExtractClassNames() { + for (int i = 0; i < type_annotations_.size(); i++) { + type_annotations_[i]->ExtractClassNames(); + } + } + void Write(u1 *&p) { WriteProlog(p, -1); u1 *payload_start = p - 4; @@ -1072,6 +1179,12 @@ struct HasAttrs { delete attributes[i]; } } + + void ExtractClassNames() { + for (int i = 0; i < attributes.size(); i++) { + attributes[i]->ExtractClassNames(); + } + } }; // A field or method. @@ -1163,6 +1276,23 @@ struct ClassFile : HasAttrs { for (size_t ii = 0; ii < methods.size(); ++ii) { methods[ii]->Write(p); } + + Attribute* inner_classes = NULL; + + // Make the inner classes attribute the last, so that it can know which + // constants were needed + for (size_t ii = 0; ii < attributes.size(); ii++) { + if (attributes[ii]->attribute_name_->Display() == "InnerClasses") { + inner_classes = attributes[ii]; + attributes.erase(attributes.begin() + ii); + break; + } + } + + if (inner_classes != NULL) { + attributes.push_back(inner_classes); + } + WriteAttrs(p); } @@ -1229,10 +1359,19 @@ void HasAttrs::ReadAttrs(const u1 *&p) { } void HasAttrs::WriteAttrs(u1 *&p) { - put_u2be(p, attributes.size()); + u1* p_size = p; + + put_u2be(p, 0); + int n_written_attrs = 0; for (size_t ii = 0; ii < attributes.size(); ii++) { + u1* before = p; attributes[ii]->Write(p); + if (p != before) { + n_written_attrs++; + } } + + put_u2be(p_size, n_written_attrs); } // See sec.4.4 of JVM spec. @@ -1406,6 +1545,8 @@ static ClassFile *ReadClass(const void *classdata, size_t length) { clazz->access_flags = get_u2be(p); clazz->this_class = constant(get_u2be(p)); + class_name = clazz->this_class; + u2 super_class_id = get_u2be(p); clazz->super_class = super_class_id == 0 ? NULL : constant(super_class_id); @@ -1441,7 +1582,170 @@ static ClassFile *ReadClass(const void *classdata, size_t length) { return clazz; } +// In theory, '/' is also reserved, but it's okay if we just parse package +// identifiers as part of the class name. Note that signatures are UTF-8, but +// this works just as well as in plain ASCII. +static const char *SIGNATURE_NON_IDENTIFIER_CHARS = ".;[<>:"; + +void Expect(const std::string& desc, size_t* p, char expected) { + if (desc[*p] != expected) { + fprintf(stderr, "Expected '%c' in '%s' at %zd in signature\n", + expected, desc.substr(*p).c_str(), *p); + exit(1); + } + + *p += 1; +} + +// These functions form a crude recursive descent parser for descriptors and +// signatures in class files (see JVM spec 4.3). +// +// This parser is a bit more liberal than the spec, but this should be fine, +// because it accepts all valid class files and croaks only on invalid ones. +void ParseFromClassTypeSignature(const std::string& desc, size_t* p); +void ParseSimpleClassTypeSignature(const std::string& desc, size_t* p); +void ParseClassTypeSignatureSuffix(const std::string& desc, size_t* p); +void ParseIdentifier(const std::string& desc, size_t* p); +void ParseTypeArgumentsOpt(const std::string& desc, size_t* p); +void ParseMethodDescriptor(const std::string& desc, size_t* p); + +void ParseClassTypeSignature(const std::string& desc, size_t* p) { + Expect(desc, p, 'L'); + ParseSimpleClassTypeSignature(desc, p); + ParseClassTypeSignatureSuffix(desc, p); + Expect(desc, p, ';'); +} + +void ParseSimpleClassTypeSignature(const std::string& desc, size_t* p) { + ParseIdentifier(desc, p); + ParseTypeArgumentsOpt(desc, p); +} + +void ParseClassTypeSignatureSuffix(const std::string& desc, size_t* p) { + while (desc[*p] == '.') { + *p += 1; + ParseSimpleClassTypeSignature(desc, p); + } +} + +void ParseIdentifier(const std::string& desc, size_t* p) { + size_t next = desc.find_first_of(SIGNATURE_NON_IDENTIFIER_CHARS, *p); + std::string id = desc.substr(*p, next - *p); + used_class_names.insert(id); + *p = next; +} + +void ParseTypeArgumentsOpt(const std::string& desc, size_t* p) { + if (desc[*p] != '<') { + return; + } + + *p += 1; + while (desc[*p] != '>') { + switch (desc[*p]) { + case '*': + *p += 1; + break; + + case '+': + case '-': + *p += 1; + ExtractClassNames(desc, p); + break; + + default: + ExtractClassNames(desc, p); + break; + } + } + + *p += 1; +} + +void ParseMethodDescriptor(const std::string& desc, size_t* p) { + Expect(desc, p, '('); + while (desc[*p] != ')') { + ExtractClassNames(desc, p); + } + + Expect(desc, p, ')'); + ExtractClassNames(desc, p); +} + +void ParseFormalTypeParameters(const std::string& desc, size_t* p) { + Expect(desc, p, '<'); + while (desc[*p] != '>') { + ParseIdentifier(desc, p); + Expect(desc, p, ':'); + if (desc[*p] != ':' && desc[*p] != '>') { + ExtractClassNames(desc, p); + } + + while (desc[*p] == ':') { + Expect(desc, p, ':'); + ExtractClassNames(desc, p); + } + } + + Expect(desc, p, '>'); +} + +void ExtractClassNames(const std::string& desc, size_t* p) { + switch (desc[*p]) { + case '<': + ParseFormalTypeParameters(desc, p); + ExtractClassNames(desc, p); + break; + + case 'L': + ParseClassTypeSignature(desc, p); + break; + + case '[': + *p += 1; + ExtractClassNames(desc, p); + break; + + case 'T': + *p += 1; + ParseIdentifier(desc, p); + Expect(desc, p, ';'); + break; + + case '(': + ParseMethodDescriptor(desc, p); + break; + + case 'B': + case 'C': + case 'D': + case 'F': + case 'I': + case 'J': + case 'S': + case 'Z': + case 'V': + *p += 1; + break; + + default: + fprintf(stderr, "Invalid signature %s\n", desc.substr(*p).c_str()); + } +} + void ClassFile::WriteClass(u1 *&p) { + used_class_names.clear(); + std::vector<Member *> members; + members.insert(members.end(), fields.begin(), fields.end()); + members.insert(members.end(), methods.begin(), methods.end()); + ExtractClassNames(); + for (int i = 0; i < members.size(); i++) { + Member *member = members[i]; + size_t idx = 0; + devtools_ijar::ExtractClassNames(member->descriptor->Display(), &idx); + member->ExtractClassNames(); + } + // We have to write the body out before the header in order to reference // the essential constants and populate the output constant pool: u1 *body = new u1[length]; @@ -1466,11 +1770,6 @@ void StripClass(u1 *&classdata_out, const u1 *classdata_in, size_t in_length) { // beginning of the output phase; calls to Constant::slot() will // fail if called prior to this. const_pool_out.push_back(NULL); - - // TODO(bazel-team): We should only keep classes in the InnerClass attributes - // if they're used in the output. The entries can then be cleaned out of the - // constant pool in the normal way. - clazz->WriteClass(classdata_out); delete clazz; diff --git a/third_party/ijar/test/ijar_test.sh b/third_party/ijar/test/ijar_test.sh index f091144927..52ff68cc8e 100755 --- a/third_party/ijar/test/ijar_test.sh +++ b/third_party/ijar/test/ijar_test.sh @@ -43,6 +43,11 @@ source ${DIR}/testenv.sh || { echo "testenv.sh not found!" >&2; exit 1; } ## Tools # Ensure that javac is absolute [[ "$JAVAC" =~ ^/ ]] || JAVAC="$PWD/$JAVAC" +[[ "$JAR" =~ ^/ ]] || JAR="$PWD/$JAR" +[[ "$IJAR" =~ ^/ ]] || IJAR="$PWD/$IJAR" +[[ "$UNZIP" =~ ^/ ]] || UNZIP="$PWD/$UNZIP" +[[ "$ZIP" =~ ^/ ]] || ZIP="$PWD/$ZIP" +[[ "$JAVAP" =~ ^/ ]] || JAVAP="$PWD/$JAVAP" IJAR_SRCDIR=$(dirname ${IJAR}) A_JAR=$TEST_TMPDIR/A.jar @@ -165,6 +170,7 @@ function test_ijar_output() { # Check that no private class members are found: lines=$($JAVAP -private -classpath $A_JAR A | grep priv | wc -l) + cp $A_JAR /tmp/ajar.jar check_eq 2 $lines "Input jar should have 2 private members!" lines=$($JAVAP -private -classpath $A_INTERFACE_JAR A | grep priv | wc -l) check_eq 0 $lines "Interface jar should have no private members!" @@ -374,4 +380,119 @@ function test_corrupted_end_of_centraldir() { expect_log "missing end of central directory record" } +function test_inner_class_argument() { + cd $TEST_TMPDIR + + mkdir -p a b c + cat > a/A.java <<EOF +package a; + +public class A { + public static class A2 { + public int n; + } +} +EOF + + cat > b/B.java <<EOF +package b; +import a.A; + +public class B { + public static void b(A.A2 arg) { + System.out.println(arg.n); + } +} +EOF + + cat > c/C.java <<EOF +package c; +import b.B; + +public class C { + public static void c() { + B.b(null); + } +} +EOF + + $JAVAC a/A.java b/B.java + $JAR cf lib.jar {a,b}/*.class + $JAVAC -cp lib.jar c/C.java + +} + +function test_inner_class_pruning() { + cd $TEST_TMPDIR + + mkdir -p lib/l {one,two,three}/a + + cat > lib/l/L.java <<EOF +package l; + +public class L { + public static class I { + public static class J { + public static int number() { + return 3; + } + } + public static int number() { + return 2; + } + } +} +EOF + + cat > one/a/A.java <<EOF +package a; + +public class A { + public static void message() { + System.out.println("hello " + 1); + } +} +EOF + + cat > two/a/A.java <<EOF +package a; + +import l.L; + +public class A { + public static void message() { + System.out.println("hello " + L.I.number()); + } +} +EOF + + cat > three/a/A.java <<EOF +package a; + +import l.L; + +public class A { + public static void message() { + System.out.println("hello " + L.I.J.number()); + } +} +EOF + + $JAVAC lib/l/L.java + (cd lib; $JAR cf lib.jar l/*.class) + $JAVAC one/a/A.java + (cd one; $JAR cf one.jar a/*.class) + $JAVAC two/a/A.java -classpath lib/lib.jar + (cd two; $JAR cf two.jar a/*.class) + $JAVAC three/a/A.java -classpath lib/lib.jar + (cd three; $JAR cf three.jar a/*.class) + + $IJAR one/one.jar one/one-ijar.jar + $IJAR one/one.jar two/two-ijar.jar + $IJAR one/one.jar three/three-ijar.jar + + cmp one/one-ijar.jar two/two-ijar.jar + cmp one/one-ijar.jar three/three-ijar.jar +} + run_suite "ijar tests" |