aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/tools/singlejar/input_jar.cc
blob: 08007d5fd4ebd5d5323e61d367f7b3fc50980a8a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
// Copyright 2016 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "src/tools/singlejar/input_jar.h"

bool InputJar::Open(const std::string &path) {
  if (!path_.empty()) {
    diag_errx(1, "%s:%d: This instance is already handling %s\n", __FILE__,
              __LINE__, path_.c_str());
  }
  if (!mapped_file_.Open(path)) {
    diag_warn("%s:%d: Cannot open input jar %s", __FILE__, __LINE__,
              path.c_str());
    mapped_file_.Close();
    return false;
  }
  if (mapped_file_.size() < sizeof(ECD)) {
    diag_warnx(
        "%s:%d: %s is only 0x%lx"
        " bytes long, should be at least 0x%lx bytes long",
        __FILE__, __LINE__, path.c_str(), mapped_file_.size(), sizeof(ECD));
    mapped_file_.Close();
    return false;
  }

  // Now locate End of Central Directory (ECD) record.
  auto ecd_min = mapped_file_.end() - 65536 - sizeof(ECD);
  if (ecd_min < mapped_file_.start()) {
    ecd_min = mapped_file_.start();
  }

  const ECD *ecd = nullptr;
  for (auto ecd_ptr = mapped_file_.end() - sizeof(ECD); ecd_ptr >= ecd_min;
       --ecd_ptr) {
    if (reinterpret_cast<const ECD *>(ecd_ptr)->is()) {
      ecd = reinterpret_cast<const ECD *>(ecd_ptr);
      break;
    }
  }
  if (ecd == nullptr) {
    diag_warnx("%s:%d: Cannot locate  ECD record in %s", __FILE__, __LINE__,
               path.c_str());
    mapped_file_.Close();
    return false;
  }

  /* Find Central Directory and preamble size. We want to handle the case
   * where a Jar/Zip file contains a preamble (an arbitrary data before the
   * first entry) and 'zip -A' was not called to adjust the offsets, so all
   * the offsets are off by the preamble size. In the 32-bit case (that is,
   * there is no ECD64Locator+ECD64), ECD immediately follows the last CDH,
   * ECD immediately follows the Central Directory, and contains its size, so
   * Central Directory can be found reliably. We then use its stated location,
   * which ECD contains, too, to calculate the preamble size.  In the 64-bit
   * case, there are ECD64 and ECD64Locator records between the end of the
   * Central Directory and the ECD, the calculation is similar, with the
   * exception of the logic to find the actual start of the ECD64.
   * ECD64Locator contains only its position in the file, which is off by
   * preamble size, but does not contain the actual size of ECD64, which in
   * theory is variable (the fixed fields may be followed by some custom data,
   * with the total size saved in ECD64::remaining_size and thus unavailable
   * until we find ECD64.  We assume that the custom data is missing.
   */

  // First, sanity checks.
  uint32_t cen_position = ecd->cen_offset32();
  if (!ziph::zfield_has_ext64(cen_position)) {
    if (!mapped_file_.mapped(mapped_file_.address(cen_position))) {
      diag_warnx("%s:%d: %s is corrupt: Central Directory location 0x%" PRIx32
                 " is invalid",
                 __FILE__, __LINE__, path.c_str(), cen_position);
      mapped_file_.Close();
      return false;
    }
    if (mapped_file_.offset(ecd) < cen_position) {
      diag_warnx("%s:%d: %s is corrupt: End of Central Directory at 0x%" PRIx64
                 " precedes Central Directory at 0x%" PRIx32,
                 __FILE__, __LINE__, path.c_str(), mapped_file_.offset(ecd),
                 cen_position);
      mapped_file_.Close();
      return false;
    }
  }
  uint32_t cen_size = ecd->cen_size32();
  if (!ziph::zfield_has_ext64(cen_size)) {
    if (cen_size > mapped_file_.offset(ecd)) {
      diag_warnx("%s:%d: %s is corrupt: Central Directory size 0x%" PRIx32
                 " is too large",
                 __FILE__, __LINE__, path.c_str(), cen_size);
      mapped_file_.Close();
      return false;
    }
  }
  if (cen_size == 0) {
    // Empty archive, let cdh_ point to End of Central Directory.
    cdh_ = reinterpret_cast<const CDH *>(ecd);
    preamble_size_ = mapped_file_.offset(cdh_) - cen_position;
  } else {
    auto ecd64loc = reinterpret_cast<const ECD64Locator *>(
        ziph::byte_ptr(ecd) - sizeof(ECD64Locator));
    if (ecd64loc->is()) {
      auto ecd64 = reinterpret_cast<const ECD64 *>(ziph::byte_ptr(ecd64loc) -
                                                   sizeof(ECD64));
      if (!ecd64->is()) {
        diag_warnx(
            "%s:%d: %s is corrupt, expected ECD64 record at offset 0x%" PRIx64
            " is missing",
            __FILE__, __LINE__, path.c_str(), mapped_file_.offset(ecd64));
        mapped_file_.Close();
        return false;
      }
      cdh_ = reinterpret_cast<const CDH *>(ziph::byte_ptr(ecd64) -
                                           ecd64->cen_size());
      preamble_size_ = mapped_file_.offset(cdh_) - ecd64->cen_offset();
      // Find CEN and preamble size.
    } else {
      if (ziph::zfield_has_ext64(cen_size) ||
          ziph::zfield_has_ext64(cen_position)) {
        diag_warnx(
            "%s:%d: %s is corrupt, expected ECD64 locator record at "
            "offset 0x%" PRIx64 " is missing",
            __FILE__, __LINE__, path.c_str(), mapped_file_.offset(ecd64loc));
        return false;
      }
      cdh_ = reinterpret_cast<const CDH *>(ziph::byte_ptr(ecd) - cen_size);
      preamble_size_ = mapped_file_.offset(cdh_) - cen_position;
    }
    if (!cdh_->is()) {
      diag_warnx(
          "%s:%d: In %s, expected central file header signature at "
          "offset0x%" PRIx64,
          __FILE__, __LINE__, path.c_str(), mapped_file_.offset(cdh_));
      mapped_file_.Close();
      return false;
    }
  }
  path_ = path;
  return true;
}

bool InputJar::Close() {
  mapped_file_.Close();
  path_.clear();
  return true;
}