src/tools/android/java/com/google/devtools/build/android/ziputils/SplitZip.java


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452

// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.android.ziputils;

import static com.google.devtools.build.android.ziputils.DataDescriptor.EXTCRC;
import static com.google.devtools.build.android.ziputils.DataDescriptor.EXTLEN;
import static com.google.devtools.build.android.ziputils.DataDescriptor.EXTSIZ;
import static com.google.devtools.build.android.ziputils.DirectoryEntry.CENCRC;
import static com.google.devtools.build.android.ziputils.DirectoryEntry.CENLEN;
import static com.google.devtools.build.android.ziputils.DirectoryEntry.CENSIZ;
import static com.google.devtools.build.android.ziputils.DirectoryEntry.CENTIM;
import static com.google.devtools.build.android.ziputils.LocalFileHeader.LOCFLG;
import static com.google.devtools.build.android.ziputils.LocalFileHeader.LOCTIM;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.base.Preconditions;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * Extracts entries from a set of input archives, and copies them to N output archives of
 * approximately equal size, while attempting to split archives on package (directory) boundaries.
 * Optionally, accepts a list of entries to be added to the first output archive, splitting
 * remaining entries by package boundaries.
 *
 * <p>Configure an instance with the {@code add...}/{@code set...} methods, call {@link #run()} to
 * copy the entries, and then {@link #close()} to finish and close the output archives.
 */
public class SplitZip implements EntryHandler {
  private boolean verbose = false;
  private final List<ZipIn> inputs;
  private final List<ZipOut> outputs;
  // Path of the file listing entries for the first shard, or null.
  private String filterFile;
  // Stream supplying the first-shard list; takes precedence over filterFile when set (testing).
  private InputStream filterInputStream;
  private String resourceFile;
  private Date date;
  private DosTime dosTime;
  // Internal state variables:
  private boolean finished = false;
  private Set<String> filter;
  // Snapshot of 'outputs' taken by run(); null until run() has been called.
  private ZipOut[] zipOuts;
  // Destination for non-class entries; may be the very same object as zipOuts[0].
  private ZipOut resourceOut;
  private final Map<String, ZipOut> assignments = new HashMap<>();
  private final Map<String, CentralDirectory> centralDirectories;
  private final Set<String> classes = new TreeSet<>();

  /**
   * Creates an un-configured {@code SplitZip} instance.
   */
  public SplitZip() {
    inputs = new ArrayList<>();
    outputs = new ArrayList<>();
    centralDirectories = new HashMap<>();
  }

  /**
   * Configures a resource file. By default, resources are output in the initial shard.
   * If a resource file is specified, resources are written to this instead.
   *
   * @param resourceFile if not {@code null}, the name of a file in which to output resources.
   * @return this object.
   */
  public SplitZip setResourceFile(String resourceFile) {
    this.resourceFile = resourceFile;
    return this;
  }

  // Package private for testing with mock file
  SplitZip setResourceFile(ZipOut resOut) {
    resourceOut = resOut;
    return this;
  }

  /**
   * Gets the name of the resource output file. If no resource output file is configured, resources
   * are output in the initial shard.
   *
   * @return the name of the resource output file, or {@code null} if no file has been configured.
   */
  public String getResourceFile() {
    return resourceFile;
  }

  /**
   * Configures a file containing a list of files to be included in the first output archive.
   *
   * @param clFile path of class file list.
   * @return this object
   */
  public SplitZip setMainClassListFile(String clFile) {
    filterFile = clFile;
    return this;
  }

  // Package private for testing with mock file
  SplitZip setMainClassListFile(InputStream clInputStream) {
    filterInputStream = clInputStream;
    return this;
  }

  /**
   * Gets the path of the file listing the content of the initial shard.
   *
   * @return path of file list file, or {@code null} if not set.
   */
  public String getMainClassListFile() {
    return filterFile;
  }

  /**
   * Configures verbose mode.
   *
   * @param flag set to {@code true} to turn on verbose mode.
   * @return this object
   */
  public SplitZip setVerbose(boolean flag) {
    verbose = flag;
    return this;
  }

  /**
   * Gets the verbosity mode.
   *
   * @return {@code true} iff verbose mode is enabled
   */
  public boolean isVerbose() {
    return verbose;
  }

  /**
   * Sets date to overwrite timestamp of copied entries. Setting the date to {@code null} means
   * using the date and time information in the input file. Set an explicit date to override.
   *
   * @param date modified date and time to set for entries in output.
   * @return this object.
   */
  public SplitZip setEntryDate(Date date) {
    this.date = date;
    this.dosTime = date == null ? null : new DosTime(date);
    return this;
  }

  /**
   * Sets date to {@link DosTime#DOS_EPOCH}.
   *
   * @return this object.
   */
  public SplitZip useDefaultEntryDate() {
    this.date = DosTime.DOS_EPOCH;
    this.dosTime = DosTime.EPOCH;
    return this;
  }

  /**
   * Gets the entry modified date.
   *
   * @return the configured override date, or {@code null} if input timestamps are kept.
   */
  public Date getEntryDate() {
    return date;
  }

  /**
   * Configures multiple input file locations.
   *
   * @param inputs list of input locations; {@code null} elements are ignored.
   * @return this object
   * @throws java.io.IOException if an input file cannot be opened.
   */
  public SplitZip addInputs(Iterable<String> inputs) throws IOException {
    for (String i : inputs) {
      addInput(i);
    }
    return this;
  }

  /**
   * Configures an input location. An input file must be a zip archive.
   *
   * @param filename path for an input location; a {@code null} value is silently ignored.
   * @return this object
   * @throws java.io.IOException if the input file cannot be opened.
   */
  public SplitZip addInput(String filename) throws IOException {
    if (filename != null) {
      inputs.add(new ZipIn(new FileInputStream(filename).getChannel(), filename));
    }
    return this;
  }

  // Package private, for testing using mock file system.
  SplitZip addInput(ZipIn in) throws IOException {
    Preconditions.checkNotNull(in);
    inputs.add(in);
    return this;
  }

  /**
   * Configures multiple output file locations.
   *
   * @param outputs list of output files.
   * @return this object
   * @throws java.io.IOException if an output file cannot be opened for writing.
   */
  public SplitZip addOutputs(Iterable<String> outputs) throws IOException {
    for (String o : outputs) {
      addOutput(o);
    }
    return this;
  }

  /**
   * Configures an output location. The file is opened for writing (not appending) immediately.
   *
   * @param output path for an output location; must not be {@code null}.
   * @return this object
   * @throws java.io.IOException if the output file cannot be opened for writing.
   */
  public SplitZip addOutput(String output) throws IOException {
    Preconditions.checkNotNull(output);
    outputs.add(new ZipOut(new FileOutputStream(output, false).getChannel(), output));
    return this;
  }

  // Package private for testing with mock file
  SplitZip addOutput(ZipOut output) throws IOException {
    Preconditions.checkNotNull(output);
    outputs.add(output);
    return this;
  }

  /**
   * Executes this {@code SplitZip}, reading content from the configured input locations and
   * copying each assigned entry to one of the configured outputs.
   *
   * @return this object
   * @throws java.io.IOException if reading an input, the filter list, or writing an output fails.
   * @throws IllegalStateException if no output has been configured.
   */
  public SplitZip run() throws IOException {
    verbose("SplitZip: Splitting in: " + outputs.size());
    verbose("SplitZip: with filter: " + filterFile);
    checkConfig();
    // Prepare output files
    zipOuts = outputs.toArray(new ZipOut[outputs.size()]);
    if (resourceFile != null) {
      resourceOut = new ZipOut(new FileOutputStream(resourceFile, false).getChannel(),
          resourceFile);
    } else if (resourceOut == null) { // may have been set for testing
      // No dedicated resource archive: resources share the first shard.
      resourceOut = zipOuts[0];
    }

    // Read directories of input files
    for (ZipIn zip : inputs) {
      zip.endOfCentralDirectory();
      centralDirectories.put(zip.getFilename(), zip.centralDirectory());
      // NOTE(review): the result of this second call is unused and it looks redundant; kept
      // as-is until it is confirmed that ZipIn.centralDirectory() has no required side effect.
      zip.centralDirectory();
    }
    // Assign input entries to output files
    split();
    // Copy entries to the assigned output files
    for (ZipIn zip : inputs) {
      zip.scanEntries(this);
    }
    return this;
  }

  /**
   * Copies an entry to the assigned output file. Called for each entry in the input files.
   * Entries without an assignment are skipped. An assignment is consumed when it is used, so an
   * entry name is copied at most once even if it occurs in several inputs.
   *
   * @param in the archive being scanned (not used by this implementation).
   * @param header local file header of the entry.
   * @param dirEntry central directory entry of the entry; if {@code null} the entry is skipped
   *     with a warning.
   * @param data the entry's data, written right after the header.
   * @throws IOException if writing to the output fails.
   */
  @Override
  public void handle(ZipIn in, LocalFileHeader header, DirectoryEntry dirEntry,
      ByteBuffer data) throws IOException {
    String localFilename = header.getFilename();
    ZipOut out = assignments.remove(localFilename);
    if (out == null) {
      // Skip unassigned file;
      return;
    }
    if (dirEntry == null) {
      // Shouldn't get here, as there should be no assignment.
      System.out.println("Warning: no directory entry");
      return;
    }
    // Clone directory entry
    DirectoryEntry entryOut = out.nextEntry(dirEntry);
    if (dosTime != null) {
      // Overwrite time stamp
      header.set(LOCTIM, dosTime.time);
      entryOut.set(CENTIM, dosTime.time);
    }
    out.write(header);
    out.write(data);
    if ((header.get(LOCFLG) & LocalFileHeader.SIZE_MASKED_FLAG) != 0) {
      // Instead of this, we could fix the header with the size information
      // from the directory entry. For now, keep the entry encoded as-is, and
      // emit a data descriptor built from the directory entry's values.
      DataDescriptor desc = DataDescriptor.allocate()
          .set(EXTCRC, dirEntry.get(CENCRC))
          .set(EXTSIZ, dirEntry.get(CENSIZ))
          .set(EXTLEN, dirEntry.get(CENLEN));
      out.write(desc);
    }
  }

  /**
   * Writes any remaining output data to the output archives. Each distinct output is finished
   * exactly once, the resource output first.
   *
   * @throws IOException if an output throws an IOException
   * @throws IllegalStateException if this method was already called earlier
   */
  public void finish() throws IOException {
    checkNotFinished();
    finished = true;
    for (ZipOut out : distinctOutputs()) {
      out.finish();
    }
  }

  /**
   * Writes any remaining output data to the output archives and closes them. Every distinct
   * output is closed even if closing an earlier one fails; the first failure is rethrown with
   * later ones attached as suppressed exceptions.
   *
   * @throws IOException if finishing or closing an output throws an IOException
   */
  public void close() throws IOException {
    if (!finished) {
      finish();
    }
    IOException failure = null;
    for (ZipOut out : distinctOutputs()) {
      try {
        out.close();
      } catch (IOException e) {
        if (failure == null) {
          failure = e;
        } else {
          failure.addSuppressed(e);
        }
      }
    }
    if (failure != null) {
      throw failure;
    }
  }

  /**
   * Lists the outputs to finish/close: the resource output (if any) followed by every shard that
   * is not that same object. This avoids touching the first shard twice when it doubles as the
   * resource output, and tolerates {@link #run()} not having been called ({@code zipOuts} unset).
   */
  private List<ZipOut> distinctOutputs() {
    List<ZipOut> result = new ArrayList<>();
    if (resourceOut != null) {
      result.add(resourceOut);
    }
    if (zipOuts != null) {
      for (ZipOut zo : zipOuts) {
        if (zo != resourceOut) {
          result.add(zo);
        }
      }
    }
    return result;
  }

  private void checkNotFinished() {
    if (finished) {
      throw new IllegalStateException("SplitZip has already been finished");
    }
  }

  /**
   * Validates configuration before execution, and loads the first-shard filter if one is
   * configured (by file name or by stream).
   */
  private void checkConfig() throws IOException {
    if (outputs.isEmpty()) {
      throw new IllegalStateException("Require at least one output file");
    }
    filter = filterFile == null && filterInputStream == null ? null : readPaths(filterFile);
  }

  /**
   * Parses the entries and assign each entry to an output file.
   */
  private void split() {
    for (ZipIn in : inputs) {
      CentralDirectory cdir = centralDirectories.get(in.getFilename());
      for (DirectoryEntry entry : cdir.list()) {
        String filename = entry.getFilename();
        if (filename.endsWith(".class")) {
          // Only pass classes to the splitter, so that it can do the best job
          // possible distributing them across output files.
          classes.add(filename);
        } else if (!filename.endsWith("/")) {
          // Non class files (resources) are either assigned to the first
          // output file, or to a specified resource output file.
          // Names ending in "/" get no assignment and are therefore not copied.
          assignments.put(filename, resourceOut);
        }
      }
    }
    Splitter entryFilter = new Splitter(outputs.size(), classes.size());
    if (filter != null) {
      // Assign files in the filter to the first output file.
      entryFilter.assign(filter);
      entryFilter.nextShard(); // minimal initial shard
    }
    for (String path : classes) {
      int assignment = entryFilter.assign(path);
      Preconditions.checkState(assignment >= 0 && assignment < zipOuts.length);
      assignments.put(path, zipOuts[assignment]);
    }
  }

  /**
   * Reads paths of classes required in first shard, one path per line, decoded as UTF-8. If a
   * filter stream has been supplied (testing) it is read instead of {@code fileName}. The stream
   * that was read is closed before returning.
   *
   * @param fileName file to read when no filter stream has been supplied.
   * @return the set of normalized paths.
   * @throws IOException if the file cannot be opened or read.
   */
  private Set<String> readPaths(String fileName) throws IOException {
    // Keep a freshly opened file stream in a local rather than in the field, so the field never
    // ends up referring to a stream that this method has already closed.
    InputStream source =
        filterInputStream != null ? filterInputStream : new FileInputStream(fileName);
    Set<String> paths = new HashSet<>();
    // Closing the reader also closes the underlying stream.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(source, UTF_8))) {
      String line;
      while ((line = reader.readLine()) != null) {
        paths.add(fixPath(line));
      }
    }
    return paths;
  }

  // TODO(bazel-team): Got this from 'dx'. I'm not sure we need this part. Keep it for now,
  // to make sure we read the main dex list the exact same way that dx would.
  /**
   * Normalizes a path from the class list: converts backslashes to slashes on platforms whose
   * separator is a backslash, keeps only what follows the last {@code "/./"}, and otherwise
   * strips a leading {@code "./"}.
   */
  private static String fixPath(String path) {
    if (File.separatorChar == '\\') {
      path = path.replace('\\', '/');
    }
    int index = path.lastIndexOf("/./");
    if (index != -1) {
      return path.substring(index + 3);
    }
    if (path.startsWith("./")) {
      return path.substring(2);
    }
    return path;
  }

  /** Prints {@code msg} to standard output when verbose mode is enabled. */
  private void verbose(String msg) {
    if (verbose) {
      System.out.println(msg);
    }
  }
}