aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools/objc/j2objc_dead_code_pruner.py
blob: 9f787cbf0a0c1d2f636e955c7e48f975f5063a2f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
#!/usr/bin/python2.7

# Copyright 2015 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""A script for J2ObjC dead code removal in Blaze.

This script removes unused J2ObjC-translated classes from compilation and
linking by:
  1. Build a class dependency tree among translated source files.
  2. Use user-provided Java class entry points to get a list of reachable
     classes.
  3. Go through all translated source files and rewrite unreachable ones with
     dummy content.
"""

import argparse
from collections import OrderedDict
import multiprocessing
import os
import Queue
import re
import shutil
import subprocess
import threading

# Content written in place of every unreachable translated source file. It is
# a single file-scoped static variable rather than an empty file so that the
# resulting object file still has a symbol, which suppresses the
# "has no symbols" linker warning for empty object files.
PRUNED_SRC_CONTENT = 'static int DUMMY_unused __attribute__((unused,used)) = 0;'


def BuildReachabilityTree(dependency_mapping_files, file_open=open):
  """Builds a reachability tree using entries from dependency mapping files.

  Every non-blank line of a mapping file is split on ':'; the first field is
  the source file and the second field is one of its direct dependencies. Any
  further ':'-separated fields on the line are ignored. Blank lines (for
  example a trailing empty line) are skipped.

  Args:
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  Returns:
    A dict mapping J2ObjC-generated source files to the corresponding direct
    dependent source files, in the order the dependencies were read.
  Raises:
    IndexError: If a non-blank line does not contain a ':' separator.
  """
  tree = dict()
  for dependency_mapping_file in dependency_mapping_files.split(','):
    with file_open(dependency_mapping_file, 'r') as f:
      for line in f:
        stripped = line.strip()
        if not stripped:
          # Nothing to parse; indexing the split result below would fail.
          continue
        fields = stripped.split(':')
        tree.setdefault(fields[0], []).append(fields[1])
  return tree


def BuildHeaderMapping(header_mapping_files, file_open=open):
  """Builds a mapping between Java classes and J2ObjC-translated header files.

  Every non-blank line of a mapping file is split on '='; the first field is
  the Java class name and the second field is the translated header file,
  whose extension is dropped before it is stored. Blank lines (for example a
  trailing empty line) are skipped. If a class name appears more than once,
  the last occurrence wins.

  Args:
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  Returns:
    An ordered dict mapping Java class names to corresponding J2ObjC-translated
    source files (without file extension).
  Raises:
    IndexError: If a non-blank line does not contain a '=' separator.
  """
  header_mapping = OrderedDict()
  for header_mapping_file in header_mapping_files.split(','):
    with file_open(header_mapping_file, 'r') as f:
      for line in f:
        stripped = line.strip()
        if not stripped:
          # Nothing to parse; indexing the split result below would fail.
          continue
        fields = stripped.split('=')
        java_class_name = fields[0]
        transpiled_file_name = os.path.splitext(fields[1])[0]
        header_mapping[java_class_name] = transpiled_file_name
  return header_mapping


def BuildReachableFileSet(entry_classes, reachability_tree, header_mapping,
                          archive_source_file_mapping=None):
  """Builds a set of reachable translated files from entry Java classes.

  The traversal starts from three groups of entry files: the translated files
  of the given entry classes, files with duplicated base names inside one
  archive (only when archive_source_file_mapping is given), and every
  translated package-info file found in the reachability tree.

  Args:
    entry_classes: A comma separated list of Java entry classes.
    reachability_tree: A dict mapping translated files to their direct
        dependencies.
    header_mapping: A dict mapping Java class names to translated source files.
    archive_source_file_mapping: A dict mapping source files to the associated
        archive file that contains them.
  Returns:
    A set of reachable translated files from the given list of entry classes.
  Raises:
    Exception: If there is an entry class that is not being transpiled in this
        j2objc_library.
  """
  transpiled_entry_files = []
  for entry_class in entry_classes.split(','):
    if entry_class not in header_mapping:
      raise Exception(entry_class +
                      ' is not in the transitive Java deps of included ' +
                      'j2objc_library rules.')
    transpiled_entry_files.append(header_mapping[entry_class])

  # Translated files that go into the same static library archive and share a
  # base name are always treated as entry files, i.e. they are never pruned.
  #
  # Dead code removal cannot be performed correctly for them: the object file
  # entries in static library archives are named by the base names of the
  # original source files. If two source files (e.g., foo/bar.m, bar/bar.m) go
  # into the same archive and share the same base name (bar.m), their object
  # file entries inside the archive will have the same name (bar.o), and
  # current archive tools (ar, ranlib, etc.) do not handle this case very well.
  if archive_source_file_mapping:
    transpiled_entry_files.extend(_DuplicatedFiles(archive_source_file_mapping))

  # Translated files from package-info.java are also added to the entry files
  # because they are needed to resolve ObjC class names with prefixes and these
  # files may also have dependencies.
  for transpiled_file in reachability_tree:
    if transpiled_file.endswith('package-info'):
      transpiled_entry_files.append(transpiled_file)

  reachable_files = set()
  for transpiled_entry_file in transpiled_entry_files:
    reachable_files.add(transpiled_entry_file)
    # Files may be missing from the reachability tree: J2ObjC protos are not
    # analyzed for dead code stripping and therefore never appear in it.
    # Copy the dependency list so the tree itself is never mutated.
    pending = list(reachability_tree.get(transpiled_entry_file, ()))
    while pending:
      dep = pending.pop()
      if dep in reachable_files:
        continue
      reachable_files.add(dep)
      pending.extend(reachability_tree.get(dep, ()))
  return reachable_files


def PruneFiles(input_files, output_files, objc_file_path, reachable_files,
               file_open=open, file_shutil=shutil):
  """Writes every output file, blanking out the unreachable translated files.

  All (input, output) pairs are placed on a shared queue, one worker thread per
  CPU is started to drain it, and the call blocks until every queued pair has
  been marked as done.

  Args:
    input_files: A comma separated list of input source files to prune. It has
        a one-on-one pair mapping with the output_file list.
    output_files: A comma separated list of output source files to write pruned
        source files to. It has a one-on-one pair mapping with the input_file
        list.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    reachable_files: A set of reachable source files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  Returns:
    None.
  """
  pending_pairs = Queue.Queue()
  inputs = input_files.split(',')
  outputs = output_files.split(',')
  for file_pair in zip(inputs, outputs):
    pending_pairs.put(file_pair)

  # Every worker receives the same arguments and pulls from the same queue.
  worker_args = (pending_pairs, reachable_files, objc_file_path, file_open,
                 file_shutil)
  worker_count = multiprocessing.cpu_count()
  for _ in xrange(worker_count):
    worker = threading.Thread(target=_PruneFile, args=worker_args)
    worker.start()

  # Returns once task_done() has been called for every queued pair.
  pending_pairs.join()


def _PruneFile(file_queue, reachable_files, objc_file_path, file_open=open,
               file_shutil=shutil):
  """Worker loop that processes (input, output) file pairs until none remain.

  For each pair, the input path without its extension is made relative to
  objc_file_path. If that name is reachable the input is copied to the output;
  otherwise the output is written with PRUNED_SRC_CONTENT.

  Args:
    file_queue: A queue of (input_file, output_file) tuples. task_done() is
        called once for every tuple taken from it, even when processing fails.
    reachable_files: A set of reachable source files.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  Returns:
    None, once the queue is empty.
  """
  while True:
    try:
      input_file, output_file = file_queue.get_nowait()
    except Queue.Empty:
      return
    try:
      file_name = os.path.relpath(os.path.splitext(input_file)[0],
                                  objc_file_path)
      if file_name in reachable_files:
        file_shutil.copy(input_file, output_file)
      else:
        f = file_open(output_file, 'w')
        try:
          # Use a static variable scoped to the source file to suppress
          # the "has no symbols" linker warning for empty object files.
          f.write(PRUNED_SRC_CONTENT)
        finally:
          f.close()
    finally:
      # Always account for the item; otherwise a failed copy or write would
      # leave the queue's join() waiting forever.
      file_queue.task_done()


def _DuplicatedFiles(archive_source_file_mapping):
  """Returns the files whose base name is shared with another file in an archive.

  Base names are compared with their extension and separately for each
  archive; the returned entries are the full source paths with the extension
  removed.

  Args:
    archive_source_file_mapping: A dict mapping source files to the associated
        archive file that contains them.
  Returns:
    A list containing files with duplicated base names.
  """
  duplicates = []

  for _, archive_sources in archive_source_file_mapping.iteritems():
    # Group this archive's sources by base name; groups never span archives.
    stems_by_basename = dict()
    for source_path in archive_sources:
      stem = os.path.splitext(source_path)[0]
      stems_by_basename.setdefault(
          os.path.basename(source_path), []).append(stem)
    for basename in stems_by_basename:
      stems = stems_by_basename[basename]
      if len(stems) > 1:
        duplicates.extend(stems)

  return duplicates


def BuildArchiveSourceFileMapping(archive_source_mapping_files,
                                  file_open=open):
  """Builds a mapping between archive files and their associated source files.

  Every non-blank line of a mapping file is split on ':'; the first field is
  the archive file and the second field is one source file compiled into it.
  Any further ':'-separated fields on the line are ignored. Blank lines (for
  example a trailing empty line) are skipped.

  Args:
    archive_source_mapping_files: A comma separated list of J2ObjC-generated
        mapping between archive files and their associated source files.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  Returns:
    A dict mapping between archive files and their associated source files.
  Raises:
    IndexError: If a non-blank line does not contain a ':' separator.
  """
  tree = dict()
  for archive_source_mapping_file in archive_source_mapping_files.split(','):
    with file_open(archive_source_mapping_file, 'r') as f:
      for line in f:
        stripped = line.strip()
        if not stripped:
          # Nothing to parse; indexing the split result below would fail.
          continue
        fields = stripped.split(':')
        tree.setdefault(fields[0], []).append(fields[1])
  return tree


def PruneSourceFiles(input_files, output_files, dependency_mapping_files,
                     header_mapping_files, entry_classes, objc_file_path,
                     file_open=open, file_shutil=shutil):
  """Computes the reachable translated files and prunes the remaining ones.

  The dependency and header mapping files are parsed, the set of files
  reachable from the entry classes is derived from them, and the input files
  are then written to the output locations via PruneFiles.

  Args:
    input_files: A comma separated list of input source files to prune. It has
        a one-on-one pair mapping with the output_file list.
    output_files: A comma separated list of output source files to write pruned
        source files to. It has a one-on-one pair mapping with the input_file
        list.
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    entry_classes: A comma separated list of Java entry classes.
    objc_file_path: The file path which represents a directory where the
        generated ObjC files reside.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
    file_shutil: Reference to the builtin shutil module so it may be
        overridden for testing.
  """
  dependency_tree = BuildReachabilityTree(dependency_mapping_files, file_open)
  class_to_file = BuildHeaderMapping(header_mapping_files, file_open)
  # No archive mapping is passed here, so duplicated base names are not
  # added as extra entry files on this code path.
  reachable = BuildReachableFileSet(
      entry_classes, dependency_tree, class_to_file)
  PruneFiles(
      input_files, output_files, objc_file_path, reachable, file_open,
      file_shutil)


def MatchObjectNamesInArchive(xcrunwrapper, archive, object_names):
  """Returns object names matching their identity in an archive file.

  The linker that blaze uses appends an md5 hash to object file
  names prior to inclusion in the archive file. Thus, object names
  such as 'foo.o' need to be matched to their appropriate name in
  the archive file, such as 'foo_<hash>.o'.

  Only members whose whole name is '<object name>_<32 hex digits>.o' are
  returned; names that merely start with such a string are not.

  Args:
    xcrunwrapper: A wrapper script over xcrun.
    archive: The location of the archive file.
    object_names: The expected basenames of object files to match,
        sans extension. For example 'foo' (not 'foo.o').
  Returns:
    A list of basenames of matching members of the given archive
  Raises:
    subprocess.CalledProcessError: If listing the archive members fails.
  """
  if not object_names:
    # An empty alternation would match members literally named '_<hash>.o'.
    return []
  ar_contents_cmd = '%s ar -t %s' % (xcrunwrapper, archive)
  real_object_names = subprocess.check_output(ar_contents_cmd, shell=True)
  if not isinstance(real_object_names, str):
    # check_output returns bytes on Python 3; the pattern below is text.
    real_object_names = real_object_names.decode('utf-8')
  # The dot is escaped and the end of the line is anchored so that only exact
  # member names are returned.
  expected_object_name_regex = r'^(?:%s)_[0-9a-f]{32}\.o$' % (
      '|'.join([re.escape(name) for name in object_names]))
  return re.findall(expected_object_name_regex, real_object_names,
                    flags=re.MULTILINE)


def PruneArchiveFile(input_archive, output_archive, dummy_archive,
                     dependency_mapping_files, header_mapping_files,
                     archive_source_mapping_files, entry_classes, xcrunwrapper,
                     file_open=open):
  """Writes output_archive as input_archive minus its unreachable objects.

  One shell command is built and executed, depending on the archive:
    * archive not listed in the archive/source mapping, or none of its sources
      unreachable: the input archive is copied unchanged;
    * every source unreachable: the dummy archive is copied instead;
    * some sources unreachable: the input archive is copied, made writable,
      the unreachable objects are deleted and the table of contents rebuilt.

  Args:
    input_archive: The source archive file to prune.
    output_archive: The location of the pruned archive file.
    dummy_archive: A dummy archive file that contains no object.
    dependency_mapping_files: A comma separated list of J2ObjC-generated
        dependency mapping files.
    header_mapping_files: A comma separated list of J2ObjC-generated
        header mapping files.
    archive_source_mapping_files: A comma separated list of J2ObjC-generated
        mapping between archive files and their associated source files.
    entry_classes: A comma separated list of Java entry classes.
    xcrunwrapper: A wrapper script over xcrun.
    file_open: Reference to the builtin open function so it may be
        overridden for testing.
  """
  dependency_tree = BuildReachabilityTree(dependency_mapping_files, file_open)
  class_to_file = BuildHeaderMapping(header_mapping_files, file_open)
  archive_to_sources = BuildArchiveSourceFileMapping(
      archive_source_mapping_files, file_open)
  reachable = BuildReachableFileSet(
      entry_classes, dependency_tree, class_to_file, archive_to_sources)

  # Default outcome: nothing can or needs to be pruned, so the original
  # archive is copied over as is.
  command = 'cp %s %s' % (input_archive, output_archive)

  if input_archive in archive_to_sources:
    sources = archive_to_sources[input_archive]
    source_stems = [os.path.splitext(source)[0] for source in sources]
    dead_object_names = [os.path.basename(stem) for stem in source_stems
                         if stem not in reachable]

    if dead_object_names and len(dead_object_names) == len(sources):
      # Every object is unreachable: use the dummy archive that contains no
      # object instead of emptying the real one.
      command = 'cp %s %s' % (dummy_archive, output_archive)
    elif dead_object_names:
      # Object names inside the archive carry a hash suffix, so resolve the
      # real member names before deleting them.
      archive_members = MatchObjectNamesInArchive(
          xcrunwrapper, input_archive, dead_object_names)
      steps = [
          # Copy the input archive to the output location.
          'cp %s %s;' % (input_archive, output_archive),
          # Make the output archive editable.
          'chmod +w %s;' % (output_archive),
          # Remove the unreachable objects from the archive.
          '%s ar -d -s %s %s;' % (
              xcrunwrapper, output_archive, ' '.join(archive_members)),
          # Update the table of content of the archive file.
          '%s ranlib -a %s' % (xcrunwrapper, output_archive),
      ]
      command = ''.join(steps)

  subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)


# Script entry point. Arguments may also be supplied through a file by passing
# '@<file>' on the command line. When both --input_archive and --output_archive
# are given the archive is pruned; otherwise the translated source files are
# pruned.
if __name__ == '__main__':
  parser = argparse.ArgumentParser(fromfile_prefix_chars='@')

  # TODO(rduan): Remove these three flags once J2ObjC compile actions are fully
  # moved to the edges.
  parser.add_argument(
      '--input_files',
      help=('The comma-separated file paths of translated source files to '
            'prune.'))
  parser.add_argument(
      '--output_files',
      help='The comma-separated file paths of pruned source files to write to.')
  parser.add_argument(
      '--objc_file_path',
      help='The file path which represents a directory where the generated ObjC'
      ' files reside')

  parser.add_argument(
      '--input_archive',
      help=('The path of the translated archive to prune.'))
  parser.add_argument(
      '--output_archive',
      help='The path of the pruned archive file to write to.')
  parser.add_argument(
      '--dummy_archive',
      help='The dummy archive file that contains no symbol.')
  parser.add_argument(
      '--dependency_mapping_files',
      help='The comma-separated file paths of dependency mapping files.')
  parser.add_argument(
      '--header_mapping_files',
      help='The comma-separated file paths of header mapping files.')
  parser.add_argument(
      '--archive_source_mapping_files',
      help='The comma-separated file paths of archive to source mapping files. '
           'These mapping files should contain mappings between the '
           'translated source files and the archive file compiled from those '
           'source files.')
  parser.add_argument(
      '--entry_classes',
      help=('The comma-separated list of Java entry classes to be used as entry'
            ' point of the dead code analysis.'))
  parser.add_argument(
      '--xcrunwrapper',
      help=('The xcrun wrapper script.'))

  args = parser.parse_args()

  if not args.entry_classes:
    # The message parts are concatenated into a single string; passing them as
    # separate arguments would make the exception render as a tuple.
    raise Exception('J2objC dead code removal is on but no entry class is '
                    'specified in any j2objc_library targets in the transitive'
                    ' closure')
  if args.input_archive and args.output_archive:
    PruneArchiveFile(
        args.input_archive,
        args.output_archive,
        args.dummy_archive,
        args.dependency_mapping_files,
        args.header_mapping_files,
        args.archive_source_mapping_files,
        args.entry_classes,
        args.xcrunwrapper)
  else:
    # TODO(rduan): Remove once J2ObjC compile actions are fully moved to the
    # edges.
    PruneSourceFiles(
        args.input_files,
        args.output_files,
        args.dependency_mapping_files,
        args.header_mapping_files,
        args.entry_classes,
        args.objc_file_path)