aboutsummaryrefslogtreecommitdiffhomepage
path: root/csharp/src/ProtoMunge/Program.cs
blob: 53dc3501dcceabc31496ada4f567d72ce35efb91 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
#region Copyright notice and license

// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// http://github.com/jskeet/dotnet-protobufs/
// Original C++/Java/Python code:
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#endregion

using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using Google.ProtocolBuffers.Descriptors;

namespace Google.ProtocolBuffers.ProtoMunge
{
    /// <summary>
    /// Utility console application which takes a message descriptor and a corresponding message,
    /// and produces a new message with similar but random data. The data is the same length
    /// as the original, but with random values within appropriate bands. (For instance, a compressed
    /// integer in the range 0-127 will end up as another integer in the same range, to keep the length
    /// the same.)
    /// TODO(jonskeet): Potentially refactor to use an instance instead, making it simpler to
    /// be thread-safe for external use.
    /// </summary>
    public sealed class Program
    {
        /// <summary>
        /// Single source of randomness shared by every munging helper in this class.
        /// </summary>
        private static readonly Random rng = new Random();

        /// <summary>
        /// Entry point. Expects exactly three arguments: the descriptor type name, the path of
        /// the input data and the path of the output file.
        /// </summary>
        /// <returns>0 when the munged message was written; 1 on a usage error, when the default
        /// message cannot be obtained for the type name, or when anything else fails.</returns>
        private static int Main(string[] args)
        {
            if (args.Length != 3)
            {
                Console.Error.WriteLine("Usage: ProtoMunge <descriptor type name> <input data> <output file>");
                Console.Error.WriteLine(
                    "The descriptor type name is the fully-qualified message name, including assembly.");
                Console.Error.WriteLine(
                    "(At a future date it may be possible to do this without building the .NET assembly at all.)");
                return 1;
            }
            IMessage defaultMessage;
            try
            {
                defaultMessage = MessageUtil.GetDefaultMessage(args[0]);
            }
            catch (ArgumentException e)
            {
                Console.Error.WriteLine(e.Message);
                return 1;
            }
            try
            {
                IBuilder builder = defaultMessage.WeakCreateBuilderForType();
                byte[] inputData = File.ReadAllBytes(args[1]);
                builder.WeakMergeFrom(ByteString.CopyFrom(inputData));
                IMessage original = builder.WeakBuild();
                IMessage munged = Munge(original);
                // Final sanity check: the whole point of the tool is a same-size result.
                if (original.SerializedSize != munged.SerializedSize)
                {
                    throw new InvalidOperationException("Serialized sizes don't match");
                }
                File.WriteAllBytes(args[2], munged.ToByteArray());
                return 0;
            }
            catch (Exception e)
            {
                // Top-level handler: report both the short message and the full exception.
                Console.Error.WriteLine("Error: {0}", e.Message);
                Console.Error.WriteLine();
                Console.Error.WriteLine("Detailed exception information: {0}", e);
                return 1;
            }
        }

        /// <summary>
        /// Munges a message recursively.
        /// </summary>
        /// <returns>A new message of the same type as the original message,
        /// but munged so that all the data is desensitised.</returns>
        private static IMessage Munge(IMessage message)
        {
            IBuilder builder = message.WeakCreateBuilderForType();
            foreach (var pair in message.AllFields)
            {
                if (pair.Key.IsRepeated)
                {
                    // Repeated fields are munged one element at a time.
                    foreach (object singleValue in (IEnumerable) pair.Value)
                    {
                        builder.WeakAddRepeatedField(pair.Key, CheckedMungeValue(pair.Key, singleValue));
                    }
                }
                else
                {
                    builder[pair.Key] = CheckedMungeValue(pair.Key, pair.Value);
                }
            }
            IMessage munged = builder.WeakBuild();
            // A mismatch here is only reported, not treated as fatal; Main performs the
            // decisive check on the top-level message.
            if (message.SerializedSize != munged.SerializedSize)
            {
                Console.WriteLine("Sub message sizes: {0}/{1}", message.SerializedSize, munged.SerializedSize);
            }
            return munged;
        }

        /// <summary>
        /// Munges a single value and checks that the length ends up the same as it was before.
        /// </summary>
        /// <exception cref="InvalidOperationException">The munged value has a different
        /// computed field size from the original value.</exception>
        private static object CheckedMungeValue(FieldDescriptor fieldDescriptor, object value)
        {
            int currentSize = CodedOutputStream.ComputeFieldSize(fieldDescriptor.FieldType, fieldDescriptor.FieldNumber,
                                                                 value);
            object mungedValue = MungeValue(fieldDescriptor, value);
            int mungedSize = CodedOutputStream.ComputeFieldSize(fieldDescriptor.FieldType, fieldDescriptor.FieldNumber,
                                                                mungedValue);
            // Exceptions log more easily than assertions
            if (currentSize != mungedSize)
            {
                throw new InvalidOperationException("Munged value had wrong size. Field type: " + fieldDescriptor.FieldType
                                                    + "; old value: " + value + "; new value: " + mungedValue);
            }
            return mungedValue;
        }

        /// <summary>
        /// Munges a single value of the specified field descriptor. (i.e. if the field is
        /// actually a repeated int, this method receives a single int value to munge, and
        /// is called multiple times).
        /// </summary>
        /// <exception cref="ArgumentException">The field type is not one of the handled types.</exception>
        private static object MungeValue(FieldDescriptor fieldDescriptor, object value)
        {
            switch (fieldDescriptor.FieldType)
            {
                case FieldType.SInt64:
                    // sint64 is ZigZag-encoded on the wire, so the length band has to be chosen
                    // from the encoded value rather than from the raw two's-complement bits.
                    return DecodeZigZag64(MungeVarint64(EncodeZigZag64((long) value)));
                case FieldType.Int64:
                    return (long) MungeVarint64((ulong) (long) value);
                case FieldType.UInt64:
                    return MungeVarint64((ulong) value);
                case FieldType.SInt32:
                    // Same reasoning as SInt64: munge the ZigZag-encoded form, then decode.
                    return DecodeZigZag32(MungeVarint32(EncodeZigZag32((int) value)));
                case FieldType.Int32:
                    return MungeInt32((int) value);
                case FieldType.UInt32:
                    return MungeVarint32((uint) value);
                case FieldType.Double:
                    return rng.NextDouble();
                case FieldType.Float:
                    return (float) rng.NextDouble();
                case FieldType.Fixed64:
                    return BitConverter.ToUInt64(NextRandomBytes(8), 0);
                case FieldType.Fixed32:
                    return BitConverter.ToUInt32(NextRandomBytes(4), 0);
                case FieldType.Bool:
                    return rng.Next(2) == 1;
                case FieldType.String:
                    return MungeString((string) value);
                case FieldType.Group:
                case FieldType.Message:
                    return Munge((IMessage) value);
                case FieldType.Bytes:
                    return MungeByteString((ByteString) value);
                case FieldType.SFixed64:
                    return BitConverter.ToInt64(NextRandomBytes(8), 0);
                case FieldType.SFixed32:
                    return BitConverter.ToInt32(NextRandomBytes(4), 0);
                case FieldType.Enum:
                    return MungeEnum(fieldDescriptor, (EnumValueDescriptor) value);
                default:
                    // TODO(jonskeet): Different exception?
                    throw new ArgumentException("Invalid field descriptor");
            }
        }

        /// <summary>
        /// Maps a signed 32-bit value to its unsigned ZigZag form (0, -1, 1, -2 become 0, 1, 2, 3).
        /// </summary>
        private static uint EncodeZigZag32(int n)
        {
            // The right shift is arithmetic, so it yields all ones for negative input.
            return (uint) ((n << 1) ^ (n >> 31));
        }

        /// <summary>
        /// Inverse of <see cref="EncodeZigZag32"/>.
        /// </summary>
        private static int DecodeZigZag32(uint n)
        {
            return (int) (n >> 1) ^ -(int) (n & 1);
        }

        /// <summary>
        /// Maps a signed 64-bit value to its unsigned ZigZag form (0, -1, 1, -2 become 0, 1, 2, 3).
        /// </summary>
        private static ulong EncodeZigZag64(long n)
        {
            // The right shift is arithmetic, so it yields all ones for negative input.
            return (ulong) ((n << 1) ^ (n >> 63));
        }

        /// <summary>
        /// Inverse of <see cref="EncodeZigZag64"/>.
        /// </summary>
        private static long DecodeZigZag64(ulong n)
        {
            return (long) (n >> 1) ^ -(long) (n & 1);
        }

        /// <summary>
        /// Returns a new array of the given length filled with random bytes.
        /// </summary>
        private static byte[] NextRandomBytes(int count)
        {
            byte[] data = new byte[count];
            rng.NextBytes(data);
            return data;
        }

        /// <summary>
        /// Replaces a string with a random string of the same length made up of printable
        /// ASCII characters (codes 32 to 126).
        /// </summary>
        /// <exception cref="ArgumentException">The original contains a character above 127.
        /// (Presumably such characters are rejected because their encoded length could differ
        /// from that of the ASCII replacement.)</exception>
        private static object MungeString(string original)
        {
            foreach (char c in original)
            {
                if (c > 127)
                {
                    throw new ArgumentException("Can't handle non-ascii yet");
                }
            }
            char[] chars = new char[original.Length];
            // Convert to pure ASCII - no control characters.
            for (int i = 0; i < chars.Length; i++)
            {
                chars[i] = (char) rng.Next(32, 127);
            }
            return new string(chars);
        }

        /// <summary>
        /// Int32 fields are slightly strange - we need to keep the sign the same way it is:
        /// negative numbers can munge to any other negative number (it'll always take
        /// 10 bytes) but positive numbers have to stay positive, so we can't use the
        /// full range of 32 bits.
        /// </summary>
        private static int MungeInt32(int value)
        {
            if (value < 0)
            {
                // Upper bound is exclusive, so the result is always negative.
                return rng.Next(int.MinValue, 0);
            }
            int length = CodedOutputStream.ComputeRawVarint32Size((uint) value);
            uint min = length == 1 ? 0 : 1U << ((length - 1)*7);
            // Cap at int.MaxValue so a five-byte value cannot turn negative.
            uint max = length == 5 ? int.MaxValue : (1U << (length*7)) - 1;
            return (int) NextRandomUInt64(min, max);
        }

        /// <summary>
        /// Picks a random 32-bit value whose raw varint encoding has the same number of bytes
        /// as the original's.
        /// </summary>
        private static uint MungeVarint32(uint original)
        {
            int length = CodedOutputStream.ComputeRawVarint32Size(original);
            // Each varint byte carries 7 bits of payload, hence the multiples of 7.
            uint min = length == 1 ? 0 : 1U << ((length - 1)*7);
            uint max = length == 5 ? uint.MaxValue : (1U << (length*7)) - 1;
            return (uint) NextRandomUInt64(min, max);
        }

        /// <summary>
        /// Picks a random 64-bit value whose raw varint encoding has the same number of bytes
        /// as the original's.
        /// </summary>
        private static ulong MungeVarint64(ulong original)
        {
            int length = CodedOutputStream.ComputeRawVarint64Size(original);
            // Each varint byte carries 7 bits of payload, hence the multiples of 7.
            ulong min = length == 1 ? 0 : 1UL << ((length - 1)*7);
            ulong max = length == 10 ? ulong.MaxValue : (1UL << (length*7)) - 1;
            return NextRandomUInt64(min, max);
        }

        /// <summary>
        /// Returns a random number in the range [min, max]. The result is never below min and
        /// never above max; because the random factor is strictly less than 1, max itself is
        /// essentially never produced unless min equals max.
        /// </summary>
        /// <exception cref="ArgumentException">min is greater than max.</exception>
        private static ulong NextRandomUInt64(ulong min, ulong max)
        {
            if (min > max)
            {
                throw new ArgumentException("min must be <= max; min=" + min + "; max = " + max);
            }
            ulong range = max - min;
            // This isn't actually terribly good at very large ranges - but it doesn't really matter for the sake
            // of this program.
            ulong offset = (ulong) (range*rng.NextDouble());
            // range is converted to double for the multiplication and may round up for large
            // values; clamp so that rounding can never push the result beyond max.
            if (offset > range)
            {
                offset = range;
            }
            return min + offset;
        }

        /// <summary>
        /// Replaces an enum value with a randomly chosen value of the same enum type whose
        /// number has the same raw varint size as the original's.
        /// </summary>
        private static object MungeEnum(FieldDescriptor fieldDescriptor, EnumValueDescriptor original)
        {
            // Find all the values which get encoded to the same size as the current value, and pick one at random
            int originalSize = CodedOutputStream.ComputeRawVarint32Size((uint) original.Number);
            List<EnumValueDescriptor> sameSizeValues = new List<EnumValueDescriptor>();
            foreach (EnumValueDescriptor candidate in fieldDescriptor.EnumType.Values)
            {
                if (CodedOutputStream.ComputeRawVarint32Size((uint) candidate.Number) == originalSize)
                {
                    sameSizeValues.Add(candidate);
                }
            }
            return sameSizeValues[rng.Next(sameSizeValues.Count)];
        }

        /// <summary>
        /// Replaces a byte string with random bytes of the same length.
        /// </summary>
        private static object MungeByteString(ByteString byteString)
        {
            return ByteString.CopyFrom(NextRandomBytes(byteString.Length));
        }
    }
}