testing/libfuzzer/fuzzers/url.proto - gn - Git at Google

 // Copyright 2017 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 // This file contains the definition of the Url protobuf used in the
 // url_parse_proto_fuzzer that is meant to serve as an example for future
 // Chromium fuzzers that use libprotobuf-mutator.
 // For this fuzzer, we will consider the format of a URL to be
 // [scheme:][//[user[:password]@]host[:port]][/path][?query][#fragment]
 // There may be some URLs Chromium treats as valid that this syntax does not
 // capture. However, we will ignore them for the sake of simplicity.
 // It is recommended to read this file in conjunction with
 // convert_protobuf_to_string() in url_parse_proto_fuzzer.cc as logic in this
 // function is sometimes used to ensure that the Url Protocol Buffer obeys the
 // syntax we have defined for URLs. Though reading it is completely
 // unnecessary for understanding this fuzzer, we have roughly followed RFC 3986
 // (https://tools.ietf.org/html/rfc3986) which defines the syntax of URIs (which
 // URLs are a subset of).

 // proto2 syntax is needed here: the messages below declare `required` fields
 // and explicit `[default = ...]` values, neither of which proto3 supports.
 syntax = "proto2";

 // All definitions in this file live in the url_parse_proto_fuzzer package.
 package url_parse_proto_fuzzer;

 // Here we define the format for a Url Protocol Buffer. This will be passed to
 // our fuzzer function.
 message Url {
   // If there is a scheme, then it must be followed by a colon. A scheme is in
   // practice not required in a URL. Therefore, we define the scheme as
   // optional; the conversion code appends the colon when a scheme is included.
   optional string scheme = 1;

   // The kinds of slash the conversion code can emit, both before the host
   // (see `slashes`) and between path segments (see `path_separator`).
   enum Slash {
     NONE = 0;      // Separate path segments using ""
     FORWARD = 1;   // Separate path segments using /
     BACKWARD = 2;  // Separate path segments using \
   }

   // The syntax rules of the two slashes that precede the host in a URL are
   // surprisingly complex. They are not required, even if a scheme is included
   // (http:example.com is treated as valid), and are valid even if a scheme is
   // not included (//example.com is treated as file:///example.com). They can
   // even be backslashes (http:\\example.com and http\/example.com are both
   // valid) and there can be any number of them (http:/example.com and
   // http://////example.com are both valid).
   // We therefore define slashes as a list of enum values (repeated Slash).
   // The conversion code reads this list to append the appropriate kind and
   // number of slashes to the URL.
   repeated Slash slashes = 2 [packed = true];

   // The [user[:password]@] part of the URL is called the userinfo.
   // Userinfo is not mandatory, but if it is included in a URL, then it must
   // contain a string called user. There is another optional field in userinfo
   // called the password. If a password is included, the user must be separated
   // from it by ":". In either case, the userinfo must be separated from the
   // host by "@". A URL must have a host if it has a userinfo.
   // These requirements are ensured by the conversion code.
   message Userinfo {
     // Mandatory whenever a Userinfo is present (see above), hence `required`.
     required string user = 1;
     // Optional; separated from the user by ":" when present.
     optional string password = 2;
   }
   optional Userinfo userinfo = 3;

   // Hosts, like most else in our Url definition, are optional (there are
   // URLs such as data URLs that do not have hosts).
   optional string host = 4;

   // Ports are unsigned 16-bit integers (0 to 2^16 - 1). The closest type to
   // this in the proto2 format is uint32, so this field can also carry larger
   // values. If a port number is specified it must be preceded by a colon
   // (consider "google.com80": 80 will be interpreted as part of the host).
   // The conversion code will ensure this is the case.
   optional uint32 port = 5;

   // The rules for the path are somewhat complex. A path is not required,
   // however if it follows a port or host, it must start with "/" according
   // to the RFC, though Chromium accepts "\" as it converts all backslashes to
   // slashes. It does not need to start with "/" if there is no host (in data
   // URLs for example). Thus we define path as a repeated string where each
   // member contains a segment of the path and will be preceded by the
   // path_separator. The one exception to this is for the first segment: if
   // path_separator == NONE and there is a non-empty path and host, then the
   // first segment will be preceded by "/".
   repeated string path = 6;

   // Separator written before each path segment; reads as FORWARD when unset.
   // Declared `optional` rather than `required`: a required field must always
   // be set explicitly, which contradicts giving it a default. The field
   // number, type and default are unchanged, so existing serialized messages
   // that set this field still parse the same way.
   optional Slash path_separator = 7 [default = FORWARD];

   // A query must be preceded by "?". This will be ensured in the conversion
   // code. Queries can have many components which the converter will separate
   // using "&", as is the convention.
   repeated string query = 8;

   // A fragment must be preceded by "#". This will be ensured in the
   // conversion code.
   optional string fragment = 9;
 }
	// Copyright 2017 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	// This file contains the definition of the Url protobuf used in the
	// url_parse_proto_fuzzer that is meant to serve as an example for future
	// Chromium fuzzers that use libprotobuf-mutator.
	// For this fuzzer, we will consider the format of a URL to be
	// [scheme:][//[user[:password]@]host[:port]][/path][?query][#fragment]
	// There may be some URLs Chromium treats as valid that this syntax does not
	// capture. However, we will ignore them for the sake of simplicity.
	// It is recommended to read this file in conjunction with
	// convert_protobuf_to_string() in url_parse_proto_fuzzer.cc as logic in this
	// function is sometimes used to ensure that the Url Protocol Buffer obeys the
	// syntax we have defined for URLs. Though reading it is completely
	// unnecessary for understanding this fuzzer, we have roughly followed RFC 3986
	// (https://tools.ietf.org/html/rfc3986) which defines the syntax of URIs (which
	// URLs are a subset of).

	// proto2 syntax is needed here: the messages below declare `required` fields
	// and explicit `[default = ...]` values, neither of which proto3 supports.
	syntax = "proto2";

	// All definitions in this file live in the url_parse_proto_fuzzer package.
	package url_parse_proto_fuzzer;

	// Here we define the format for a Url Protocol Buffer. This will be passed to
	// our fuzzer function.
	message Url {
	  // If there is a scheme, then it must be followed by a colon. A scheme is in
	  // practice not required in a URL. Therefore, we define the scheme as
	  // optional; the conversion code appends the colon when a scheme is included.
	  optional string scheme = 1;

	  // The kinds of slash the conversion code can emit, both before the host
	  // (see `slashes`) and between path segments (see `path_separator`).
	  enum Slash {
	    NONE = 0;      // Separate path segments using ""
	    FORWARD = 1;   // Separate path segments using /
	    BACKWARD = 2;  // Separate path segments using \
	  }

	  // The syntax rules of the two slashes that precede the host in a URL are
	  // surprisingly complex. They are not required, even if a scheme is included
	  // (http:example.com is treated as valid), and are valid even if a scheme is
	  // not included (//example.com is treated as file:///example.com). They can
	  // even be backslashes (http:\\example.com and http\/example.com are both
	  // valid) and there can be any number of them (http:/example.com and
	  // http://////example.com are both valid).
	  // We therefore define slashes as a list of enum values (repeated Slash).
	  // The conversion code reads this list to append the appropriate kind and
	  // number of slashes to the URL.
	  repeated Slash slashes = 2 [packed = true];

	  // The [user[:password]@] part of the URL is called the userinfo.
	  // Userinfo is not mandatory, but if it is included in a URL, then it must
	  // contain a string called user. There is another optional field in userinfo
	  // called the password. If a password is included, the user must be separated
	  // from it by ":". In either case, the userinfo must be separated from the
	  // host by "@". A URL must have a host if it has a userinfo.
	  // These requirements are ensured by the conversion code.
	  message Userinfo {
	    // Mandatory whenever a Userinfo is present (see above), hence `required`.
	    required string user = 1;
	    // Optional; separated from the user by ":" when present.
	    optional string password = 2;
	  }
	  optional Userinfo userinfo = 3;

	  // Hosts, like most else in our Url definition, are optional (there are
	  // URLs such as data URLs that do not have hosts).
	  optional string host = 4;

	  // Ports are unsigned 16-bit integers (0 to 2^16 - 1). The closest type to
	  // this in the proto2 format is uint32, so this field can also carry larger
	  // values. If a port number is specified it must be preceded by a colon
	  // (consider "google.com80": 80 will be interpreted as part of the host).
	  // The conversion code will ensure this is the case.
	  optional uint32 port = 5;

	  // The rules for the path are somewhat complex. A path is not required,
	  // however if it follows a port or host, it must start with "/" according
	  // to the RFC, though Chromium accepts "\" as it converts all backslashes to
	  // slashes. It does not need to start with "/" if there is no host (in data
	  // URLs for example). Thus we define path as a repeated string where each
	  // member contains a segment of the path and will be preceded by the
	  // path_separator. The one exception to this is for the first segment: if
	  // path_separator == NONE and there is a non-empty path and host, then the
	  // first segment will be preceded by "/".
	  repeated string path = 6;

	  // Separator written before each path segment; reads as FORWARD when unset.
	  // Declared `optional` rather than `required`: a required field must always
	  // be set explicitly, which contradicts giving it a default. The field
	  // number, type and default are unchanged, so existing serialized messages
	  // that set this field still parse the same way.
	  optional Slash path_separator = 7 [default = FORWARD];

	  // A query must be preceded by "?". This will be ensured in the conversion
	  // code. Queries can have many components which the converter will separate
	  // using "&", as is the convention.
	  repeated string query = 8;

	  // A fragment must be preceded by "#". This will be ensured in the
	  // conversion code.
	  optional string fragment = 9;
	}