# Copyright 2014 - present MongoDB, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. use v5.8.0; use strict; use warnings; package MongoDB::_Protocol; use version; our $VERSION = 'v2.2.2'; use MongoDB::_Constants; use MongoDB::Error; use MongoDB::_Types qw/ to_IxHash /; use Compress::Zlib (); use constant { OP_REPLY => 1, # Reply to a client request. responseTo is set OP_UPDATE => 2001, # update document OP_INSERT => 2002, # insert new document RESERVED => 2003, # formerly used for OP_GET_BY_OID OP_QUERY => 2004, # query a collection OP_GET_MORE => 2005, # Get more data from a query. See Cursors OP_DELETE => 2006, # Delete documents OP_KILL_CURSORS => 2007, # Tell database client is done with a cursor OP_COMPRESSED => 2012, # wire compression OP_MSG => 2013, # generic bi-directional op code }; use constant { PERL58 => $] lt '5.010', MIN_REPLY_LENGTH => 4 * 5 + 8 + 4 * 2, MAX_REQUEST_ID => 2**31 - 1, }; # Perl < 5.10, pack doesn't have endianness modifiers, and the MongoDB wire # protocol mandates little-endian order. For 5.10, we can use modifiers but # before that we only work on platforms that are natively little-endian. We # die during configuration on big endian platforms on 5.8 use constant { P_HEADER => PERL58 ? "l4" : "l<4", }; # These ops all include P_HEADER already use constant { P_UPDATE => PERL58 ? "l5Z*l" : "l<5Z*l<", P_INSERT => PERL58 ? "l5Z*" : "l<5Z*", P_QUERY => PERL58 ? "l5Z*l2" : "l<5Z*l<2", P_GET_MORE => PERL58 ? "l5Z*la8" : "l<5Z*l PERL58 ? "l5Z*l" : "l<5Z*l<", P_KILL_CURSORS => PERL58 ? "l6(a8)*" : "l<6(a8)*", P_REPLY_HEADER => PERL58 ? "l5a8l2" : "l<5a8l<2", P_COMPRESSED => PERL58 ? "l6C" : "l<6C", P_MSG => PERL58 ? "l5" : "l<5", P_MSG_PL_1 => PERL58 ? "lZ*" : "l length(pack P_HEADER, 0, 0, 0, 0), # length for OP_COMPRESSED P_COMPRESSED_PREFIX_LENGTH => length(pack P_COMPRESSED, 0, 0, 0, 0, 0, 0, 0), P_MSG_PREFIX_LENGTH => length(pack P_MSG, 0, 0, 0, 0, 0), }; # struct OP_MSG { # MsgHeader header; // standard message header, with opCode 2013 # uint32 flagBits; # Section+ sections; # [uint32 checksum;] # }; # # struct Section { # uint8 payloadType; # union payload { # document document; // payloadType == 0 # struct sequence { // payloadType == 1 # int32 size; # cstring identifier; # document* documents; # }; # }; # }; use constant { P_SECTION_PAYLOAD_TYPE => "C", P_SECTION_SEQUENCE_SIZE => PERL58 ? "l" : "l<", }; use constant { P_SECTION_HEADER => P_SECTION_PAYLOAD_TYPE . P_SECTION_SEQUENCE_SIZE, P_SECTION_PAYLOAD_TYPE_LENGTH => length( pack P_SECTION_PAYLOAD_TYPE, 0 ), P_SECTION_SEQUENCE_SIZE_LENGTH => length( pack P_SECTION_SEQUENCE_SIZE, 0 ), }; # Takes a command, returns sections ready for joining sub prepare_sections { my ( $codec, $cmd ) = @_; my %split_commands = ( insert => 'documents', update => 'updates', delete => 'deletes', ); $cmd = to_IxHash( $cmd ); # Command is always first key in cmd my $command = do { my @keys = $cmd->Keys; $keys[0] }; my $ident = $split_commands{ $command }; if ( defined $ident ) { my $collection = $cmd->FETCH( $command ); my $docs = $cmd->FETCH( $ident ); # Assumes only a single split on the commands return ( { type => 0, documents => [ [ # Done specifically to not alter $cmd. # The command ($command from earlier) is assumed to be # first in the Keys set map { $_ eq $ident ? () : ( $_, $cmd->FETCH( $_ ) ) } $cmd->Keys() ] ], }, { type => 1, identifier => $ident, documents => $docs, } ); } else { # Not a recognised command to split, just set up ready for later return ( { type => 0, documents => [ $cmd ], } ); } } # encode_section # # MongoDB::_Protocol::encode_section( $codec, { # type => 0, # 0 or 1 # identifier => undef, # optional in type 0 # documents => [ $cmd ] # must be an array of documents # }); # # Takes a section hashref and encodes it for joining sub encode_section { my ( $codec, $section ) = @_; my $type = $section->{type}; my $ident = $section->{identifier}; my @docs = map { $codec->encode_one( $_ ) } @{ $section->{documents} }; my $pl; if ( $type == 0 ) { # Assume a single doc if payload type is 0 $pl = $docs[0]; } elsif ( $type == 1 ) { $pl = pack( P_MSG_PL_1, 0, $ident ) . join( '', @docs ); # calculate size substr( $pl, 0, 4, pack( P_SECTION_SEQUENCE_SIZE, length( $pl ) ) ); } else { MongoDB::ProtocolError->throw("Encode: Unsupported section payload type"); } # Prepend the section type $pl = pack( P_SECTION_PAYLOAD_TYPE, $type ) . $pl; return $pl; } # decode_section # # MongoDB::_Protocol::decode_section( $section ) # # Takes an encoded section and decodes it, exactly the opposite of encode_section. sub decode_section { my ( $doc ) = @_; my ( $type, $ident, @enc_docs ); my $section = {}; ( $type ) = unpack( 'C', $doc ); my $payload = substr( $doc, P_SECTION_PAYLOAD_TYPE_LENGTH ); $section->{ type } = $type; # Pull size off and double check. Size is in the same place regardless of # payload type, as its a similar struct to a raw document my ( $pl_size ) = unpack( P_SECTION_SEQUENCE_SIZE, $payload ); unless ( $pl_size == length( $payload ) ) { MongoDB::ProtocolError->throw("Decode: Section size incorrect"); } if ( $type == 0 ) { # payload is a raw document push @enc_docs, $payload; } elsif ( $type == 1 ) { $payload = substr( $payload, P_SECTION_SEQUENCE_SIZE_LENGTH ); # Pull out then remove ( $ident ) = unpack( 'Z*', $payload ); $section->{ identifier } = $ident; $payload = substr( $payload, length ( $ident ) + 1 ); # add one for null termination while ( length $payload ) { my $doc_size = unpack( P_SECTION_SEQUENCE_SIZE, $payload ); my $doc = substr( $payload, 0, $doc_size ); $payload = substr( $payload, $doc_size ); push @enc_docs, $doc; } } else { MongoDB::ProtocolError->throw("Decode: Unsupported section payload type"); } $section->{ documents } = \@enc_docs; return $section; } # method split_sections( $msg ) # # Splits sections based on their payload length header. Returns an array of # sections in packed form sub split_sections { my $msg = shift; my @sections; while ( length $msg ) { # get first section length my ( undef, $section_length ) = unpack( P_SECTION_HEADER, $msg ); # Add the payload type length as we reached over it for the length my $section = substr( $msg, 0, $section_length + P_SECTION_PAYLOAD_TYPE_LENGTH ); push @sections, decode_section( $section ); $msg = substr( $msg, $section_length + P_SECTION_PAYLOAD_TYPE_LENGTH ); } return @sections; } use constant { MSG_FB_CHECKSUM => 0, MSG_FB_MORE_TO_COME => 1, }; sub write_msg { my ( $codec, $flags, $cmd ) = @_; my $flagbits = 0; # checksum is reserved for future use if ( $flags ) { $flagbits = ( $flags->{checksum_present} ? 1 << MSG_FB_CHECKSUM : 0 ) | ( $flags->{more_to_come} ? 1 << MSG_FB_MORE_TO_COME : 0 ); } my $request_id = int( rand( MAX_REQUEST_ID ) ); my @sections = prepare_sections( $codec, $cmd ); my $encoded_sections = join ('', ( map { encode_section( $codec, $_ ) } @sections ) ); my $msg = pack( P_MSG, 0, $request_id, 0, OP_MSG, 0 ) . $encoded_sections; substr( $msg, 0, 4, pack( P_INT32, length($msg) ) ); return ( $msg, $request_id ); } # struct OP_COMPRESSED { # MsgHeader header; // standard message header # int32_t originalOpcode; // wrapped op code # int32_t uncompressedSize; // size of deflated wo. header # uint8_t compressorId; // compressor # char* compressedMessage; // compressed contents # }; # Note that Zlib is in perl core (since 5.9.3) so shouldnt need lazy loading sub _assert_zstd { MongoDB::UsageError->throw(qq/Compress::Zstd must be installed to support zstd compression\n/) unless eval { require Compress::Zstd }; } sub _assert_snappy { MongoDB::UsageError->throw(qq/Compress::Snappy must be installed to support snappy compression\n/) unless eval { require Compress::Snappy }; } # decompressors indexed by ID. my @DECOMPRESSOR = ( # none sub { shift }, # snappy sub { Compress::Snappy::decompress(shift) }, # zlib sub { Compress::Zlib::uncompress(shift) }, # zstd sub { Compress::Zstd::decompress(shift) }, ); # construct compressor by name with options sub get_compressor { my ($name, $comp_opt) = @_; if ($name eq 'none') { return { id => 0, callback => sub { shift }, }; } elsif ($name eq 'snappy') { _assert_snappy(); return { id => 1, callback => sub { Compress::Snappy::compress(shift) }, }; } elsif ($name eq 'zlib') { my $level = $comp_opt->{zlib_compression_level}; $level = undef if defined $level and $level < 0; return { id => 2, callback => sub { return Compress::Zlib::compress( $_[0], defined($level) ? $level : Compress::Zlib::Z_DEFAULT_COMPRESSION(), ); }, }; } elsif ($name eq 'zstd') { _assert_zstd(); return { id => 3, callback => sub { Compress::Zstd::compress(shift) }, }; } else { MongoDB::ProtocolError->throw("Unknown compressor '$name'"); } } # compress message sub compress { my ($msg, $compressor) = @_; my ($len, $request_id, $response_to, $op_code) = unpack(P_HEADER, $msg); $msg = substr $msg, P_HEADER_LENGTH; my $msg_comp = pack( P_COMPRESSED, 0, $request_id, $response_to, OP_COMPRESSED, $op_code, length($msg), $compressor->{id}, ).$compressor->{callback}->($msg); substr($msg_comp, 0, 4, pack(P_INT32, length($msg_comp))); return $msg_comp; } # attempt to uncompress message # messages that aren't OP_COMPRESSED are returned as-is sub try_uncompress { my ($msg) = @_; my ($len, $request_id, $response_to, $op_code, $orig_op_code, $orig_len, $comp_id) = unpack(P_COMPRESSED, $msg); return $msg if $op_code != OP_COMPRESSED; $msg = substr $msg, P_COMPRESSED_PREFIX_LENGTH; my $decompressor = $DECOMPRESSOR[$comp_id] or MongoDB::ProtocolError->throw("Unknown compressor ID '$comp_id'"); my $decomp_msg = $decompressor->($msg); my $done = pack(P_HEADER, $orig_len, $request_id, $response_to, $orig_op_code) .$decomp_msg; return $done; } # struct OP_UPDATE { # MsgHeader header; // standard message header # int32 ZERO; // 0 - reserved for future use # cstring fullCollectionName; // "dbname.collectionname" # int32 flags; // bit vector. see below # document selector; // the query to select the document # document update; // specification of the update to perform # } use constant { U_UPSERT => 0, U_MULTI_UPDATE => 1, }; sub write_update { my ( $ns, $selector, $update, $flags ) = @_; utf8::encode($ns); my $request_id = int( rand( MAX_REQUEST_ID ) ); my $bitflags = 0; if ($flags) { $bitflags = ( $flags->{upsert} ? 1 << U_UPSERT : 0 ) | ( $flags->{multi} ? 1 << U_MULTI_UPDATE : 0 ); } my $msg = pack( P_UPDATE, 0, $request_id, 0, OP_UPDATE, 0, $ns, $bitflags ) . $selector . $update; substr( $msg, 0, 4, pack( P_INT32, length($msg) ) ); return $msg, $request_id; } # struct OP_INSERT { # MsgHeader header; // standard message header # int32 flags; // bit vector - see below # cstring fullCollectionName; // "dbname.collectionname" # document* documents; // one or more documents to insert into the collection # } use constant { I_CONTINUE_ON_ERROR => 0, }; sub write_insert { my ( $ns, $bson_docs, $flags ) = @_; utf8::encode($ns); my $request_id = int( rand( MAX_REQUEST_ID ) ); my $bitflags = 0; if ($flags) { $bitflags = ( $flags->{continue_on_error} ? 1 << I_CONTINUE_ON_ERROR : 0 ); } my $msg = pack( P_INSERT, 0, $request_id, 0, OP_INSERT, $bitflags, $ns ) . $bson_docs; substr( $msg, 0, 4, pack( P_INT32, length($msg) ) ); return $msg, $request_id; } # struct OP_QUERY { # MsgHeader header; // standard message header # int32 flags; // bit vector of query options. See below for details. # cstring fullCollectionName ; // "dbname.collectionname" # int32 numberToSkip; // number of documents to skip # int32 numberToReturn; // number of documents to return # // in the first OP_REPLY batch # document query; // query object. See below for details. # [ document returnFieldsSelector; ] // Optional. Selector indicating the fields # // to return. See below for details. # } use constant { Q_TAILABLE => 1, Q_SLAVE_OK => 2, Q_NO_CURSOR_TIMEOUT => 4, Q_AWAIT_DATA => 5, Q_EXHAUST => 6, # unsupported (PERL-282) Q_PARTIAL => 7, }; sub write_query { my ( $ns, $query, $fields, $skip, $batch_size, $flags ) = @_; utf8::encode($ns); my $bitflags = 0; if ($flags) { $bitflags = ( $flags->{tailable} ? 1 << Q_TAILABLE : 0 ) | ( $flags->{slave_ok} ? 1 << Q_SLAVE_OK : 0 ) | ( $flags->{await_data} ? 1 << Q_AWAIT_DATA : 0 ) | ( $flags->{immortal} ? 1 << Q_NO_CURSOR_TIMEOUT : 0 ) | ( $flags->{partial} ? 1 << Q_PARTIAL : 0 ); } my $request_id = int( rand( MAX_REQUEST_ID ) ); my $msg = pack( P_QUERY, 0, $request_id, 0, OP_QUERY, $bitflags, $ns, $skip, $batch_size ) . $query . ( defined $fields && length $fields ? $fields : '' ); substr( $msg, 0, 4, pack( P_INT32, length($msg) ) ); return ( $msg, $request_id ); } # struct { # MsgHeader header; // standard message header # int32 ZERO; // 0 - reserved for future use # cstring fullCollectionName; // "dbname.collectionname" # int32 numberToReturn; // number of documents to return # int64 cursorID; // cursorID from the OP_REPLY # } # We treat cursor_id as an opaque string so we don't have to depend # on 64-bit integer support sub write_get_more { my ( $ns, $cursor_id, $batch_size ) = @_; utf8::encode($ns); my $request_id = int( rand( MAX_REQUEST_ID ) ); my $msg = pack( P_GET_MORE, 0, $request_id, 0, OP_GET_MORE, 0, $ns, $batch_size, _pack_cursor_id($cursor_id) ); substr( $msg, 0, 4, pack( P_INT32, length($msg) ) ); return ( $msg, $request_id ); } # struct { # MsgHeader header; // standard message header # int32 ZERO; // 0 - reserved for future use # cstring fullCollectionName; // "dbname.collectionname" # int32 flags; // bit vector - see below for details. # document selector; // query object. See below for details. # } use constant { D_SINGLE_REMOVE => 0, }; sub write_delete { my ( $ns, $selector, $flags ) = @_; utf8::encode($ns); my $request_id = int( rand( MAX_REQUEST_ID ) ); my $bitflags = 0; if ($flags) { $bitflags = ( $flags->{just_one} ? 1 << D_SINGLE_REMOVE : 0 ); } my $msg = pack( P_DELETE, 0, $request_id, 0, OP_DELETE, 0, $ns, $bitflags ) . $selector; substr( $msg, 0, 4, pack( P_INT32, length($msg) ) ); return $msg, $request_id; } # legacy alias { no warnings 'once'; *write_remove = \&write_delete; } # struct { # MsgHeader header; // standard message header # int32 ZERO; // 0 - reserved for future use # int32 numberOfCursorIDs; // number of cursorIDs in message # int64* cursorIDs; // sequence of cursorIDs to close # } sub write_kill_cursors { my (@cursors) = map _pack_cursor_id($_), @_; my $request_id = int( rand( MAX_REQUEST_ID ) ); my $msg = pack( P_KILL_CURSORS, 0, $request_id, 0, OP_KILL_CURSORS, 0, scalar(@cursors), @cursors ); substr( $msg, 0, 4, pack( P_INT32, length($msg) ) ); return $msg, $request_id; } # struct { # // MessageHeader # int32 messageLength; // total message size, including this # int32 requestID; // identifier for this message # int32 responseTo; // requestID from the original request # int32 opCode; // request type - see table below # // OP_REPLY fields # int32 responseFlags; // bit vector - see details below # int64 cursorID; // cursor id if client needs to do get more's # int32 startingFrom; // where in the cursor this reply is starting # int32 numberReturned; // number of documents in the reply # document* documents; // documents # } # We treat cursor_id as an opaque string so we don't have to depend # on 64-bit integer support # flag bits relevant to drivers use constant { R_CURSOR_NOT_FOUND => 0, R_QUERY_FAILURE => 1, R_AWAIT_CAPABLE => 3, }; sub parse_reply { my ( $msg, $request_id ) = @_; MongoDB::ProtocolError->throw("response was truncated") if length($msg) < MIN_REPLY_LENGTH; $msg = try_uncompress($msg); my ( $len, $msg_id, $response_to, $opcode, $bitflags, $cursor_id, $starting_from, $number_returned ) = unpack( P_MSG, $msg ); # pre-check all conditions using a modifier in one statement for speed; # disambiguate afterwards only if an error exists do { if ( length($msg) < $len ) { MongoDB::ProtocolError->throw("response was truncated"); } if ( $opcode != OP_REPLY && $opcode != OP_MSG ) { MongoDB::ProtocolError->throw("response was not OP_REPLY or OP_MSG"); } if ( $response_to != $request_id ) { MongoDB::ProtocolError->throw( "response ID ($response_to) did not match request ID ($request_id)"); } } if ( length($msg) < $len ) || ( ( $opcode != OP_REPLY ) && ( $opcode != OP_MSG ) ) || ( $response_to != $request_id ); if ( $opcode == OP_MSG ) { # XXX Extract and check checksum - future support of crc32c my @sections = split_sections( substr( $msg, P_MSG_PREFIX_LENGTH ) ); # We have none of the other stuff? maybe flags... and an array of docs? erm return { flags => { checksum_present => vec( $bitflags, MSG_FB_CHECKSUM, 1 ), more_to_come => vec( $bitflags, MSG_FB_MORE_TO_COME, 1 ), }, # XXX Assumes the server never sends a type 1 payload. May change in future docs => $sections[0]->{documents}->[0] }; } else { # Yes its two unpacks but its just easier than mapping through to the right size ( $len, $msg_id, $response_to, $opcode, $bitflags, $cursor_id, $starting_from, $number_returned ) = unpack( P_REPLY_HEADER, $msg ); } # returns non-zero cursor_id as blessed object to identify it as an # 8-byte opaque ID rather than an ambiguous Perl scalar. N.B. cursors # from commands are handled differently: they are perl integers or # else Math::BigInt objects substr( $msg, 0, MIN_REPLY_LENGTH, '' ), return { flags => { cursor_not_found => vec( $bitflags, R_CURSOR_NOT_FOUND, 1 ), query_failure => vec( $bitflags, R_QUERY_FAILURE, 1 ), }, cursor_id => ( ( $cursor_id eq CURSOR_ZERO ) ? 0 : bless( \$cursor_id, "MongoDB::_CursorID" ) ), starting_from => $starting_from, number_returned => $number_returned, docs => $msg, }; } #--------------------------------------------------------------------------# # utility functions #--------------------------------------------------------------------------# # CursorID's can come in 3 forms: # # 1. MongoDB::CursorID object (a blessed reference to an 8-byte string) # 2. A perl scalar (an integer) # 3. A Math::BigInt object (64 bit integer on 32-bit perl) # # The _pack_cursor_id function converts any of them to a packed Int64 for # use in OP_GET_MORE or OP_KILL_CURSORS sub _pack_cursor_id { my $cursor_id = shift; if ( ref($cursor_id) eq "MongoDB::_CursorID" ) { $cursor_id = $$cursor_id; } elsif ( ref($cursor_id) eq "Math::BigInt" ) { my $as_hex = $cursor_id->as_hex; # big-endian hex substr( $as_hex, 0, 2, '' ); # remove "0x" my $len = length($as_hex); substr( $as_hex, 0, 0, "0" x ( 16 - $len ) ) if $len < 16; # pad to quad length $cursor_id = pack( "H*", $as_hex ); # packed big-endian $cursor_id = reverse($cursor_id); # reverse to little-endian } elsif (HAS_INT64) { # pack doesn't have endianness modifiers before perl 5.10. # We die during configuration on big-endian platforms on 5.8 $cursor_id = pack( $] lt '5.010' ? "q" : "q<", $cursor_id ); } else { # we on 32-bit perl *and* have a cursor ID that fits in 32 bits, # so pack it as long and pad out to a quad $cursor_id = pack( $] lt '5.010' ? "l" : "l<", $cursor_id ) . ( "\0" x 4 ); } return $cursor_id; } 1; # vim: ts=4 sts=4 sw=4 et: