// Copyright 2013 The Chromium Authors // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "net/socket/tcp_socket.h" #include #include #include #include #include #include "base/atomicops.h" #include "base/files/file_path.h" #include "base/files/file_util.h" #include "base/functional/bind.h" #include "base/lazy_instance.h" #include "base/logging.h" #include "base/metrics/histogram_macros.h" #include "base/posix/eintr_wrapper.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_piece.h" #include "base/time/time.h" #include "build/build_config.h" #include "net/base/address_list.h" #include "net/base/io_buffer.h" #include "net/base/ip_endpoint.h" #include "net/base/net_errors.h" #include "net/base/network_activity_monitor.h" #include "net/base/network_change_notifier.h" #include "net/base/sockaddr_storage.h" #include "net/base/sys_addrinfo.h" #include "net/base/tracing.h" #include "net/http/http_util.h" #include "net/log/net_log.h" #include "net/log/net_log_event_type.h" #include "net/log/net_log_source.h" #include "net/log/net_log_source_type.h" #include "net/log/net_log_values.h" #include "net/socket/socket_net_log_params.h" #include "net/socket/socket_options.h" #include "net/socket/socket_posix.h" #include "net/socket/socket_tag.h" #include "net/traffic_annotation/network_traffic_annotation.h" #if BUILDFLAG(IS_ANDROID) #include "net/android/network_library.h" #endif // BUILDFLAG(IS_ANDROID) // If we don't have a definition for TCPI_OPT_SYN_DATA, create one. #if !defined(TCPI_OPT_SYN_DATA) #define TCPI_OPT_SYN_DATA 32 #endif // Fuchsia defines TCP_INFO, but it's not implemented. // TODO(crbug.com/758294): Enable TCP_INFO on Fuchsia once it's implemented // there (see NET-160). #if defined(TCP_INFO) && !BUILDFLAG(IS_FUCHSIA) #define HAVE_TCP_INFO #endif namespace net { namespace { // SetTCPKeepAlive sets SO_KEEPALIVE. bool SetTCPKeepAlive(int fd, bool enable, int delay) { // Enabling TCP keepalives is the same on all platforms. int on = enable ? 1 : 0; if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on))) { PLOG(ERROR) << "Failed to set SO_KEEPALIVE on fd: " << fd; return false; } // If we disabled TCP keep alive, our work is done here. if (!enable) return true; // A delay of 0 doesn't work, and is the default, so ignore that and rely on // whatever the OS defaults are once we turned it on above. if (delay) { #if BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS) || BUILDFLAG(IS_ANDROID) // Setting the keepalive interval varies by platform. // Set seconds until first TCP keep alive. if (setsockopt(fd, SOL_TCP, TCP_KEEPIDLE, &delay, sizeof(delay))) { PLOG(ERROR) << "Failed to set TCP_KEEPIDLE on fd: " << fd; return false; } // Set seconds between TCP keep alives. if (setsockopt(fd, SOL_TCP, TCP_KEEPINTVL, &delay, sizeof(delay))) { PLOG(ERROR) << "Failed to set TCP_KEEPINTVL on fd: " << fd; return false; } #elif BUILDFLAG(IS_APPLE) if (setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &delay, sizeof(delay))) { PLOG(ERROR) << "Failed to set TCP_KEEPALIVE on fd: " << fd; return false; } #endif } return true; } #if defined(HAVE_TCP_INFO) // Returns a zero value if the transport RTT is unavailable. base::TimeDelta GetTransportRtt(SocketDescriptor fd) { // It is possible for the value returned by getsockopt(TCP_INFO) to be // legitimately zero due to the way the RTT is calculated where fractions are // rounded down. This is specially true for virtualized environments with // paravirtualized clocks. // // If getsockopt(TCP_INFO) succeeds and the tcpi_rtt is zero, this code // assumes that the RTT got rounded down to zero and rounds it back up to this // value so that callers can assume that no packets defy the laws of physics. constexpr uint32_t kMinValidRttMicros = 1; tcp_info info; // Reset |tcpi_rtt| to verify if getsockopt() actually updates |tcpi_rtt|. info.tcpi_rtt = 0; socklen_t info_len = sizeof(tcp_info); if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &info_len) != 0) return base::TimeDelta(); // Verify that |tcpi_rtt| in tcp_info struct was updated. Note that it's // possible that |info_len| is shorter than |sizeof(tcp_info)| which implies // that only a subset of values in |info| may have been updated by // getsockopt(). if (info_len < static_cast(offsetof(tcp_info, tcpi_rtt) + sizeof(info.tcpi_rtt))) { return base::TimeDelta(); } return base::Microseconds(std::max(info.tcpi_rtt, kMinValidRttMicros)); } #endif // defined(TCP_INFO) #if BUILDFLAG(IS_APPLE) && !BUILDFLAG(CRONET_BUILD) // Returns true if `socket` is connected to 0.0.0.0, false otherwise. // For detecting slow socket close due to a MacOS bug // (https://crbug.com/1194888). bool PeerIsZeroIPv4(const TCPSocketPosix& socket) { IPEndPoint peer; if (socket.GetPeerAddress(&peer) != OK) return false; return peer.address().IsIPv4() && peer.address().IsZero(); } #endif // BUILDFLAG(IS_APPLE) && !BUILDFLAG(CRONET_BUILD) } // namespace //----------------------------------------------------------------------------- TCPSocketPosix::TCPSocketPosix( std::unique_ptr socket_performance_watcher, NetLog* net_log, const NetLogSource& source) : socket_performance_watcher_(std::move(socket_performance_watcher)), net_log_(NetLogWithSource::Make(net_log, NetLogSourceType::SOCKET)) { net_log_.BeginEventReferencingSource(NetLogEventType::SOCKET_ALIVE, source); } TCPSocketPosix::TCPSocketPosix( std::unique_ptr socket_performance_watcher, NetLogWithSource net_log_source) : socket_performance_watcher_(std::move(socket_performance_watcher)), net_log_(net_log_source) { net_log_.BeginEvent(NetLogEventType::SOCKET_ALIVE); } TCPSocketPosix::~TCPSocketPosix() { net_log_.EndEvent(NetLogEventType::SOCKET_ALIVE); Close(); } int TCPSocketPosix::Open(AddressFamily family) { DCHECK(!socket_); socket_ = std::make_unique(); int rv = socket_->Open(ConvertAddressFamily(family)); if (rv != OK) socket_.reset(); if (rv == OK && tag_ != SocketTag()) tag_.Apply(socket_->socket_fd()); return rv; } int TCPSocketPosix::BindToNetwork(handles::NetworkHandle network) { DCHECK(IsValid()); DCHECK(!IsConnected()); #if BUILDFLAG(IS_ANDROID) return net::android::BindToNetwork(socket_->socket_fd(), network); #else NOTIMPLEMENTED(); return ERR_NOT_IMPLEMENTED; #endif // BUILDFLAG(IS_ANDROID) } int TCPSocketPosix::AdoptConnectedSocket(SocketDescriptor socket, const IPEndPoint& peer_address) { DCHECK(!socket_); SockaddrStorage storage; if (!peer_address.ToSockAddr(storage.addr, &storage.addr_len) && // For backward compatibility, allows the empty address. !(peer_address == IPEndPoint())) { return ERR_ADDRESS_INVALID; } socket_ = std::make_unique(); int rv = socket_->AdoptConnectedSocket(socket, storage); if (rv != OK) socket_.reset(); if (rv == OK && tag_ != SocketTag()) tag_.Apply(socket_->socket_fd()); return rv; } int TCPSocketPosix::AdoptUnconnectedSocket(SocketDescriptor socket) { DCHECK(!socket_); socket_ = std::make_unique(); int rv = socket_->AdoptUnconnectedSocket(socket); if (rv != OK) socket_.reset(); if (rv == OK && tag_ != SocketTag()) tag_.Apply(socket_->socket_fd()); return rv; } int TCPSocketPosix::Bind(const IPEndPoint& address) { DCHECK(socket_); SockaddrStorage storage; if (!address.ToSockAddr(storage.addr, &storage.addr_len)) return ERR_ADDRESS_INVALID; return socket_->Bind(storage); } int TCPSocketPosix::Listen(int backlog) { DCHECK(socket_); return socket_->Listen(backlog); } int TCPSocketPosix::Accept(std::unique_ptr* tcp_socket, IPEndPoint* address, CompletionOnceCallback callback) { DCHECK(tcp_socket); DCHECK(!callback.is_null()); DCHECK(socket_); DCHECK(!accept_socket_); net_log_.BeginEvent(NetLogEventType::TCP_ACCEPT); int rv = socket_->Accept( &accept_socket_, base::BindOnce(&TCPSocketPosix::AcceptCompleted, base::Unretained(this), tcp_socket, address, std::move(callback))); if (rv != ERR_IO_PENDING) rv = HandleAcceptCompleted(tcp_socket, address, rv); return rv; } int TCPSocketPosix::Connect(const IPEndPoint& address, CompletionOnceCallback callback) { DCHECK(socket_); if (!logging_multiple_connect_attempts_) LogConnectBegin(AddressList(address)); net_log_.BeginEvent(NetLogEventType::TCP_CONNECT_ATTEMPT, [&] { return CreateNetLogIPEndPointParams(&address); }); SockaddrStorage storage; if (!address.ToSockAddr(storage.addr, &storage.addr_len)) return ERR_ADDRESS_INVALID; int rv = socket_->Connect( storage, base::BindOnce(&TCPSocketPosix::ConnectCompleted, base::Unretained(this), std::move(callback))); if (rv != ERR_IO_PENDING) rv = HandleConnectCompleted(rv); return rv; } bool TCPSocketPosix::IsConnected() const { if (!socket_) return false; return socket_->IsConnected(); } bool TCPSocketPosix::IsConnectedAndIdle() const { return socket_ && socket_->IsConnectedAndIdle(); } int TCPSocketPosix::Read(IOBuffer* buf, int buf_len, CompletionOnceCallback callback) { DCHECK(socket_); DCHECK(!callback.is_null()); int rv = socket_->Read( buf, buf_len, base::BindOnce( &TCPSocketPosix::ReadCompleted, // Grab a reference to |buf| so that ReadCompleted() can still // use it when Read() completes, as otherwise, this transfers // ownership of buf to socket. base::Unretained(this), base::WrapRefCounted(buf), std::move(callback))); if (rv != ERR_IO_PENDING) rv = HandleReadCompleted(buf, rv); return rv; } int TCPSocketPosix::ReadIfReady(IOBuffer* buf, int buf_len, CompletionOnceCallback callback) { DCHECK(socket_); DCHECK(!callback.is_null()); int rv = socket_->ReadIfReady( buf, buf_len, base::BindOnce(&TCPSocketPosix::ReadIfReadyCompleted, base::Unretained(this), std::move(callback))); if (rv != ERR_IO_PENDING) rv = HandleReadCompleted(buf, rv); return rv; } int TCPSocketPosix::CancelReadIfReady() { DCHECK(socket_); return socket_->CancelReadIfReady(); } int TCPSocketPosix::Write( IOBuffer* buf, int buf_len, CompletionOnceCallback callback, const NetworkTrafficAnnotationTag& traffic_annotation) { DCHECK(socket_); DCHECK(!callback.is_null()); CompletionOnceCallback write_callback = base::BindOnce( &TCPSocketPosix::WriteCompleted, // Grab a reference to |buf| so that WriteCompleted() can still // use it when Write() completes, as otherwise, this transfers // ownership of buf to socket. base::Unretained(this), base::WrapRefCounted(buf), std::move(callback)); int rv; rv = socket_->Write(buf, buf_len, std::move(write_callback), traffic_annotation); if (rv != ERR_IO_PENDING) rv = HandleWriteCompleted(buf, rv); return rv; } int TCPSocketPosix::GetLocalAddress(IPEndPoint* address) const { DCHECK(address); if (!socket_) return ERR_SOCKET_NOT_CONNECTED; SockaddrStorage storage; int rv = socket_->GetLocalAddress(&storage); if (rv != OK) return rv; if (!address->FromSockAddr(storage.addr, storage.addr_len)) return ERR_ADDRESS_INVALID; return OK; } int TCPSocketPosix::GetPeerAddress(IPEndPoint* address) const { DCHECK(address); if (!IsConnected()) return ERR_SOCKET_NOT_CONNECTED; SockaddrStorage storage; int rv = socket_->GetPeerAddress(&storage); if (rv != OK) return rv; if (!address->FromSockAddr(storage.addr, storage.addr_len)) return ERR_ADDRESS_INVALID; return OK; } int TCPSocketPosix::SetDefaultOptionsForServer() { DCHECK(socket_); return AllowAddressReuse(); } void TCPSocketPosix::SetDefaultOptionsForClient() { DCHECK(socket_); // This mirrors the behaviour on Windows. See the comment in // tcp_socket_win.cc after searching for "NODELAY". // If SetTCPNoDelay fails, we don't care. SetTCPNoDelay(socket_->socket_fd(), true); // TCP keep alive wakes up the radio, which is expensive on mobile. Do not // enable it there. It's useful to prevent TCP middleboxes from timing out // connection mappings. Packets for timed out connection mappings at // middleboxes will either lead to: // a) Middleboxes sending TCP RSTs. It's up to higher layers to check for this // and retry. The HTTP network transaction code does this. // b) Middleboxes just drop the unrecognized TCP packet. This leads to the TCP // stack retransmitting packets per TCP stack retransmission timeouts, which // are very high (on the order of seconds). Given the number of // retransmissions required before killing the connection, this can lead to // tens of seconds or even minutes of delay, depending on OS. #if !BUILDFLAG(IS_ANDROID) && !BUILDFLAG(IS_IOS) const int kTCPKeepAliveSeconds = 45; SetTCPKeepAlive(socket_->socket_fd(), true, kTCPKeepAliveSeconds); #endif } int TCPSocketPosix::AllowAddressReuse() { DCHECK(socket_); return SetReuseAddr(socket_->socket_fd(), true); } int TCPSocketPosix::SetReceiveBufferSize(int32_t size) { DCHECK(socket_); return SetSocketReceiveBufferSize(socket_->socket_fd(), size); } int TCPSocketPosix::SetSendBufferSize(int32_t size) { DCHECK(socket_); return SetSocketSendBufferSize(socket_->socket_fd(), size); } bool TCPSocketPosix::SetKeepAlive(bool enable, int delay) { if (!socket_) return false; return SetTCPKeepAlive(socket_->socket_fd(), enable, delay); } bool TCPSocketPosix::SetNoDelay(bool no_delay) { if (!socket_) return false; return SetTCPNoDelay(socket_->socket_fd(), no_delay) == OK; } int TCPSocketPosix::SetIPv6Only(bool ipv6_only) { CHECK(socket_); return ::net::SetIPv6Only(socket_->socket_fd(), ipv6_only); } void TCPSocketPosix::Close() { #if BUILDFLAG(IS_APPLE) && !BUILDFLAG(CRONET_BUILD) // A MacOS bug can cause sockets to 0.0.0.0 to take 1 second to close. Log a // trace event for this case so that it can be correlated with jank in traces. // Use the "base" category since "net" isn't enabled by default. See // https://crbug.com/1194888. TRACE_EVENT("base", PeerIsZeroIPv4(*this) ? perfetto::StaticString{"CloseSocketTCP.PeerIsZero"} : perfetto::StaticString{"CloseSocketTCP"}); #endif // BUILDFLAG(IS_APPLE) && !BUILDFLAG(CRONET_BUILD) socket_.reset(); tag_ = SocketTag(); } bool TCPSocketPosix::IsValid() const { return socket_ != nullptr && socket_->socket_fd() != kInvalidSocket; } void TCPSocketPosix::DetachFromThread() { socket_->DetachFromThread(); } void TCPSocketPosix::StartLoggingMultipleConnectAttempts( const AddressList& addresses) { if (!logging_multiple_connect_attempts_) { logging_multiple_connect_attempts_ = true; LogConnectBegin(addresses); } else { NOTREACHED(); } } void TCPSocketPosix::EndLoggingMultipleConnectAttempts(int net_error) { if (logging_multiple_connect_attempts_) { LogConnectEnd(net_error); logging_multiple_connect_attempts_ = false; } else { NOTREACHED(); } } SocketDescriptor TCPSocketPosix::ReleaseSocketDescriptorForTesting() { SocketDescriptor socket_descriptor = socket_->ReleaseConnectedSocket(); socket_.reset(); return socket_descriptor; } SocketDescriptor TCPSocketPosix::SocketDescriptorForTesting() const { return socket_->socket_fd(); } void TCPSocketPosix::ApplySocketTag(const SocketTag& tag) { if (IsValid() && tag != tag_) { tag.Apply(socket_->socket_fd()); } tag_ = tag; } void TCPSocketPosix::AcceptCompleted( std::unique_ptr* tcp_socket, IPEndPoint* address, CompletionOnceCallback callback, int rv) { DCHECK_NE(ERR_IO_PENDING, rv); std::move(callback).Run(HandleAcceptCompleted(tcp_socket, address, rv)); } int TCPSocketPosix::HandleAcceptCompleted( std::unique_ptr* tcp_socket, IPEndPoint* address, int rv) { if (rv == OK) rv = BuildTcpSocketPosix(tcp_socket, address); if (rv == OK) { net_log_.EndEvent(NetLogEventType::TCP_ACCEPT, [&] { return CreateNetLogIPEndPointParams(address); }); } else { net_log_.EndEventWithNetErrorCode(NetLogEventType::TCP_ACCEPT, rv); } return rv; } int TCPSocketPosix::BuildTcpSocketPosix( std::unique_ptr* tcp_socket, IPEndPoint* address) { DCHECK(accept_socket_); SockaddrStorage storage; if (accept_socket_->GetPeerAddress(&storage) != OK || !address->FromSockAddr(storage.addr, storage.addr_len)) { accept_socket_.reset(); return ERR_ADDRESS_INVALID; } *tcp_socket = std::make_unique(nullptr, net_log_.net_log(), net_log_.source()); (*tcp_socket)->socket_ = std::move(accept_socket_); return OK; } void TCPSocketPosix::ConnectCompleted(CompletionOnceCallback callback, int rv) { DCHECK_NE(ERR_IO_PENDING, rv); std::move(callback).Run(HandleConnectCompleted(rv)); } int TCPSocketPosix::HandleConnectCompleted(int rv) { // Log the end of this attempt (and any OS error it threw). if (rv != OK) { net_log_.EndEventWithIntParams(NetLogEventType::TCP_CONNECT_ATTEMPT, "os_error", errno); tag_ = SocketTag(); } else { net_log_.EndEvent(NetLogEventType::TCP_CONNECT_ATTEMPT); NotifySocketPerformanceWatcher(); } // Give a more specific error when the user is offline. if (rv == ERR_ADDRESS_UNREACHABLE && NetworkChangeNotifier::IsOffline()) rv = ERR_INTERNET_DISCONNECTED; if (!logging_multiple_connect_attempts_) LogConnectEnd(rv); return rv; } void TCPSocketPosix::LogConnectBegin(const AddressList& addresses) const { net_log_.BeginEvent(NetLogEventType::TCP_CONNECT, [&] { return addresses.NetLogParams(); }); } void TCPSocketPosix::LogConnectEnd(int net_error) const { if (net_error != OK) { net_log_.EndEventWithNetErrorCode(NetLogEventType::TCP_CONNECT, net_error); return; } net_log_.EndEvent(NetLogEventType::TCP_CONNECT, [&] { net::IPEndPoint local_address; int net_error = GetLocalAddress(&local_address); net::IPEndPoint remote_address; if (net_error == net::OK) net_error = GetPeerAddress(&remote_address); if (net_error != net::OK) return NetLogParamsWithInt("get_address_net_error", net_error); return CreateNetLogAddressPairParams(local_address, remote_address); }); } void TCPSocketPosix::ReadCompleted(const scoped_refptr& buf, CompletionOnceCallback callback, int rv) { DCHECK_NE(ERR_IO_PENDING, rv); std::move(callback).Run(HandleReadCompleted(buf.get(), rv)); } void TCPSocketPosix::ReadIfReadyCompleted(CompletionOnceCallback callback, int rv) { DCHECK_NE(ERR_IO_PENDING, rv); DCHECK_GE(OK, rv); HandleReadCompletedHelper(rv); std::move(callback).Run(rv); } int TCPSocketPosix::HandleReadCompleted(IOBuffer* buf, int rv) { HandleReadCompletedHelper(rv); if (rv < 0) return rv; // Notify the watcher only if at least 1 byte was read. if (rv > 0) NotifySocketPerformanceWatcher(); net_log_.AddByteTransferEvent(NetLogEventType::SOCKET_BYTES_RECEIVED, rv, buf->data()); activity_monitor::IncrementBytesReceived(rv); return rv; } void TCPSocketPosix::HandleReadCompletedHelper(int rv) { if (rv < 0) { NetLogSocketError(net_log_, NetLogEventType::SOCKET_READ_ERROR, rv, errno); } } void TCPSocketPosix::WriteCompleted(const scoped_refptr& buf, CompletionOnceCallback callback, int rv) { DCHECK_NE(ERR_IO_PENDING, rv); std::move(callback).Run(HandleWriteCompleted(buf.get(), rv)); } int TCPSocketPosix::HandleWriteCompleted(IOBuffer* buf, int rv) { if (rv < 0) { NetLogSocketError(net_log_, NetLogEventType::SOCKET_WRITE_ERROR, rv, errno); return rv; } // Notify the watcher only if at least 1 byte was written. if (rv > 0) NotifySocketPerformanceWatcher(); net_log_.AddByteTransferEvent(NetLogEventType::SOCKET_BYTES_SENT, rv, buf->data()); return rv; } void TCPSocketPosix::NotifySocketPerformanceWatcher() { #if defined(HAVE_TCP_INFO) // Check if |socket_performance_watcher_| is interested in receiving a RTT // update notification. if (!socket_performance_watcher_ || !socket_performance_watcher_->ShouldNotifyUpdatedRTT()) { return; } base::TimeDelta rtt = GetTransportRtt(socket_->socket_fd()); if (rtt.is_zero()) return; socket_performance_watcher_->OnUpdatedRTTAvailable(rtt); #endif // defined(TCP_INFO) } bool TCPSocketPosix::GetEstimatedRoundTripTime(base::TimeDelta* out_rtt) const { DCHECK(out_rtt); if (!socket_) return false; #if defined(HAVE_TCP_INFO) base::TimeDelta rtt = GetTransportRtt(socket_->socket_fd()); if (rtt.is_zero()) return false; *out_rtt = rtt; return true; #else return false; #endif // defined(TCP_INFO) } } // namespace net