HDFS-10311: DatanodeConnection::Cancel should not delete the underlying socket. Contributed by James Clampffer
This commit is contained in:
parent
1cbfd6f962
commit
e542db66ee
@ -19,6 +19,7 @@
|
|||||||
#include "common/util.h"
|
#include "common/util.h"
|
||||||
|
|
||||||
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
|
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
|
||||||
|
#include <exception>
|
||||||
|
|
||||||
namespace hdfs {
|
namespace hdfs {
|
||||||
|
|
||||||
@ -64,4 +65,34 @@ std::string GetRandomClientName() {
|
|||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string SafeDisconnect(asio::ip::tcp::socket *sock) {
|
||||||
|
std::string err;
|
||||||
|
if(sock && sock->is_open()) {
|
||||||
|
/**
|
||||||
|
* Even though we just checked that the socket is open it's possible
|
||||||
|
* it isn't in a state where it can properly send or receive. If that's
|
||||||
|
* the case asio will turn the underlying error codes from shutdown()
|
||||||
|
* and close() into unhelpfully named std::exceptions. Due to the
|
||||||
|
* relatively innocuous nature of most of these error codes it's better
|
||||||
|
* to just catch and return a flag so the caller can log failure.
|
||||||
|
**/
|
||||||
|
|
||||||
|
try {
|
||||||
|
sock->shutdown(asio::ip::tcp::socket::shutdown_both);
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
err = std::string("shutdown() threw") + e.what();
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
sock->close();
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
// don't append if shutdown() already failed, first failure is the useful one
|
||||||
|
if(err.empty())
|
||||||
|
err = std::string("close() threw") + e.what();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -21,16 +21,22 @@
|
|||||||
#include "hdfspp/status.h"
|
#include "hdfspp/status.h"
|
||||||
|
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <mutex>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
#include <asio/error_code.hpp>
|
#include <asio/error_code.hpp>
|
||||||
#include <openssl/rand.h>
|
#include <openssl/rand.h>
|
||||||
|
|
||||||
#include <google/protobuf/message_lite.h>
|
#include <google/protobuf/message_lite.h>
|
||||||
#include <google/protobuf/io/coded_stream.h>
|
#include <google/protobuf/io/coded_stream.h>
|
||||||
|
#include <asio.hpp>
|
||||||
|
|
||||||
namespace hdfs {
|
namespace hdfs {
|
||||||
|
|
||||||
|
// typedefs based on code that's repeated everywhere
|
||||||
|
typedef std::lock_guard<std::mutex> mutex_guard;
|
||||||
|
|
||||||
|
|
||||||
static inline Status ToStatus(const ::asio::error_code &ec) {
|
static inline Status ToStatus(const ::asio::error_code &ec) {
|
||||||
if (ec) {
|
if (ec) {
|
||||||
return Status(ec.value(), ec.message().c_str());
|
return Status(ec.value(), ec.message().c_str());
|
||||||
@ -71,6 +77,11 @@ bool lock_held(T & mutex) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Shutdown and close a socket safely; will check if the socket is open and
|
||||||
|
// catch anything thrown by asio.
|
||||||
|
// Returns a string containing error message on failure, otherwise an empty string.
|
||||||
|
std::string SafeDisconnect(asio::ip::tcp::socket *sock);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -47,6 +47,7 @@ DataNodeConnectionImpl::DataNodeConnectionImpl(asio::io_service * io_service,
|
|||||||
void DataNodeConnectionImpl::Connect(
|
void DataNodeConnectionImpl::Connect(
|
||||||
std::function<void(Status status, std::shared_ptr<DataNodeConnection> dn)> handler) {
|
std::function<void(Status status, std::shared_ptr<DataNodeConnection> dn)> handler) {
|
||||||
// Keep the DN from being freed until we're done
|
// Keep the DN from being freed until we're done
|
||||||
|
mutex_guard state_lock(state_lock_);
|
||||||
auto shared_this = shared_from_this();
|
auto shared_this = shared_from_this();
|
||||||
asio::async_connect(*conn_, endpoints_.begin(), endpoints_.end(),
|
asio::async_connect(*conn_, endpoints_.begin(), endpoints_.end(),
|
||||||
[shared_this, handler](const asio::error_code &ec, std::array<asio::ip::tcp::endpoint, 1>::iterator it) {
|
[shared_this, handler](const asio::error_code &ec, std::array<asio::ip::tcp::endpoint, 1>::iterator it) {
|
||||||
@ -55,7 +56,11 @@ void DataNodeConnectionImpl::Connect(
|
|||||||
}
|
}
|
||||||
|
|
||||||
void DataNodeConnectionImpl::Cancel() {
|
void DataNodeConnectionImpl::Cancel() {
|
||||||
conn_.reset();
|
mutex_guard state_lock(state_lock_);
|
||||||
|
std::string err = SafeDisconnect(conn_.get());
|
||||||
|
if(!err.empty()) {
|
||||||
|
LOG_WARN(kBlockReader, << "Error disconnecting socket in DataNodeConnectionImpl::Cancel, " << err);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -23,11 +23,10 @@
|
|||||||
#include "ClientNamenodeProtocol.pb.h"
|
#include "ClientNamenodeProtocol.pb.h"
|
||||||
#include "common/libhdfs_events_impl.h"
|
#include "common/libhdfs_events_impl.h"
|
||||||
#include "common/logging.h"
|
#include "common/logging.h"
|
||||||
|
#include "common/util.h"
|
||||||
|
|
||||||
#include "asio.hpp"
|
#include "asio.hpp"
|
||||||
|
|
||||||
#include <exception>
|
|
||||||
|
|
||||||
namespace hdfs {
|
namespace hdfs {
|
||||||
|
|
||||||
class DataNodeConnection : public AsyncStream {
|
class DataNodeConnection : public AsyncStream {
|
||||||
@ -43,31 +42,19 @@ public:
|
|||||||
|
|
||||||
struct SocketDeleter {
|
struct SocketDeleter {
|
||||||
inline void operator()(asio::ip::tcp::socket *sock) {
|
inline void operator()(asio::ip::tcp::socket *sock) {
|
||||||
if(sock->is_open()) {
|
// Cancel may have already closed the socket.
|
||||||
/**
|
std::string err = SafeDisconnect(sock);
|
||||||
* Even though we just checked that the socket is open it's possible
|
if(!err.empty()) {
|
||||||
* it isn't in a state where it can properly send or receive. If that's
|
LOG_WARN(kBlockReader, << "Error disconnecting socket: " << err);
|
||||||
* the case asio will turn the underlying error codes from shutdown()
|
|
||||||
* and close() into unhelpfully named std::exceptions. Due to the
|
|
||||||
* relatively innocuous nature of most of these error codes it's better
|
|
||||||
* to just catch, give a warning, and move on with life.
|
|
||||||
**/
|
|
||||||
try {
|
|
||||||
sock->shutdown(asio::ip::tcp::socket::shutdown_both);
|
|
||||||
} catch (const std::exception &e) {
|
|
||||||
LOG_WARN(kBlockReader, << "Error calling socket->shutdown");
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
sock->close();
|
|
||||||
} catch (const std::exception &e) {
|
|
||||||
LOG_WARN(kBlockReader, << "Error calling socket->close");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
delete sock;
|
delete sock;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class DataNodeConnectionImpl : public DataNodeConnection, public std::enable_shared_from_this<DataNodeConnectionImpl>{
|
class DataNodeConnectionImpl : public DataNodeConnection, public std::enable_shared_from_this<DataNodeConnectionImpl>{
|
||||||
|
private:
|
||||||
|
// held (briefly) while posting async ops to the asio task queue
|
||||||
|
std::mutex state_lock_;
|
||||||
public:
|
public:
|
||||||
std::unique_ptr<asio::ip::tcp::socket, SocketDeleter> conn_;
|
std::unique_ptr<asio::ip::tcp::socket, SocketDeleter> conn_;
|
||||||
std::array<asio::ip::tcp::endpoint, 1> endpoints_;
|
std::array<asio::ip::tcp::endpoint, 1> endpoints_;
|
||||||
@ -84,19 +71,21 @@ public:
|
|||||||
void Cancel() override;
|
void Cancel() override;
|
||||||
|
|
||||||
void async_read_some(const MutableBuffers &buf,
|
void async_read_some(const MutableBuffers &buf,
|
||||||
std::function<void (const asio::error_code & error,
|
std::function<void (const asio::error_code & error, std::size_t bytes_transferred) > handler)
|
||||||
std::size_t bytes_transferred) > handler) override {
|
override {
|
||||||
event_handlers_->call("DN_read_req", "", "", buf.end() - buf.begin());
|
event_handlers_->call("DN_read_req", "", "", buf.end() - buf.begin());
|
||||||
|
|
||||||
|
|
||||||
|
mutex_guard state_lock(state_lock_);
|
||||||
conn_->async_read_some(buf, handler);
|
conn_->async_read_some(buf, handler);
|
||||||
};
|
};
|
||||||
|
|
||||||
void async_write_some(const ConstBuffers &buf,
|
void async_write_some(const ConstBuffers &buf,
|
||||||
std::function<void (const asio::error_code & error,
|
std::function<void (const asio::error_code & error, std::size_t bytes_transferred) > handler)
|
||||||
std::size_t bytes_transferred) > handler) override {
|
override {
|
||||||
|
|
||||||
event_handlers_->call("DN_write_req", "", "", buf.end() - buf.begin());
|
event_handlers_->call("DN_write_req", "", "", buf.end() - buf.begin());
|
||||||
|
|
||||||
|
mutex_guard state_lock(state_lock_);
|
||||||
conn_->async_write_some(buf, handler);
|
conn_->async_write_some(buf, handler);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Loading…
Reference in New Issue
Block a user