From paulsheer at gmail.com Thu Jun 1 18:48:00 2017 From: paulsheer at gmail.com (Paul Sheer) Date: Thu, 1 Jun 2017 18:48:00 -0500 Subject: [ace-bugs] ACE Bug report: file descriptor handling in Select_Reactor_Base.cpp In-Reply-To: References: Message-ID: Dear ACE team, Please see the below bug and fix for ACE. We are using ACE in a critical national-infrastructure-level software project and for each new release of ACE we need to apply the below patch to avoid an extremely obscure and rarely-occurring TOTAL LOCKUP of our application. Therefore please take this bug seriously. I am unable to discuss this bug by email, but I will offer my phone number to anyone that wishes an explanation if it is not clear from the text below. I have previously reported this, and my request was ignored. Here is a bug report about this from ACE 5.4.2 from 2004: https://groups.yahoo.com/neo/groups/ace-users/conversations/topics/39957 This fix below has been in our live software for some years now. ACE VERSION: 6.4.0 HOST MACHINE and OPERATING SYSTEM: Linux/x86, ALL TARGET MACHINE and OPERATING SYSTEM, if different from HOST: COMPILER NAME AND VERSION (AND PATCHLEVEL): Linux/x86, ALL THE $ACE_ROOT/ace/config.h FILE: N/A THE $ACE_ROOT/include/makeinclude/platform_macros.GNU FILE: N/A CONTENTS OF $ACE_ROOT/bin/MakeProjectCreator/config/default.features (used by MPC when you generate your own makefiles): N/A AREA/CLASS/EXAMPLE AFFECTED: No examples failed DOES THE PROBLEM AFFECT: COMPILATION? No LINKING? No On Unix systems, did you run make realclean first? Yes EXECUTION? Yes OTHER (please specify)? Corner case bug in file descriptor handling, see below. SYNOPSIS: File descriptor sets become corrupted within ACE. DESCRIPTION: Under certain combinations of adding and removing handlers to file descriptors, adding/removing enabling/disabling callbacks, ACE can fail to respond to a file descriptor -- activity on the file descriptor is ignored by ACE. REPEAT BY: To reproduce remove an intermediate file handle below the max. See code below for the obvious logical error. See https://groups.yahoo.com/neo/groups/ace-users/conversations/topics/39957 See code comments. SAMPLE FIX/WORKAROUND: See following patch: ------------patch------------- --- Select_Reactor_Base.cpp 2016-11-09 16:18:15.183904489 -0500 +++ Select_Reactor_Base.cpp-new 2016-11-09 16:19:23.918035481 -0500 @@ -334,20 +334,22 @@ if (!has_any_wait_mask && !has_any_suspend_mask) { #if defined (ACE_WIN32) if (event_handler != 0 && this->event_handlers_.unbind (pos) == -1) return -1; // Should not happen! #else this->event_handlers_[handle] = 0; if (this->max_handlep1_ == handle + 1) { + ACE_HANDLE save_max_handle = this->max_handlep1_; + // We've deleted the last entry, so we need to figure out // the last valid place in the array that is worth looking // at. ACE_HANDLE const wait_rd_max = this->select_reactor_.wait_set_.rd_mask_.max_set (); ACE_HANDLE const wait_wr_max = this->select_reactor_.wait_set_.wr_mask_.max_set (); ACE_HANDLE const wait_ex_max = this->select_reactor_.wait_set_.ex_mask_.max_set (); @@ -365,21 +367,44 @@ if (this->max_handlep1_ < wait_ex_max) this->max_handlep1_ = wait_ex_max; if (this->max_handlep1_ < suspend_rd_max) this->max_handlep1_ = suspend_rd_max; if (this->max_handlep1_ < suspend_wr_max) this->max_handlep1_ = suspend_wr_max; if (this->max_handlep1_ < suspend_ex_max) this->max_handlep1_ = suspend_ex_max; + ++this->max_handlep1_; + + +// This is fixed here by Paul Sheer ==============> +// +// At this point there may exist a non-NULL event_handlers_[x] where +// x >= this->max_handlep1_ . +// +// To fix this bug we simply scan for non-NULL handlers and +// increase this->max_handlep1_ as appropriate. + + if (save_max_handle > this->max_handlep1_) + { + ACE_HANDLE i; + for (i = save_max_handle - 1; i > this->max_handlep1_ - 1; i--) + { + if (this->event_handlers_[i] != NULL) + { + this->max_handlep1_ = i + 1; + break; + } + } + } } #endif /* ACE_WIN32 */ // The handle has been completely removed. complete_removal = true; } if (event_handler == 0) return -1; ------------end-patch--------------- -------------- next part -------------- --- Select_Reactor_Base.cpp 2016-11-09 16:18:15.183904489 -0500 +++ Select_Reactor_Base.cpp-new 2016-11-09 16:19:23.918035481 -0500 @@ -334,20 +334,22 @@ if (!has_any_wait_mask && !has_any_suspend_mask) { #if defined (ACE_WIN32) if (event_handler != 0 && this->event_handlers_.unbind (pos) == -1) return -1; // Should not happen! #else this->event_handlers_[handle] = 0; if (this->max_handlep1_ == handle + 1) { + ACE_HANDLE save_max_handle = this->max_handlep1_; + // We've deleted the last entry, so we need to figure out // the last valid place in the array that is worth looking // at. ACE_HANDLE const wait_rd_max = this->select_reactor_.wait_set_.rd_mask_.max_set (); ACE_HANDLE const wait_wr_max = this->select_reactor_.wait_set_.wr_mask_.max_set (); ACE_HANDLE const wait_ex_max = this->select_reactor_.wait_set_.ex_mask_.max_set (); @@ -365,21 +367,44 @@ if (this->max_handlep1_ < wait_ex_max) this->max_handlep1_ = wait_ex_max; if (this->max_handlep1_ < suspend_rd_max) this->max_handlep1_ = suspend_rd_max; if (this->max_handlep1_ < suspend_wr_max) this->max_handlep1_ = suspend_wr_max; if (this->max_handlep1_ < suspend_ex_max) this->max_handlep1_ = suspend_ex_max; + ++this->max_handlep1_; + + +// This is fixed here by Paul Sheer ==============> +// +// At this point there may exist a non-NULL event_handlers_[x] where +// x >= this->max_handlep1_ . +// +// To fix this bug we simply scan for non-NULL handlers and +// increase this->max_handlep1_ as appropriate. + + if (save_max_handle > this->max_handlep1_) + { + ACE_HANDLE i; + for (i = save_max_handle - 1; i > this->max_handlep1_ - 1; i--) + { + if (this->event_handlers_[i] != NULL) + { + this->max_handlep1_ = i + 1; + break; + } + } + } } #endif /* ACE_WIN32 */ // The handle has been completely removed. complete_removal = true; } if (event_handler == 0) return -1; From jwillemsen at remedy.nl Fri Jun 2 01:11:21 2017 From: jwillemsen at remedy.nl (Johnny Willemsen) Date: Fri, 2 Jun 2017 08:11:21 +0200 Subject: [ace-bugs] ACE Bug report: file descriptor handling in Select_Reactor_Base.cpp In-Reply-To: References: Message-ID: Hi, Thanks for using the PRF form. Please also provide an extension or new unit test under ACE_wrappers/tests that reproduces this issue. For the test extension and the patch you can open a new pull request at https://github.com/DOCGroup/ACE_TAO Best regards, Johnny Willemsen Remedy IT Postbus 81 | 6930 AB Westervoort | The Netherlands http://www.remedy.nl On 06/02/2017 01:48 AM, Paul Sheer wrote: > Dear ACE team, > > Please see the below bug and fix for ACE. We are using ACE in a > critical national-infrastructure-level software project and for each > new release of ACE we need to apply the below patch to avoid an > extremely obscure and rarely-occurring TOTAL LOCKUP of our > application. Therefore please take this bug seriously. I am unable to > discuss this bug by email, but I will offer my phone number to anyone > that wishes an explanation if it is not clear from the text below. I > have previously reported this, and my request was ignored. Here is a > bug report about this from ACE 5.4.2 from 2004: > https://groups.yahoo.com/neo/groups/ace-users/conversations/topics/39957 > > This fix below has been in our live software for some years now. > > > ACE VERSION: 6.4.0 > > HOST MACHINE and OPERATING SYSTEM: Linux/x86, ALL > > TARGET MACHINE and OPERATING SYSTEM, if different from HOST: > COMPILER NAME AND VERSION (AND PATCHLEVEL): Linux/x86, ALL > > THE $ACE_ROOT/ace/config.h FILE: N/A > > THE $ACE_ROOT/include/makeinclude/platform_macros.GNU FILE: N/A > > CONTENTS OF $ACE_ROOT/bin/MakeProjectCreator/config/default.features > (used by MPC when you generate your own makefiles): N/A > > AREA/CLASS/EXAMPLE AFFECTED: No examples failed > > DOES THE PROBLEM AFFECT: > COMPILATION? No > LINKING? No > On Unix systems, did you run make realclean first? Yes > EXECUTION? Yes > OTHER (please specify)? Corner case bug in file descriptor > handling, see below. > > SYNOPSIS: > > File descriptor sets become corrupted within ACE. > > > DESCRIPTION: > > Under certain combinations of adding and removing handlers to file > descriptors, adding/removing enabling/disabling callbacks, ACE can > fail to respond to a file descriptor -- activity on the file descriptor is > ignored by ACE. > > > REPEAT BY: > > To reproduce remove an intermediate file handle below the max. > See code below for the obvious logical error. > > See https://groups.yahoo.com/neo/groups/ace-users/conversations/topics/39957 > > See code comments. > > > > SAMPLE FIX/WORKAROUND: > > See following patch: > > ------------patch------------- > --- Select_Reactor_Base.cpp 2016-11-09 16:18:15.183904489 -0500 > +++ Select_Reactor_Base.cpp-new 2016-11-09 16:19:23.918035481 -0500 > @@ -334,20 +334,22 @@ > if (!has_any_wait_mask && !has_any_suspend_mask) > { > #if defined (ACE_WIN32) > if (event_handler != 0 && this->event_handlers_.unbind (pos) == -1) > return -1; // Should not happen! > #else > this->event_handlers_[handle] = 0; > > if (this->max_handlep1_ == handle + 1) > { > + ACE_HANDLE save_max_handle = this->max_handlep1_; > + > // We've deleted the last entry, so we need to figure out > // the last valid place in the array that is worth looking > // at. > ACE_HANDLE const wait_rd_max = > this->select_reactor_.wait_set_.rd_mask_.max_set (); > ACE_HANDLE const wait_wr_max = > this->select_reactor_.wait_set_.wr_mask_.max_set (); > ACE_HANDLE const wait_ex_max = > this->select_reactor_.wait_set_.ex_mask_.max_set (); > > @@ -365,21 +367,44 @@ > if (this->max_handlep1_ < wait_ex_max) > this->max_handlep1_ = wait_ex_max; > > if (this->max_handlep1_ < suspend_rd_max) > this->max_handlep1_ = suspend_rd_max; > if (this->max_handlep1_ < suspend_wr_max) > this->max_handlep1_ = suspend_wr_max; > if (this->max_handlep1_ < suspend_ex_max) > this->max_handlep1_ = suspend_ex_max; > > + > ++this->max_handlep1_; > + > + > +// This is fixed here by Paul Sheer ==============> > +// > +// At this point there may exist a non-NULL event_handlers_[x] where > +// x >= this->max_handlep1_ . > +// > +// To fix this bug we simply scan for non-NULL handlers and > +// increase this->max_handlep1_ as appropriate. > + > + if (save_max_handle > this->max_handlep1_) > + { > + ACE_HANDLE i; > + for (i = save_max_handle - 1; i > this->max_handlep1_ - 1; i--) > + { > + if (this->event_handlers_[i] != NULL) > + { > + this->max_handlep1_ = i + 1; > + break; > + } > + } > + } > } > > #endif /* ACE_WIN32 */ > > // The handle has been completely removed. > complete_removal = true; > } > > if (event_handler == 0) > return -1; > ------------end-patch--------------- > > > _______________________________________________ > ace-bugs mailing list > ace-bugs at list.isis.vanderbilt.edu > http://list.isis.vanderbilt.edu/cgi-bin/mailman/listinfo/ace-bugs -------------- next part -------------- An HTML attachment was scrubbed... URL: