Discussion:
[Dnsmasq-discuss] [PATCH] Delay DHCP replies for Raspberry Pi clients
Floris Bos
2017-03-29 12:48:48 UTC
Permalink
The PXE boot firmware implementation of the Raspberry Pi 3
has a bug causing it to fail if it receives replies
instantly.

As a workaround ensure there is a minimum delay of one
second if the client is a Pi.

On Linux it looks up the exact receive time of the UDP
packet with the SIOCGSTAMP ioctl to prevent multiple
delays if multiple packets come in around the same time,
or if there already was a delay caused by a ping check.

Signed-off-by: Floris Bos <***@je-eigen-domein.nl>
---
src/dhcp.c | 19 +++++++++++
src/dnsmasq.c | 102 +++++++++++++++++++++++++++++++++++-----------------------
src/dnsmasq.h | 1 +
3 files changed, 81 insertions(+), 41 deletions(-)

diff --git a/src/dhcp.c b/src/dhcp.c
index 08952c8..203d9d5 100644
--- a/src/dhcp.c
+++ b/src/dhcp.c
@@ -356,6 +356,25 @@ void dhcp_packet(time_t now, int pxe_fd)
#ifdef HAVE_SOCKADDR_SA_LEN
dest.sin_len = sizeof(struct sockaddr_in);
#endif
+
+ if (mess->htype == ARPHRD_ETHER && mess->hlen == ETHER_ADDR_LEN &&
+ mess->chaddr[0] == 0xB8 && mess->chaddr[1] == 0x27 && mess->chaddr[2] == 0xEB)
+ {
+ /* Raspberry Pi 3 boot firmware has a bug in which it fails if it receives DHCP
+ replies instantly. As a workaround ensure there is a delay of at least a second */
+
+ int starttime = now;
+#ifdef HAVE_LINUX_NETWORK
+ struct timeval tv;
+
+ /* Use timestamp of the UDP packet received as start time for delay */
+ if (ioctl(fd, SIOCGSTAMP, &tv) == 0)
+ {
+ starttime = tv.tv_sec;
+ }
+#endif
+ delay_dhcp(starttime, 1, -1, 0, 0);
+ }

if (pxe_fd)
{
diff --git a/src/dnsmasq.c b/src/dnsmasq.c
index d2cc7cc..6ae8296 100644
--- a/src/dnsmasq.c
+++ b/src/dnsmasq.c
@@ -1747,29 +1747,15 @@ int icmp_ping(struct in_addr addr)
{
/* Try and get an ICMP echo from a machine. */

- /* Note that whilst in the three second wait, we check for
- (and service) events on the DNS and TFTP sockets, (so doing that
- better not use any resources our caller has in use...)
- but we remain deaf to signals or further DHCP packets. */
-
- /* There can be a problem using dnsmasq_time() to end the loop, since
- it's not monotonic, and can go backwards if the system clock is
- tweaked, leading to the code getting stuck in this loop and
- ignoring DHCP requests. To fix this, we check to see if select returned
- as a result of a timeout rather than a socket becoming available. We
- only allow this to happen as many times as it takes to get to the wait time
- in quarter-second chunks. This provides a fallback way to end loop. */
-
- int fd, rc;
+ int fd;
struct sockaddr_in saddr;
struct {
struct ip ip;
struct icmp icmp;
} packet;
unsigned short id = rand16();
- unsigned int i, j, timeout_count;
+ unsigned int i, j;
int gotreply = 0;
- time_t start, now;

#if defined(HAVE_LINUX_NETWORK) || defined (HAVE_SOLARIS_NETWORK)
if ((fd = make_icmp_sock()) == -1)
@@ -1799,14 +1785,46 @@ int icmp_ping(struct in_addr addr)
while (retry_send(sendto(fd, (char *)&packet.icmp, sizeof(struct icmp), 0,
(struct sockaddr *)&saddr, sizeof(saddr))));

- for (now = start = dnsmasq_time(), timeout_count = 0;
- (difftime(now, start) < (float)PING_WAIT) && (timeout_count < PING_WAIT * 4);)
+ gotreply = delay_dhcp(dnsmasq_time(), PING_WAIT, fd, addr.s_addr, id);
+
+#if defined(HAVE_LINUX_NETWORK) || defined(HAVE_SOLARIS_NETWORK)
+ while (retry_send(close(fd)));
+#else
+ opt = 1;
+ setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &opt, sizeof(opt));
+#endif
+
+ return gotreply;
+}
+
+int delay_dhcp(time_t start, int sec, int fd, uint32_t addr, unsigned short id)
+{
+ /* Delay processing DHCP packets for "sec" seconds counting from "start".
+ If "fd" is not -1 it will stop waiting if an ICMP echo reply is received
+ from "addr" with ICMP ID "id" and return 1 */
+
+ /* Note that whilst waiting, we check for
+ (and service) events on the DNS and TFTP sockets, (so doing that
+ better not use any resources our caller has in use...)
+ but we remain deaf to signals or further DHCP packets. */
+
+ /* There can be a problem using dnsmasq_time() to end the loop, since
+ it's not monotonic, and can go backwards if the system clock is
+ tweaked, leading to the code getting stuck in this loop and
+ ignoring DHCP requests. To fix this, we check to see if select returned
+ as a result of a timeout rather than a socket becoming available. We
+ only allow this to happen as many times as it takes to get to the wait time
+ in quarter-second chunks. This provides a fallback way to end loop. */
+
+ int rc, timeout_count;
+ time_t now;
+
+ for (now = dnsmasq_time(), timeout_count = 0;
+ (difftime(now, start) <= (float)sec) && (timeout_count < sec * 4);)
{
- struct sockaddr_in faddr;
- socklen_t len = sizeof(faddr);
-
poll_reset();
- poll_listen(fd, POLLIN);
+ if (fd != -1)
+ poll_listen(fd, POLLIN);
set_dns_listeners(now);
set_log_writer();

@@ -1836,27 +1854,29 @@ int icmp_ping(struct in_addr addr)
check_tftp_listeners(now);
#endif

- if (poll_check(fd, POLLIN) &&
- recvfrom(fd, &packet, sizeof(packet), 0,
- (struct sockaddr *)&faddr, &len) == sizeof(packet) &&
- saddr.sin_addr.s_addr == faddr.sin_addr.s_addr &&
- packet.icmp.icmp_type == ICMP_ECHOREPLY &&
- packet.icmp.icmp_seq == 0 &&
- packet.icmp.icmp_id == id)
- {
- gotreply = 1;
- break;
- }
+ if (fd != -1)
+ {
+ struct {
+ struct ip ip;
+ struct icmp icmp;
+ } packet;
+ struct sockaddr_in faddr;
+ socklen_t len = sizeof(faddr);
+
+ if (poll_check(fd, POLLIN) &&
+ recvfrom(fd, &packet, sizeof(packet), 0,
+ (struct sockaddr *)&faddr, &len) == sizeof(packet) &&
+ addr == faddr.sin_addr.s_addr &&
+ packet.icmp.icmp_type == ICMP_ECHOREPLY &&
+ packet.icmp.icmp_seq == 0 &&
+ packet.icmp.icmp_id == id)
+ {
+ return 1;
+ }
+ }
}
-
-#if defined(HAVE_LINUX_NETWORK) || defined(HAVE_SOLARIS_NETWORK)
- while (retry_send(close(fd)));
-#else
- opt = 1;
- setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &opt, sizeof(opt));
-#endif

- return gotreply;
+ return 0;
}
#endif

diff --git a/src/dnsmasq.h b/src/dnsmasq.h
index 6b44e53..556a73a 100644
--- a/src/dnsmasq.h
+++ b/src/dnsmasq.h
@@ -1341,6 +1341,7 @@ unsigned char *extended_hwaddr(int hwtype, int hwlen, unsigned char *hwaddr,
#ifdef HAVE_DHCP
int make_icmp_sock(void);
int icmp_ping(struct in_addr addr);
+int delay_dhcp(time_t start, int sec, int fd, uint32_t addr, unsigned short id);
#endif
void queue_event(int event);
void send_alarm(time_t event, time_t now);
--
2.7.4
Floris Bos
2017-03-29 15:24:45 UTC
Permalink
Hi,
Le Wed, 29 Mar 2017 14:48:48 +0200
Post by Floris Bos
The PXE boot firmware implementation of the Raspberry Pi 3
has a bug causing it to fail if it receives replies
instantly.
As a workaround ensure there is a minimum delay of one
second if the client is a Pi.
On Linux it looks up the exact receive time of the UDP
packet with the SIOCGSTAMP ioctl to prevent multiple
delays if multiple packets come in around the same time,
or if there already was a delay caused by a ping check.
Just a side question: can't / won't the boot firmware be fixed?
There is a fix.
However that requires sticking a SD card with the newer boot firmware in
the Pi, and leaving it in permanently.

To be able to PXE boot without SD card, the firmware in the ROM of the
SoC has to be used, which is not reflashable, and -at least for the
devices currently out there- comes with this bug.



Yours sincerely,

Floris Bos
Albert ARIBAUD
2017-03-29 15:33:24 UTC
Permalink
Hi again,

Le Wed, 29 Mar 2017 17:24:45 +0200
Post by Floris Bos
Hi,
Le Wed, 29 Mar 2017 14:48:48 +0200
Post by Floris Bos
The PXE boot firmware implementation of the Raspberry Pi 3
has a bug causing it to fail if it receives replies
instantly.
As a workaround ensure there is a minimum delay of one
second if the client is a Pi.
On Linux it looks up the exact receive time of the UDP
packet with the SIOCGSTAMP ioctl to prevent multiple
delays if multiple packets come in around the same time,
or if there already was a delay caused by a ping check.
Just a side question: can't / won't the boot firmware be fixed?
There is a fix.
However that requires sticking a SD card with the newer boot firmware
in the Pi, and leaving it in permanently.
To be able to PXE boot without SD card, the firmware in the ROM of
the SoC has to be used, which is not reflashable, and -at least for
the devices currently out there- comes with this bug.
Oh, OK, so that's not an upgradable firmware, that's the ROM boot. Pity.

Thanks for the clarification!
Post by Floris Bos
Yours sincerely,
Floris Bos
Amicalement,
--
Albert.
Floris Bos
2017-03-29 17:17:41 UTC
Permalink
Post by Floris Bos
The PXE boot firmware implementation of the Raspberry Pi 3
has a bug causing it to fail if it receives replies
instantly.
As a workaround ensure there is a minimum delay of one
second if the client is a Pi.
On Linux it looks up the exact receive time of the UDP
packet with the SIOCGSTAMP ioctl to prevent multiple
delays if multiple packets come in around the same time,
or if there already was a delay caused by a ping check.
Why not have a configurable dhcp-delay setting instead of putting
device-specific quirks into the source code of dnsmasq forever?
That was my first proposal.
But there was opposition against introducing extra options.

http://lists.thekelleys.org.uk/pipermail/dnsmasq-discuss/2017q1/011336.html


Yours sincerely,

Floris Bos
Kurt H Maier
2017-03-29 17:49:41 UTC
Permalink
Post by Floris Bos
That was my first proposal.
But there was opposition against introducing extra options.
I'm not sure hardcoding an entire OUI is the right reaction to such
opposition. I'm also wary about hardcoding the duration.

As it sits this patch is both too specific and too general.

khm
Floris Bos
2017-03-29 18:48:30 UTC
Permalink
Post by Kurt H Maier
Post by Floris Bos
That was my first proposal.
But there was opposition against introducing extra options.
I'm not sure hardcoding an entire OUI is the right reaction to such
opposition.
So what do you propose that the reaction should be instead?
Post by Kurt H Maier
I'm also wary about hardcoding the duration.
Was configurable in the original patch...


Yours sincerely,

Floris Bos
Kurt H Maier
2017-03-29 19:31:19 UTC
Permalink
Post by Floris Bos
Post by Kurt H Maier
I'm not sure hardcoding an entire OUI is the right reaction to such
opposition.
So what do you propose that the reaction should be instead?
In your specific case, I would use netem [1] to introduce a delay to
incoming DHCP packets. I dislike the idea of deliberately introducing
performance penalties to upstream software, and I think fixes like this
needlessly complicate things. Broken hardware is a site-specific issue
and should be addressed in a site-specific manner.

khm

1 - https://wiki.linuxfoundation.org/networking/netem
Chris Novakovic
2017-03-29 17:43:00 UTC
Permalink
Post by Floris Bos
The PXE boot firmware implementation of the Raspberry Pi 3
has a bug causing it to fail if it receives replies
instantly.
As a workaround ensure there is a minimum delay of one
second if the client is a Pi.
On Linux it looks up the exact receive time of the UDP
packet with the SIOCGSTAMP ioctl to prevent multiple
delays if multiple packets come in around the same time,
or if there already was a delay caused by a ping check.
Why not have a configurable dhcp-delay setting instead of putting
device-specific quirks into the source code of dnsmasq forever?
+1 for a dhcp-delay setting, ideally per-MAC: the Ethernet adapters on
older RPi models (as well as the built-in wifi adapter on the RPi 3)
also use the b8:27:eb OUI, and this artificial delay oughtn't be applied
to them.
Dan Sneddon
2017-03-29 20:35:22 UTC
Permalink
Post by Chris Novakovic
Post by Floris Bos
The PXE boot firmware implementation of the Raspberry Pi 3
has a bug causing it to fail if it receives replies
instantly.
As a workaround ensure there is a minimum delay of one
second if the client is a Pi.
On Linux it looks up the exact receive time of the UDP
packet with the SIOCGSTAMP ioctl to prevent multiple
delays if multiple packets come in around the same time,
or if there already was a delay caused by a ping check.
Why not have a configurable dhcp-delay setting instead of putting
device-specific quirks into the source code of dnsmasq forever?
+1 for a dhcp-delay setting, ideally per-MAC: the Ethernet adapters on
older RPi models (as well as the built-in wifi adapter on the RPi 3)
also use the b8:27:eb OUI, and this artificial delay oughtn't be applied
to them.
_______________________________________________
Dnsmasq-discuss mailing list
http://lists.thekelleys.org.uk/mailman/listinfo/dnsmasq-discuss
Another +1 for adding a dhcp-delay setting on a per-MAC basis.

There is a side-benefit that this would enable a form of HA where you
have two servers, and one is set to have a built-in delay in its
responses. This would allow the primary server to (nearly) always
respond first if up, and the secondary server would respond after a
delay in case the first server was down.
--
Dan Sneddon | Senior Principal Software Engineer
***@redhat.com | redhat.com/openstack
dsneddon:irc | @dxs:twitter
Eric Luehrsen
2017-03-29 23:22:39 UTC
Permalink
Post by Dan Sneddon
Post by Chris Novakovic
Post by Floris Bos
The PXE boot firmware implementation of the Raspberry Pi 3
has a bug causing it to fail if it receives replies
instantly.
Why not have a configurable dhcp-delay setting instead of putting
device-specific quirks into the source code of dnsmasq forever?
+1 for a dhcp-delay setting, ideally per-MAC: the Ethernet adapters on
older RPi models (as well as the built-in wifi adapter on the RPi 3)
also use the b8:27:eb OUI, and this artificial delay oughtn't be applied
to them.
Another +1 for adding a dhcp-delay setting on a per-MAC basis.
PXE devices are limited. I guess in a way of thinking that is
intentionally so. A server side robustness action is a valid use case
consideration. I would suggest the "tag" option method. You can tag a
network if say a whole subnet was only to serve PXE security cameras.
You can tag a partial MAC (wildcard) to ID a manufacturer. Assign the
delay option to the tag. This is just like DHCP "need broadcast" and
other client-server quirks.

-Eric
Pali Rohár
2017-03-30 07:14:22 UTC
Permalink
Post by Eric Luehrsen
Post by Dan Sneddon
Post by Chris Novakovic
Post by Floris Bos
The PXE boot firmware implementation of the Raspberry Pi 3
has a bug causing it to fail if it receives replies
instantly.
Why not have a configurable dhcp-delay setting instead of putting
device-specific quirks into the source code of dnsmasq forever?
+1 for a dhcp-delay setting, ideally per-MAC: the Ethernet adapters on
older RPi models (as well as the built-in wifi adapter on the RPi 3)
also use the b8:27:eb OUI, and this artificial delay oughtn't be applied
to them.
Another +1 for adding a dhcp-delay setting on a per-MAC basis.
PXE devices are limited. I guess in a way of thinking that is
intentionally so. A server side robustness action is a valid use case
consideration. I would suggest the "tag" option method. You can tag a
network if say a whole subnet was only to serve PXE security cameras.
You can tag a partial MAC (wildcard) to ID a manufacturer. Assign the
delay option to the tag. This is just like DHCP "need broadcast" and
other client-server quirks.
+1 for tag method.
--
Pali Rohár
***@gmail.com
Floris Bos
2017-03-30 11:36:42 UTC
Permalink
Post by Chris Novakovic
+1 for a dhcp-delay setting, ideally per-MAC: the Ethernet adapters on
older RPi models (as well as the built-in wifi adapter on the RPi 3)
also use the b8:27:eb OUI, and this artificial delay oughtn't be applied
to them.
Note that if you are using dnsmasq as plain DHCP server to hand out IPs
instead of for PXE booting, you will likely already have a 3 second
delay in most cases while it performs a ping check to see if the IP it
intends to hand out is really free.
If there is already such existing delay, the patch will not delay things
further, so things are a little less dramatic than they may seem.

That said, will submit a patch that supports tags for consideration.


Yours sincerely,

Floris Bos

Loading...