Hi list,
We have update coded from the upstream session&tcp changes to our code base and find a possible bug which cause tcp connection can't be established anymore.
Our scenario is that we will connect to a remote tcp server with specified local port and local ip, however, new vpp code have introduced a lcl_endpts_freelist which will be either flushed when pending local endpoint exceeded the limit (32) or when transport_alloc_local_port is called.
However, since we specify the local port and local ip and the total session count is limited (< 32), in this case, the transport_cleanup_freelist will never be called which cause the previous session which use the specified local port and local ip will not be released after the session aborted.
I think we should also try to free the list in such case as I did in the following code:
toggle quoted message
Show quoted text
int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg, ip46_address_t * lcl_addr, u16 * lcl_port) { // ZDY: transport_main_t *tm = &tp_main; transport_endpoint_t *rmt = (transport_endpoint_t *) rmt_cfg; session_error_t error; int port;
/* * Find the local address */ if (ip_is_zero (&rmt_cfg->peer.ip, rmt_cfg->peer.is_ip4)) { error = transport_find_local_ip_for_remote (&rmt_cfg->peer.sw_if_index, rmt, lcl_addr); if (error) return error; } else { /* Assume session layer vetted this address */ clib_memcpy_fast (lcl_addr, &rmt_cfg->peer.ip, sizeof (rmt_cfg->peer.ip)); }
/* * Allocate source port */ if (rmt_cfg->peer.port == 0) { port = transport_alloc_local_port (proto, lcl_addr, rmt_cfg); if (port < 1) return SESSION_E_NOPORT; *lcl_port = port; } else { port = clib_net_to_host_u16 (rmt_cfg->peer.port); *lcl_port = port;
// ZDY: need add this to to cleanup because in specified src port // case, we will not run to transport_alloc_local_port, then // freelist will only be freeed when list is full (>32). /* Cleanup freelist if need be */ if (vec_len (tm->lcl_endpts_freelist)) transport_cleanup_freelist ();
return transport_endpoint_mark_used (proto, lcl_addr, port); }
return 0; }
|
|
Hi,
Could you try this out [1]? I’ve hit this issue myself today but with udp sessions. Unfortunately, as you’ve correctly pointed out, we were forcing a cleanup only on the non-fixed local port branch.
Regards, Florin
[1] https://gerrit.fd.io/r/c/vpp/+/38473
toggle quoted message
Show quoted text
On Mar 13, 2023, at 7:35 PM, Zhang Dongya <fortitude.zhang@...> wrote:
Hi list,
We have update coded from the upstream session&tcp changes to our code base and find a possible bug which cause tcp connection can't be established anymore.
Our scenario is that we will connect to a remote tcp server with specified local port and local ip, however, new vpp code have introduced a lcl_endpts_freelist which will be either flushed when pending local endpoint exceeded the limit (32) or when transport_alloc_local_port is called.
However, since we specify the local port and local ip and the total session count is limited (< 32), in this case, the transport_cleanup_freelist will never be called which cause the previous session which use the specified local port and local ip will not be released after the session aborted.
I think we should also try to free the list in such case as I did in the following code:
int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg, ip46_address_t * lcl_addr, u16 * lcl_port) { // ZDY: transport_main_t *tm = &tp_main; transport_endpoint_t *rmt = (transport_endpoint_t *) rmt_cfg; session_error_t error; int port;
/* * Find the local address */ if (ip_is_zero (&rmt_cfg->peer.ip, rmt_cfg->peer.is_ip4)) { error = transport_find_local_ip_for_remote (&rmt_cfg->peer.sw_if_index, rmt, lcl_addr); if (error) return error; } else { /* Assume session layer vetted this address */ clib_memcpy_fast (lcl_addr, &rmt_cfg->peer.ip, sizeof (rmt_cfg->peer.ip)); }
/* * Allocate source port */ if (rmt_cfg->peer.port == 0) { port = transport_alloc_local_port (proto, lcl_addr, rmt_cfg); if (port < 1) return SESSION_E_NOPORT; *lcl_port = port; } else { port = clib_net_to_host_u16 (rmt_cfg->peer.port); *lcl_port = port;
// ZDY: need add this to to cleanup because in specified src port // case, we will not run to transport_alloc_local_port, then // freelist will only be freeed when list is full (>32). /* Cleanup freelist if need be */ if (vec_len (tm->lcl_endpts_freelist)) transport_cleanup_freelist ();
return transport_endpoint_mark_used (proto, lcl_addr, port); }
return 0; }
|
|
Just use this patch and the connection can be reconnected after closed.
However, I find another possible bug when using local ip + local port for different target server due to transport_endpoint_mark_used return error if it find local ip + port being created.
I think it should increase the refcnt instead if it find 6 tuple is unique.
static int transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port) { transport_main_t *tm = &tp_main; local_endpoint_t *lep; u32 tei;
ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
// BUG??? maybe should allow reuse ???
tei = transport_endpoint_lookup (&tm->local_endpoints_table, proto, ip, port); if (tei != ENDPOINT_INVALID_INDEX) return SESSION_E_PORTINUSE;
/* Pool reallocs with worker barrier */ lep = transport_endpoint_alloc (); clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip)); lep->ep.port = port; lep->proto = proto; lep->refcnt = 1;
transport_endpoint_table_add (&tm->local_endpoints_table, proto, &lep->ep, lep - tm->local_endpoints);
return 0; }
Hi,
Could you try this out [1]? I’ve hit this issue myself today but with udp sessions. Unfortunately, as you’ve correctly pointed out, we were forcing a cleanup only on the non-fixed local port branch.
Regards, Florin
[1] https://gerrit.fd.io/r/c/vpp/+/38473
Hi list,
We have update coded from the upstream session&tcp changes to our code base and find a possible bug which cause tcp connection can't be established anymore.
Our scenario is that we will connect to a remote tcp server with specified local port and local ip, however, new vpp code have introduced a lcl_endpts_freelist which will be either flushed when pending local endpoint exceeded the limit (32) or when transport_alloc_local_port is called.
However, since we specify the local port and local ip and the total session count is limited (< 32), in this case, the transport_cleanup_freelist will never be called which cause the previous session which use the specified local port and local ip will not be released after the session aborted.
I think we should also try to free the list in such case as I did in the following code:
int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg, ip46_address_t * lcl_addr, u16 * lcl_port) { // ZDY: transport_main_t *tm = &tp_main; transport_endpoint_t *rmt = (transport_endpoint_t *) rmt_cfg; session_error_t error; int port;
/* * Find the local address */ if (ip_is_zero (&rmt_cfg->peer.ip, rmt_cfg->peer.is_ip4)) { error = transport_find_local_ip_for_remote (&rmt_cfg->peer.sw_if_index, rmt, lcl_addr); if (error) return error; } else { /* Assume session layer vetted this address */ clib_memcpy_fast (lcl_addr, &rmt_cfg->peer.ip, sizeof (rmt_cfg->peer.ip)); }
/* * Allocate source port */ if (rmt_cfg->peer.port == 0) { port = transport_alloc_local_port (proto, lcl_addr, rmt_cfg); if (port < 1) return SESSION_E_NOPORT; *lcl_port = port; } else { port = clib_net_to_host_u16 (rmt_cfg->peer.port); *lcl_port = port;
// ZDY: need add this to to cleanup because in specified src port // case, we will not run to transport_alloc_local_port, then // freelist will only be freeed when list is full (>32). /* Cleanup freelist if need be */ if (vec_len (tm->lcl_endpts_freelist)) transport_cleanup_freelist ();
return transport_endpoint_mark_used (proto, lcl_addr, port); }
return 0; }
|
|
Hi,
Are you looking for behavior similar to the one when random local ports are allocated when, if port is used, we check if the 5-tuple is available?
Don’t think we explicitly supported this before but here’s a patch [1].
Regards, Florin
[1] https://gerrit.fd.io/r/c/vpp/+/38486
toggle quoted message
Show quoted text
On Mar 14, 2023, at 12:56 AM, Zhang Dongya <fortitude.zhang@...> wrote:
Just use this patch and the connection can be reconnected after closed.
However, I find another possible bug when using local ip + local port for different target server due to transport_endpoint_mark_used return error if it find local ip + port being created.
I think it should increase the refcnt instead if it find 6 tuple is unique.
static int transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port) { transport_main_t *tm = &tp_main; local_endpoint_t *lep; u32 tei;
ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
// BUG??? maybe should allow reuse ???
tei = transport_endpoint_lookup (&tm->local_endpoints_table, proto, ip, port); if (tei != ENDPOINT_INVALID_INDEX) return SESSION_E_PORTINUSE;
/* Pool reallocs with worker barrier */ lep = transport_endpoint_alloc (); clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip)); lep->ep.port = port; lep->proto = proto; lep->refcnt = 1;
transport_endpoint_table_add (&tm->local_endpoints_table, proto, &lep->ep, lep - tm->local_endpoints);
return 0; }
Hi,
Could you try this out [1]? I’ve hit this issue myself today but with udp sessions. Unfortunately, as you’ve correctly pointed out, we were forcing a cleanup only on the non-fixed local port branch.
Regards, Florin
[1] https://gerrit.fd.io/r/c/vpp/+/38473
Hi list,
We have update coded from the upstream session&tcp changes to our code base and find a possible bug which cause tcp connection can't be established anymore.
Our scenario is that we will connect to a remote tcp server with specified local port and local ip, however, new vpp code have introduced a lcl_endpts_freelist which will be either flushed when pending local endpoint exceeded the limit (32) or when transport_alloc_local_port is called.
However, since we specify the local port and local ip and the total session count is limited (< 32), in this case, the transport_cleanup_freelist will never be called which cause the previous session which use the specified local port and local ip will not be released after the session aborted.
I think we should also try to free the list in such case as I did in the following code:
int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg, ip46_address_t * lcl_addr, u16 * lcl_port) { // ZDY: transport_main_t *tm = &tp_main; transport_endpoint_t *rmt = (transport_endpoint_t *) rmt_cfg; session_error_t error; int port;
/* * Find the local address */ if (ip_is_zero (&rmt_cfg->peer.ip, rmt_cfg->peer.is_ip4)) { error = transport_find_local_ip_for_remote (&rmt_cfg->peer.sw_if_index, rmt, lcl_addr); if (error) return error; } else { /* Assume session layer vetted this address */ clib_memcpy_fast (lcl_addr, &rmt_cfg->peer.ip, sizeof (rmt_cfg->peer.ip)); }
/* * Allocate source port */ if (rmt_cfg->peer.port == 0) { port = transport_alloc_local_port (proto, lcl_addr, rmt_cfg); if (port < 1) return SESSION_E_NOPORT; *lcl_port = port; } else { port = clib_net_to_host_u16 (rmt_cfg->peer.port); *lcl_port = port;
// ZDY: need add this to to cleanup because in specified src port // case, we will not run to transport_alloc_local_port, then // freelist will only be freeed when list is full (>32). /* Cleanup freelist if need be */ if (vec_len (tm->lcl_endpts_freelist)) transport_cleanup_freelist ();
return transport_endpoint_mark_used (proto, lcl_addr, port); }
return 0; }
|
|
yes, this is exactly what I want do, this patch works as expected, thanks a lot.
toggle quoted message
Show quoted text
Hi,
Are you looking for behavior similar to the one when random local ports are allocated when, if port is used, we check if the 5-tuple is available?
Don’t think we explicitly supported this before but here’s a patch [1].
Regards, Florin
Just use this patch and the connection can be reconnected after closed.
However, I find another possible bug when using local ip + local port for different target server due to transport_endpoint_mark_used return error if it find local ip + port being created.
I think it should increase the refcnt instead if it find 6 tuple is unique.
static int transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port) { transport_main_t *tm = &tp_main; local_endpoint_t *lep; u32 tei;
ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
// BUG??? maybe should allow reuse ???
tei = transport_endpoint_lookup (&tm->local_endpoints_table, proto, ip, port); if (tei != ENDPOINT_INVALID_INDEX) return SESSION_E_PORTINUSE;
/* Pool reallocs with worker barrier */ lep = transport_endpoint_alloc (); clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip)); lep->ep.port = port; lep->proto = proto; lep->refcnt = 1;
transport_endpoint_table_add (&tm->local_endpoints_table, proto, &lep->ep, lep - tm->local_endpoints);
return 0; }
Hi,
Could you try this out [1]? I’ve hit this issue myself today but with udp sessions. Unfortunately, as you’ve correctly pointed out, we were forcing a cleanup only on the non-fixed local port branch.
Regards, Florin
[1] https://gerrit.fd.io/r/c/vpp/+/38473
Hi list,
We have update coded from the upstream session&tcp changes to our code base and find a possible bug which cause tcp connection can't be established anymore.
Our scenario is that we will connect to a remote tcp server with specified local port and local ip, however, new vpp code have introduced a lcl_endpts_freelist which will be either flushed when pending local endpoint exceeded the limit (32) or when transport_alloc_local_port is called.
However, since we specify the local port and local ip and the total session count is limited (< 32), in this case, the transport_cleanup_freelist will never be called which cause the previous session which use the specified local port and local ip will not be released after the session aborted.
I think we should also try to free the list in such case as I did in the following code:
int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg, ip46_address_t * lcl_addr, u16 * lcl_port) { // ZDY: transport_main_t *tm = &tp_main; transport_endpoint_t *rmt = (transport_endpoint_t *) rmt_cfg; session_error_t error; int port;
/* * Find the local address */ if (ip_is_zero (&rmt_cfg->peer.ip, rmt_cfg->peer.is_ip4)) { error = transport_find_local_ip_for_remote (&rmt_cfg->peer.sw_if_index, rmt, lcl_addr); if (error) return error; } else { /* Assume session layer vetted this address */ clib_memcpy_fast (lcl_addr, &rmt_cfg->peer.ip, sizeof (rmt_cfg->peer.ip)); }
/* * Allocate source port */ if (rmt_cfg->peer.port == 0) { port = transport_alloc_local_port (proto, lcl_addr, rmt_cfg); if (port < 1) return SESSION_E_NOPORT; *lcl_port = port; } else { port = clib_net_to_host_u16 (rmt_cfg->peer.port); *lcl_port = port;
// ZDY: need add this to to cleanup because in specified src port // case, we will not run to transport_alloc_local_port, then // freelist will only be freeed when list is full (>32). /* Cleanup freelist if need be */ if (vec_len (tm->lcl_endpts_freelist)) transport_cleanup_freelist ();
return transport_endpoint_mark_used (proto, lcl_addr, port); }
return 0; }
|
|
Great! Thanks for confirming!
Regards,
toggle quoted message
Show quoted text
On Mar 16, 2023, at 8:29 PM, Zhang Dongya <fortitude.zhang@...> wrote:
yes, this is exactly what I want do, this patch works as expected, thanks a lot.
Hi,
Are you looking for behavior similar to the one when random local ports are allocated when, if port is used, we check if the 5-tuple is available?
Don’t think we explicitly supported this before but here’s a patch [1].
Regards, Florin
Just use this patch and the connection can be reconnected after closed.
However, I find another possible bug when using local ip + local port for different target server due to transport_endpoint_mark_used return error if it find local ip + port being created.
I think it should increase the refcnt instead if it find 6 tuple is unique.
static int transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port) { transport_main_t *tm = &tp_main; local_endpoint_t *lep; u32 tei;
ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
// BUG??? maybe should allow reuse ???
tei = transport_endpoint_lookup (&tm->local_endpoints_table, proto, ip, port); if (tei != ENDPOINT_INVALID_INDEX) return SESSION_E_PORTINUSE;
/* Pool reallocs with worker barrier */ lep = transport_endpoint_alloc (); clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip)); lep->ep.port = port; lep->proto = proto; lep->refcnt = 1;
transport_endpoint_table_add (&tm->local_endpoints_table, proto, &lep->ep, lep - tm->local_endpoints);
return 0; }
Hi,
Could you try this out [1]? I’ve hit this issue myself today but with udp sessions. Unfortunately, as you’ve correctly pointed out, we were forcing a cleanup only on the non-fixed local port branch.
Regards, Florin
[1] https://gerrit.fd.io/r/c/vpp/+/38473
Hi list,
We have update coded from the upstream session&tcp changes to our code base and find a possible bug which cause tcp connection can't be established anymore.
Our scenario is that we will connect to a remote tcp server with specified local port and local ip, however, new vpp code have introduced a lcl_endpts_freelist which will be either flushed when pending local endpoint exceeded the limit (32) or when transport_alloc_local_port is called.
However, since we specify the local port and local ip and the total session count is limited (< 32), in this case, the transport_cleanup_freelist will never be called which cause the previous session which use the specified local port and local ip will not be released after the session aborted.
I think we should also try to free the list in such case as I did in the following code:
int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg, ip46_address_t * lcl_addr, u16 * lcl_port) { // ZDY: transport_main_t *tm = &tp_main; transport_endpoint_t *rmt = (transport_endpoint_t *) rmt_cfg; session_error_t error; int port;
/* * Find the local address */ if (ip_is_zero (&rmt_cfg->peer.ip, rmt_cfg->peer.is_ip4)) { error = transport_find_local_ip_for_remote (&rmt_cfg->peer.sw_if_index, rmt, lcl_addr); if (error) return error; } else { /* Assume session layer vetted this address */ clib_memcpy_fast (lcl_addr, &rmt_cfg->peer.ip, sizeof (rmt_cfg->peer.ip)); }
/* * Allocate source port */ if (rmt_cfg->peer.port == 0) { port = transport_alloc_local_port (proto, lcl_addr, rmt_cfg); if (port < 1) return SESSION_E_NOPORT; *lcl_port = port; } else { port = clib_net_to_host_u16 (rmt_cfg->peer.port); *lcl_port = port;
// ZDY: need add this to to cleanup because in specified src port // case, we will not run to transport_alloc_local_port, then // freelist will only be freeed when list is full (>32). /* Cleanup freelist if need be */ if (vec_len (tm->lcl_endpts_freelist)) transport_cleanup_freelist ();
return transport_endpoint_mark_used (proto, lcl_addr, port); }
return 0; }
|
|