config/odp-linux-dpdk.conf


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306

# ODP runtime configuration options
#
# This template configuration file (odp-linux-dpdk.conf) is hardcoded
# during configure/build phase and the values defined here are used if
# optional ODP_CONFIG_FILE is not set. This configuration file MUST
# include all configuration options.
#
# ODP_CONFIG_FILE can be used to override default values and it doesn't
# have to include all available options. The missing options are
# replaced with hardcoded default values.
#
# The options defined here are implementation specific and valid option
# values should be checked from the implementation code.
#
# See libconfig syntax: https://hyperrealm.github.io/libconfig/libconfig_manual.html#Configuration-Files

# Mandatory fields
odp_implementation = "linux-dpdk"
config_file_version = "0.1.21"

# System options
system: {
	# CPU frequency value returned by odp_cpu_hz() and odp_cpu_hz_id()
	# calls on platforms where frequency isn't available using standard
	# Linux methods.
	cpu_mhz = 0

	# CPU max frequency value returned by odp_cpu_hz_max() and
	# odp_cpu_hz_max_id() calls on platforms where max frequency isn't
	# available using standard Linux methods.
	cpu_mhz_max = 1400

	# When enabled (1), implementation reads the CPU frequency values from
	# OS only once during ODP initialization. Enabling this option removes
	# system calls from odp_cpu_hz() and odp_cpu_hz_id() implementations.
	#
	# NOTE: This option should only be used on systems where CPU frequency
	# scaling is disabled.
	cpu_hz_static = 0

	# Maximum number of ODP threads that can be created.
	# odp_thread_count_max() returns this value or the build time
	# maximum ODP_THREAD_COUNT_MAX, whichever is lower. This setting
	# can be used to reduce thread related resource usage.
	thread_count_max = 256
}

# DPDK options
dpdk: {
	# Amount of preallocated memory for process mode usage in megabytes
	#
	# NOTE: Process mode is not officially supported by DPDK. Application
	# should reserve all shared resources and configure the system before
	# forking child processes for the best success probability.
	process_mode_memory_mb = 768

	# PCI devices configuration.
	#
	# Specify them as lists of strings of format.
	# "[domain:]bus:devid.func[,devargs]", where devargs are
	# optional arguments in the "key1=value1,key2=value2,..." form.
	# Either whitelist or blacklist should be defined but not both.
	#
	# List of PCI devices that should be used.
	pci_whitelist = []

	# List of PCI devices that should not be used.
	pci_blacklist = []

	# EAL parameters string
	#
	# String is in EAL parameters format. Parameters given here will be used
	# as last arguments to rte_eal_init() after other DPDK options
	# ('process_mode_memory_mb' (-m), 'pci_whitelist' (-w), 'pci_blacklist'
	# (-b)) and ODP_PLATFORM_PARAMS environment variable contents.
	eal_params = ""
}

# Pool options
pool: {
	# Packet pool options
	pkt: {
		# Maximum number of packets per pool. Power of two minus one
		# results optimal memory usage (e.g. (256 * 1024) - 1).
		max_num = 262143
	}
}

# General pktio options
pktio: {
	# Not supported by ODP-DPDK!
	pktin_frame_offset = 0

	# Pool size allocated for potential completion events for transmitted and
	# dropped packets. Separate pool for different packet IO instances.
	tx_compl_pool_size = 1024
}

# DPDK pktio options
pktio_dpdk: {
	# Default options

	# Number of RX and TX descriptors. The values may be adjusted by the
	# implementation to satisfy PMD limits.
	num_rx_desc = 1024
	num_tx_desc = 1024

	rx_drop_en = 0

	# Enable receipt of Ethernet frames sent to any multicast group
	multicast_en = 1

	# Driver specific options (use PMD names from DPDK)
	net_ixgbe: {
		rx_drop_en = 1
	}
}

queue_basic: {
	# Maximum queue size. Power of two minus one results optimal memory
	# usage (e.g. (8 * 1024) - 1).
	max_queue_size = 8191

	# Default queue size. Power of two minus one results optimal memory
	# usage (e.g. (4 * 1024) - 1).
	default_queue_size = 4095
}

sched_basic: {
	# Priority level spread
	#
	# Each priority level is spread into multiple scheduler internal queues.
	# This value defines the number of those queues. Minimum value is 1.
	# Each thread prefers one of the queues over other queues. A higher
	# spread value typically improves parallelism and thus is better for
	# high thread counts, but causes uneven service level for low thread
	# counts. Typically, optimal value is the number of threads using
	# the scheduler.
	prio_spread = 4

	# Weight of the preferred scheduler internal queue
	#
	# Each thread prefers one of the internal queues over other queues.
	# This value controls how many times the preferred queue is polled
	# between a poll to another internal queue. Minimum value is 1. A higher
	# value typically improves parallelism as threads work mostly on their
	# preferred queues, but causes uneven service level for low thread
	# counts as non-preferred queues are served less often
	prio_spread_weight = 63

	# Dynamic load balance of scheduler internal queues
	#
	# When enabled (1), scheduler checks periodically internal queue load levels and
	# moves event queues from one spread to another in order to even out the loads.
	# Load level of an internal queue (group/prio/spread) is measures as number of
	# event queues allocated to it, divided by number of threads serving it.
	load_balance = 1

	# Burst size configuration per priority. The first array element
	# represents the highest queue priority. The scheduler tries to get
	# burst_size_default[prio] events from a queue and stashes those that
	# cannot be passed to the application immediately. More events than the
	# default burst size may be returned from application request, but no
	# more than burst_size_max[prio].
	#
	# Large burst sizes improve throughput, but decrease application
	# responsiveness to higher priority events due to head of line blocking
	# caused by a burst of lower priority events.
	burst_size_default = [ 32,  32,  32,  32,  32, 16,  8, 4]
	burst_size_max     = [255, 255, 255, 255, 255, 16, 16, 8]

	# Burst size configuration per priority for each scheduled queue type.
	# Overrides default values set in 'burst_size_default' and
	# 'burst_size_max' if != 0.
	burst_size_parallel     = [0, 0, 0, 0, 0, 0, 0, 0]
	burst_size_max_parallel = [0, 0, 0, 0, 0, 0, 0, 0]
	burst_size_atomic       = [0, 0, 0, 0, 0, 0, 0, 0]
	burst_size_max_atomic   = [0, 0, 0, 0, 0, 0, 0, 0]
	burst_size_ordered      = [0, 0, 0, 0, 0, 0, 0, 0]
	burst_size_max_ordered  = [0, 0, 0, 0, 0, 0, 0, 0]

	# Automatically updated schedule groups
	#
	# DEPRECATED: use odp_schedule_config() API instead
	#
	# API specification defines that ODP_SCHED_GROUP_ALL,
	# _WORKER and _CONTROL are updated automatically. These options can be
	# used to disable these group when not used. Set value to 0 to disable
	# a group. Performance may improve when unused groups are disabled.
	group_enable: {
		all     = 1
		worker  = 1
		control = 1
	}

	# Ordered queue reorder stash size
	#
	# Number of events each thread can stash internally before having to
	# wait for the right order context. Reorder stash can improve
	# performance if threads process events in bursts. If 'order_stash_size'
	# > 0, events may be dropped by the implementation if the target queue
	# is full. To prevent this set 'order_stash_size' to 0.
	order_stash_size = 512
}

sched_eventdev: {
	# Eventdev supports up to RTE_EVENT_MAX_QUEUES_PER_DEV queues and these
	# have to be mapped to ODP's scheduled queue types at startup. If the
	# combined number of queues is zero, eventdev queues are divined evenly
	# amongst the ODP queue types.
	num_atomic_queues = 0
	num_ordered_queues = 0
	num_parallel_queues = 0

	# Number of event ports (zero = all available). Each ODP worker
	# calling scheduler or doing queue enqueue requires a private event
	# port.
	num_ports = 0
}

stash: {
	# Maximum number of stashes
	max_num = 512

	# Maximum number of objects in a stash
	#
	# The value may be rounded up by the implementation. For optimal memory
	# usage set value to a power of two - 1.
	max_num_obj = 4095

	# Strict size
	#
	# When set to 0, application can attempt to store more handles into a
	# stash than it specified in the creation parameters.
	strict_size = 1
}

timer: {
	# Inline timer poll interval
	#
	# When set to 1 inline timers are polled during every schedule round.
	# Increasing the value reduces timer processing overhead while
	# decreasing accuracy.
	inline_poll_interval = 10

	# Inline timer poll interval in nanoseconds
	#
	# When inline_poll_interval is larger than 1, use this option to limit
	# inline timer polling rate in nanoseconds. By default, this defines the
	# maximum rate a thread may poll timers. If a timer pool is created with
	# a higher resolution than this, the polling rate is increased
	# accordingly.
	inline_poll_interval_nsec = 500000

	# Use experimental DPDK alternate timer API based implementation
	alternate = 0
}

ipsec: {
	# Packet ordering method for asynchronous IPsec processing
	#
	# Asynchronous IPsec processing maintains original packet order when
	# started within ordered or atomic scheduling context. In addition
	# to that, ODP API specifies that the order of IPsec processing
	# (i.e. anti-replay window update and sequence number generation)
	# is the same as the original packet order.
	#
	# The following settings control how the order is maintained in
	# asynchronous IPsec operations. They have no effect on synchronous
	# operations where the ODP application is responsible of the ordering.
	#
	# Values:
	#
	# 0: Ordering is not attempted.
	#
	#    This has the lowest overhead and the greatest parallelism but
	#    is not fully compliant with the API specification.
	#
	#    Lack of ordering means that outbound IPsec packets, although
	#    remaining in the correct order, may have their sequence numbers
	#    assigned out of order. This can cause unexpected packet loss if
	#    the anti-replay window of the receiving end is not large enough
	#    to cover the possible misordering.
	#
	#    Similarly, since anti-replay check is not done in the reception
	#    order, the anti-replay check sees additional packet misordering
	#    on top of the true misordering of the received packets. This
	#    means that a larger anti-replay window may be required to avoid
	#    packet loss.
	#
	# 1: Ordering by waiting
	#
	#    Correct processing order is maintained by a simple mechanism
	#    that makes a thread wait until its scheduling context has
	#    reached the head of its input queue.
	#
	#    This limits parallelism when single input queue is used, even
	#    when packets get distributed to multiple SAs.
	ordering: {
		  # Odering method for asynchronous inbound operations.
		  async_inbound = 0

		  # Odering method for asynchronous outbound operations.
		  async_outbound = 0
	}
}