aboutsummaryrefslogtreecommitdiff
path: root/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
blob: 26598101c4afddf9983cc9a60f8a7608cea3c0b9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.vector;

import java.io.Closeable;
import java.util.Set;

import io.netty.buffer.DrillBuf;

import org.apache.drill.exec.exception.OutOfMemoryException;
import org.apache.drill.exec.memory.AllocationManager.BufferLedger;
import org.apache.drill.exec.memory.AllocationManager;
import org.apache.drill.exec.memory.BufferAllocator;
import org.apache.drill.exec.proto.UserBitShared.SerializedField;
import org.apache.drill.exec.record.MaterializedField;
import org.apache.drill.exec.record.TransferPair;
import org.apache.drill.exec.vector.complex.reader.FieldReader;

/**
 * An abstraction that is used to store a sequence of values in an individual
 * column.
 * <p>
 * A {@link ValueVector value vector} stores underlying data in-memory in a
 * columnar fashion that is compact and efficient. The column whose data is
 * stored is described by {@link #getField()}.
 * <p>
 * A vector, when instantiated, relies on a
 * {@link org.apache.drill.exec.record.DeadBuf dead buffer}. It is important
 * that the vector is allocated before attempting to read or write.
 * <p>
 * There are a few "rules" around vectors:
 *
 * <ul>
 *   <li>Values need to be written in order (e.g. index 0, 1, 2, 5).</li>
 *   <li>Null vectors start with all values as null before writing anything.</li>
 *   <li>For variable width types, the offset vector should be all zeros before
 *   writing.</li>
 *   <li>You must call setValueCount before a vector can be read.</li>
 *   <li>You should never write to a vector once it has been read.</li>
 *   <li>Vectors may not grow larger than the number of bytes specified in
 *   {@link #MAX_BUFFER_SIZE} to prevent memory fragmentation. Use the
 *   {@code setBounded()} methods in the mutator to enforce this rule.</li>
 * </ul>
 *
 * Please note that the current implementation doesn't enforce those rules,
 * hence we may find a few places that deviate from them (e.g. offset
 * vectors in Variable Length and Repeated vectors).
 * <p>
 * This interface "should" strive to guarantee this order of operation:
 * <blockquote>
 * allocate &gt; mutate &gt; setvaluecount &gt; access &gt; clear (or allocate
 * to start the process over).
 * </blockquote>
 * <p>
 * NOTE(review): the interface is also {@code Iterable<ValueVector>}; what the
 * iteration yields (presumably child vectors) is defined by implementations
 * and is not visible here.
 */

public interface ValueVector extends Closeable, Iterable<ValueVector> {

  /**
   * Maximum allowed size, in bytes, of the buffer backing a value vector.
   * Set to the Netty chunk size, as reported by
   * {@code AllocationManager.chunkSize()}, to prevent memory fragmentation.
   */

  int MAX_BUFFER_SIZE = AllocationManager.chunkSize();

  /**
   * Maximum allowed row count in a vector. Repeated vectors
   * may have more items, but can have no more than this number
   * of arrays. Limited by 2-byte length in SV2: 65536 = 2<sup>16</sup>.
   */

  int MAX_ROW_COUNT = Character.MAX_VALUE + 1;

  /**
   * Lower bound counterpart of {@link #MAX_ROW_COUNT}.
   * NOTE(review): presumably the minimum allowed row count in a vector;
   * confirm against the callers that use it.
   */
  int MIN_ROW_COUNT = 1;

  // Commonly-used names of the internal (child) vectors

  String BITS_VECTOR_NAME = "$bits$";
  String OFFSETS_VECTOR_NAME = "$offsets$";

  @Deprecated
  // See DRILL-6216 for the reason this name is deprecated
  String VALUES_VECTOR_NAME = "$values$";

  /**
   * Allocates new buffers. The ValueVector implements the logic that determines how much to allocate.
   *
   * @throws OutOfMemoryException Thrown if no memory can be allocated.
   */
  void allocateNew() throws OutOfMemoryException;

  /**
   * Allocates new buffers. The ValueVector implements the logic that determines how much to allocate.
   * Unlike {@link #allocateNew()}, the outcome is reported through the return value.
   *
   * @return true if allocation was successful.
   */
  boolean allocateNewSafe();

  /**
   * Returns the buffer allocator associated with this vector.
   * NOTE(review): presumably the allocator that backs this vector's buffers;
   * confirm against implementations.
   */
  BufferAllocator getAllocator();

  /**
   * Sets the initial record capacity.
   *
   * @param numRecords the initial capacity, expressed as a number of records
   */
  void setInitialCapacity(int numRecords);

  /**
   * Returns the maximum number of values that can be stored in this vector instance.
   */
  int getValueCapacity();

  /**
   * Alternative to {@link #clear()}. Allows use as an AutoCloseable in try-with-resources.
   * Overrides {@link Closeable#close()} without declaring a checked exception.
   */
  @Override
  void close();

  /**
   * Releases the underlying DrillBuf and resets the ValueVector to empty.
   */
  void clear();

  /**
   * Gets information about how this field is materialized.
   */
  MaterializedField getField();

  /**
   * Returns a {@link org.apache.drill.exec.record.TransferPair transfer pair}, creating a new target vector of
   * the same type.
   */
  TransferPair getTransferPair(BufferAllocator allocator);

  /**
   * Variant of {@link #getTransferPair(BufferAllocator)} that additionally takes a {@code ref} string.
   * NOTE(review): {@code ref} is presumably the name given to the new target vector's field;
   * confirm against implementations.
   */
  TransferPair getTransferPair(String ref, BufferAllocator allocator);

  /**
   * Returns a new {@link org.apache.drill.exec.record.TransferPair transfer pair} that is used to transfer underlying
   * buffers into the target vector.
   */
  TransferPair makeTransferPair(ValueVector target);

  /**
   * Returns an {@link org.apache.drill.exec.vector.ValueVector.Accessor accessor} that is used to read from this vector
   * instance.
   */
  Accessor getAccessor();

  /**
   * Returns a {@link org.apache.drill.exec.vector.ValueVector.Mutator mutator} that is used to write to this vector
   * instance.
   */
  Mutator getMutator();

  /**
   * Returns a {@link org.apache.drill.exec.vector.complex.reader.FieldReader field reader} that supports reading values
   * from this vector.
   */
  FieldReader getReader();

  /**
   * Gets the metadata for this field. Used in serialization.
   *
   * @return the {@link SerializedField} metadata for this field.
   */
  SerializedField getMetadata();

  /**
   * Returns the number of bytes used by the data in this vector instance.
   * The name is a bit of a misnomer: this is the size of the data held,
   * not the size of the buffer (see {@link #getAllocatedSize()} for the latter).
   */
  int getBufferSize();

  /**
   * Returns the total size of buffers allocated by this vector. Has
   * meaning only when vectors are directly allocated and each vector
   * has its own buffer. Does not have meaning for vectors deserialized
   * from the network or disk in which multiple vectors share the
   * same buffer.
   *
   * @return allocated buffer size, in bytes
   */

  int getAllocatedSize();

  /**
   * Returns the number of bytes that is used by this vector if it holds the given number
   * of values. The result will be the same as if Mutator.setValueCount() were called, followed
   * by calling getBufferSize(), but without any of the closing side-effects that setValueCount()
   * implies with respect to finishing off the population of a vector. Some operations might wish
   * to use this to determine how much memory has been used by a vector so far, even though it is
   * not finished being populated.
   *
   * @param valueCount the number of values to assume this vector contains
   * @return the buffer size if this vector is holding valueCount values
   */
  int getBufferSizeFor(int valueCount);

  /**
   * Returns the underlying buffers associated with this vector. Note that this doesn't impact the reference counts
   * for these buffers, so it should only be used for in-context access. Also note that the buffers change regularly,
   * thus external classes shouldn't hold a reference to them (unless they change them).
   *
   * @param clear Whether to clear the vector before returning; the buffers will still be refcounted,
   *   but the returned array will be the only reference to them
   *
   * @return The underlying {@link io.netty.buffer.DrillBuf buffers} that are used by this vector instance.
   */
  DrillBuf[] getBuffers(boolean clear);

  /**
   * Loads the data provided in the buffer. Typically used when deserializing from the wire.
   *
   * @param metadata
   *          Metadata used to decode the incoming buffer.
   * @param buffer
   *          The buffer that contains the ValueVector.
   */
  void load(SerializedField metadata, DrillBuf buffer);

  /**
   * Copies a single entry from another vector into this vector.
   * NOTE(review): presumably {@code from} must be of the same type as this vector;
   * confirm against implementations.
   *
   * @param toIndex index in this vector that receives the entry
   * @param from vector to copy the entry from
   * @param fromIndex index of the entry within {@code from}
   */
  void copyEntry(int toIndex, ValueVector from, int fromIndex);

  /**
   * Adds the ledgers underlying the buffers underlying the components of the
   * vector to the set provided. Used to determine actual memory allocation.
   *
   * @param ledgers set of ledgers to which to add ledgers for this vector
   */

  void collectLedgers(Set<BufferLedger> ledgers);

  /**
   * Returns the number of value bytes consumed by actual data.
   *
   * @param valueCount the number of values to take into account
   *   (presumably the values at the start of the vector - TODO confirm)
   */

  int getPayloadByteCount(int valueCount);

  /**
   * Exchanges state with another value vector of the same type.
   * Used to implement look-ahead writers.
   *
   * @param other the vector, of the same type, to exchange state with
   */

  void exchange(ValueVector other);

  /**
   * Converts a non-nullable vector to nullable by shuffling the data from
   * one to the other. Avoids the need to generate copy code just to change
   * mode. If this vector is non-nullable, accepts a nullable dual (same
   * minor type, different mode). If this vector is nullable, or non-scalar,
   * then throws an exception.
   *
   * @param nullableVector nullable vector of the same minor type as
   * this vector
   */

  void toNullable(ValueVector nullableVector);

  /**
   * An abstraction that is used to read from this vector instance.
   */
  interface Accessor {
    /**
     * Gets the Java Object representation of the element at the specified position. Useful for testing.
     *
     * @param index
     *          Index of the value to get
     */
    Object getObject(int index);

    /**
     * Returns the number of values that are stored in this vector.
     */
    int getValueCount();

    /**
     * Returns true if the value at the given index is null, false otherwise.
     */
    boolean isNull(int index);
  }

  /**
   * An abstraction that is used to write into this vector instance.
   */
  interface Mutator {
    /**
     * Sets the number of values that are stored in this vector to the given value count. <b>WARNING!</b> Once the
     * valueCount is set, the vector should be considered immutable.
     *
     * @param valueCount  value count to set.
     */
    void setValueCount(int valueCount);

    /**
     * Resets the mutator to its pristine state.
     */
    void reset();

    /**
     * @deprecated  this has nothing to do with the value vector abstraction and should be removed.
     */
    @Deprecated
    void generateTestData(int values);

    /**
     * Exchanges state with the mutator of another vector. Used when exchanging
     * state with another vector.
     *
     * @param other the mutator of the other vector
     */

    void exchange(Mutator other);
  }
}