Halide 13.0.2
Halide compiler and libraries
HalideBuffer.h
Go to the documentation of this file.
1/** \file
2 * Defines a Buffer type that wraps halide_buffer_t and adds
3 * functionality, and methods for more conveniently iterating over the
4 * samples in a halide_buffer_t outside of Halide code. */
5
6#ifndef HALIDE_RUNTIME_BUFFER_H
7#define HALIDE_RUNTIME_BUFFER_H
8
9#include <algorithm>
10#include <atomic>
11#include <cassert>
12#include <cstdint>
13#include <cstring>
14#include <limits>
15#include <memory>
16#include <vector>
17
18#if defined(__has_feature)
19#if __has_feature(memory_sanitizer)
20#include <sanitizer/msan_interface.h>
21#endif
22#endif
23
24#include "HalideRuntime.h"
25
26#ifdef _MSC_VER
27#include <malloc.h>
28#define HALIDE_ALLOCA _alloca
29#else
30#define HALIDE_ALLOCA __builtin_alloca
31#endif
32
33// gcc 5.1 has a false positive warning on this code
34#if __GNUC__ == 5 && __GNUC_MINOR__ == 1
35#pragma GCC diagnostic ignored "-Warray-bounds"
36#endif
37
38namespace Halide {
39namespace Runtime {
40
41// Forward-declare our Buffer class
42template<typename T, int D>
43class Buffer;
44
/** Compile-time predicate for use with std::enable_if:
 * AllInts<Ts...>::value is true when every type in the pack is
 * implicitly convertible to int, with float and double explicitly
 * rejected. */
template<typename... Args>
struct AllInts : std::false_type {};

// An empty pack trivially satisfies the predicate.
template<>
struct AllInts<> : std::true_type {};

// Recursive case: the head must convert to int and the tail must pass too.
template<typename T, typename... Args>
struct AllInts<T, Args...>
    : std::integral_constant<bool,
                             std::is_convertible<T, int>::value &&
                                 AllInts<Args...>::value> {};

// float and double do convert to int implicitly, but that conversion
// produces a warning we treat as an error, so packs starting with
// either are rejected outright.
template<typename... Args>
struct AllInts<float, Args...> : std::false_type {};

template<typename... Args>
struct AllInts<double, Args...> : std::false_type {};
66
// Reports whether a container (or C array) of extents holds a zero.
namespace Internal {
/** Returns true if any element of c, converted to int, equals zero;
 * returns false for an empty container. */
template<typename Container>
bool any_zero(const Container &c) {
    return std::any_of(std::begin(c), std::end(c),
                       [](int value) { return value == 0; });
}
}  // namespace Internal
79
80/** A struct acting as a header for allocations owned by the Buffer
81 * class itself. */
83 void (*deallocate_fn)(void *);
84 std::atomic<int> ref_count;
85
86 // Note that ref_count always starts at 1
89 }
90};
91
/** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
enum struct BufferDeviceOwnership : int {
    Allocated,               ///< halide_device_free will be called when device ref count goes to zero
    WrappedNative,           ///< halide_device_detach_native will be called when device ref count goes to zero
    Unmanaged,               ///< No free routine will be called when device ref count goes to zero
    AllocatedDeviceAndHost,  ///< Call device_and_host_free when DevRefCount goes to zero.
    Cropped,                 ///< Call halide_device_release_crop when DevRefCount goes to zero.
};
100
101/** A similar struct for managing device allocations. */
103 // This is only ever constructed when there's something to manage,
104 // so start at one.
105 std::atomic<int> count{1};
107};
108
109/** A templated Buffer class that wraps halide_buffer_t and adds
110 * functionality. When using Halide from C++, this is the preferred
111 * way to create input and output buffers. The overhead of using this
112 * class relative to a naked halide_buffer_t is minimal - it uses another
113 * ~16 bytes on the stack, and does no dynamic allocations when using
114 * it to represent existing memory of a known maximum dimensionality.
115 *
116 * The template parameter T is the element type. For buffers where the
117 * element type is unknown, or may vary, use void or const void.
118 *
119 * D is the maximum number of dimensions that can be represented using
120 * space inside the class itself. Set it to the maximum dimensionality
121 * you expect this buffer to be. If the actual dimensionality exceeds
122 * this, heap storage is allocated to track the shape of the buffer. D
123 * defaults to 4, which should cover nearly all usage.
124 *
125 * The class optionally allocates and owns memory for the image using
126 * a reference-counted allocation made with the provided allocate and
127 * deallocate functions. If they are null, malloc and free are used.
128 * Any device-side allocation is considered as owned if and only if the
129 * host-side allocation is owned. */
130template<typename T = void, int D = 4>
131class Buffer {
132 /** The underlying halide_buffer_t */
133 halide_buffer_t buf = {0};
134
135 /** Some in-class storage for shape of the dimensions. */
136 halide_dimension_t shape[D];
137
138 /** The allocation owned by this Buffer. NULL if the Buffer does not
139 * own the memory. */
140 AllocationHeader *alloc = nullptr;
141
142 /** A reference count for the device allocation owned by this
143 * buffer. */
144 mutable DeviceRefCount *dev_ref_count = nullptr;
145
146 /** True if T is of type void or const void */
147 static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
148
149 /** A type function that adds a const qualifier if T is a const type. */
150 template<typename T2>
151 using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
152
153 /** T unless T is (const) void, in which case (const)
154 * uint8_t. Useful for providing return types for operator() */
155 using not_void_T = typename std::conditional<T_is_void,
156 add_const_if_T_is_const<uint8_t>,
157 T>::type;
158
159 /** T with constness removed. Useful for return type of copy(). */
160 using not_const_T = typename std::remove_const<T>::type;
161
162 /** The type the elements are stored as. Equal to not_void_T
163 * unless T is a pointer, in which case uint64_t. Halide stores
164 * all pointer types as uint64s internally, even on 32-bit
165 * systems. */
166 using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
167
168public:
169 /** True if the Halide type is not void (or const void). */
170 static constexpr bool has_static_halide_type = !T_is_void;
171
172 /** Get the Halide type of T. Callers should not use the result if
173 * has_static_halide_type is false. */
175 return halide_type_of<typename std::remove_cv<not_void_T>::type>();
176 }
177
178 /** Does this Buffer own the host memory it refers to? */
179 bool owns_host_memory() const {
180 return alloc != nullptr;
181 }
182
183private:
184 /** Increment the reference count of any owned allocation */
185 void incref() const {
186 if (owns_host_memory()) {
187 alloc->ref_count++;
188 }
189 if (buf.device) {
190 if (!dev_ref_count) {
191 // I seem to have a non-zero dev field but no
192 // reference count for it. I must have been given a
193 // device allocation by a Halide pipeline, and have
194 // never been copied from since. Take sole ownership
195 // of it.
196 dev_ref_count = new DeviceRefCount;
197 }
198 dev_ref_count->count++;
199 }
200 }
201
202 // Note that this is called "cropped" but can also encompass a slice/embed
203 // operation as well.
204 struct DevRefCountCropped : DeviceRefCount {
205 Buffer<T, D> cropped_from;
206 DevRefCountCropped(const Buffer<T, D> &cropped_from)
207 : cropped_from(cropped_from) {
209 }
210 };
211
212 /** Setup the device ref count for a buffer to indicate it is a crop (or slice, embed, etc) of cropped_from */
213 void crop_from(const Buffer<T, D> &cropped_from) {
214 assert(dev_ref_count == nullptr);
215 dev_ref_count = new DevRefCountCropped(cropped_from);
216 }
217
218 /** Decrement the reference count of any owned allocation and free host
219 * and device memory if it hits zero. Sets alloc to nullptr. */
220 void decref(bool device_only = false) {
221 if (owns_host_memory() && !device_only) {
222 int new_count = --(alloc->ref_count);
223 if (new_count == 0) {
224 void (*fn)(void *) = alloc->deallocate_fn;
225 alloc->~AllocationHeader();
226 fn(alloc);
227 }
228 buf.host = nullptr;
229 alloc = nullptr;
230 set_host_dirty(false);
231 }
232 int new_count = 0;
233 if (dev_ref_count) {
234 new_count = --(dev_ref_count->count);
235 }
236 if (new_count == 0) {
237 if (buf.device) {
238 assert(!(alloc && device_dirty()) &&
239 "Implicitly freeing a dirty device allocation while a host allocation still lives. "
240 "Call device_free explicitly if you want to drop dirty device-side data. "
241 "Call copy_to_host explicitly if you want the data copied to the host allocation "
242 "before the device allocation is freed.");
243 if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
244 buf.device_interface->detach_native(nullptr, &buf);
245 } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
246 buf.device_interface->device_and_host_free(nullptr, &buf);
247 } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
248 buf.device_interface->device_release_crop(nullptr, &buf);
249 } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
250 buf.device_interface->device_free(nullptr, &buf);
251 }
252 }
253 if (dev_ref_count) {
254 if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
255 delete (DevRefCountCropped *)dev_ref_count;
256 } else {
257 delete dev_ref_count;
258 }
259 }
260 }
261 dev_ref_count = nullptr;
262 buf.device = 0;
263 buf.device_interface = nullptr;
264 }
265
266 void free_shape_storage() {
267 if (buf.dim != shape) {
268 delete[] buf.dim;
269 buf.dim = nullptr;
270 }
271 }
272
273 void make_shape_storage(const int dimensions) {
274 // This should usually be inlined, so if dimensions is statically known,
275 // we can skip the call to new
276 buf.dimensions = dimensions;
277 buf.dim = (dimensions <= D) ? shape : new halide_dimension_t[dimensions];
278 }
279
280 void copy_shape_from(const halide_buffer_t &other) {
281 // All callers of this ensure that buf.dimensions == other.dimensions.
282 make_shape_storage(other.dimensions);
283 std::copy(other.dim, other.dim + other.dimensions, buf.dim);
284 }
285
286 template<typename T2, int D2>
287 void move_shape_from(Buffer<T2, D2> &&other) {
288 if (other.shape == other.buf.dim) {
289 copy_shape_from(other.buf);
290 } else {
291 buf.dim = other.buf.dim;
292 other.buf.dim = nullptr;
293 }
294 }
295
296 /** Initialize the shape from a halide_buffer_t. */
297 void initialize_from_buffer(const halide_buffer_t &b,
298 BufferDeviceOwnership ownership) {
299 memcpy(&buf, &b, sizeof(halide_buffer_t));
300 copy_shape_from(b);
301 if (b.device) {
302 dev_ref_count = new DeviceRefCount;
303 dev_ref_count->ownership = ownership;
304 }
305 }
306
307 /** Initialize the shape from an array of ints */
308 void initialize_shape(const int *sizes) {
309 for (int i = 0; i < buf.dimensions; i++) {
310 buf.dim[i].min = 0;
311 buf.dim[i].extent = sizes[i];
312 if (i == 0) {
313 buf.dim[i].stride = 1;
314 } else {
315 buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
316 }
317 }
318 }
319
320 /** Initialize the shape from a vector of extents */
321 void initialize_shape(const std::vector<int> &sizes) {
322 assert(buf.dimensions == (int)sizes.size());
323 initialize_shape(sizes.data());
324 }
325
326 /** Initialize the shape from the static shape of an array */
327 template<typename Array, size_t N>
328 void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
329 buf.dim[next].min = 0;
330 buf.dim[next].extent = (int)N;
331 if (next == 0) {
332 buf.dim[next].stride = 1;
333 } else {
334 initialize_shape_from_array_shape(next - 1, vals[0]);
335 buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
336 }
337 }
338
339 /** Base case for the template recursion above. */
340 template<typename T2>
341 void initialize_shape_from_array_shape(int, const T2 &) {
342 }
343
/** Base case: anything that is not a C array has zero dimensions. */
template<typename T2>
static constexpr int dimensionality_of_array(const T2 &) {
    return 0;
}

/** Get the dimensionality of a multi-dimensional C array, i.e. how
 * many array levels are nested in its type. Only the type of the
 * argument is inspected, so the result is a compile-time constant. */
template<typename Array, size_t N>
static constexpr int dimensionality_of_array(Array (&vals)[N]) {
    return dimensionality_of_array(vals[0]) + 1;
}
354
355 /** Get the underlying halide_type_t of an array's element type. */
356 template<typename Array, size_t N>
357 static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
358 return scalar_type_of_array(vals[0]);
359 }
360
361 template<typename T2>
362 static halide_type_t scalar_type_of_array(const T2 &) {
363 return halide_type_of<typename std::remove_cv<T2>::type>();
364 }
365
366 /** Crop a single dimension without handling device allocation. */
367 void crop_host(int d, int min, int extent) {
368 assert(dim(d).min() <= min);
369 assert(dim(d).max() >= min + extent - 1);
370 ptrdiff_t shift = min - dim(d).min();
371 if (buf.host != nullptr) {
372 buf.host += (shift * dim(d).stride()) * type().bytes();
373 }
374 buf.dim[d].min = min;
375 buf.dim[d].extent = extent;
376 }
377
378 /** Crop as many dimensions as are in rect, without handling device allocation. */
379 void crop_host(const std::vector<std::pair<int, int>> &rect) {
380 assert(rect.size() <= static_cast<decltype(rect.size())>(std::numeric_limits<int>::max()));
381 int limit = (int)rect.size();
382 assert(limit <= dimensions());
383 for (int i = 0; i < limit; i++) {
384 crop_host(i, rect[i].first, rect[i].second);
385 }
386 }
387
388 void complete_device_crop(Buffer<T, D> &result_host_cropped) const {
389 assert(buf.device_interface != nullptr);
390 if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == 0) {
391 const Buffer<T, D> *cropped_from = this;
392 // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
393 // is it possible to get to this point without incref having run at least once since
394 // the device field was set? (I.e. in the internal logic of crop. incref might have been
395 // called.)
396 if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
397 cropped_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
398 }
399 result_host_cropped.crop_from(*cropped_from);
400 }
401 }
402
403 /** slice a single dimension without handling device allocation. */
404 void slice_host(int d, int pos) {
405 assert(d >= 0 && d < dimensions());
406 assert(pos >= dim(d).min() && pos <= dim(d).max());
407 buf.dimensions--;
408 ptrdiff_t shift = pos - buf.dim[d].min;
409 if (buf.host != nullptr) {
410 buf.host += (shift * buf.dim[d].stride) * type().bytes();
411 }
412 for (int i = d; i < buf.dimensions; i++) {
413 buf.dim[i] = buf.dim[i + 1];
414 }
415 buf.dim[buf.dimensions] = {0, 0, 0};
416 }
417
418 void complete_device_slice(Buffer<T, D> &result_host_sliced, int d, int pos) const {
419 assert(buf.device_interface != nullptr);
420 if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == 0) {
421 const Buffer<T, D> *sliced_from = this;
422 // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
423 // is it possible to get to this point without incref having run at least once since
424 // the device field was set? (I.e. in the internal logic of slice. incref might have been
425 // called.)
426 if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
427 sliced_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
428 }
429 // crop_from() is correct here, despite the fact that we are slicing.
430 result_host_sliced.crop_from(*sliced_from);
431 }
432 }
433
434public:
435 typedef T ElemType;
436
437 /** Read-only access to the shape */
438 class Dimension {
439 const halide_dimension_t &d;
440
441 public:
442 /** The lowest coordinate in this dimension */
444 return d.min;
445 }
446
447 /** The number of elements in memory you have to step over to
448 * increment this coordinate by one. */
450 return d.stride;
451 }
452
453 /** The extent of the image along this dimension */
455 return d.extent;
456 }
457
458 /** The highest coordinate in this dimension */
460 return min() + extent() - 1;
461 }
462
463 /** An iterator class, so that you can iterate over
464 * coordinates in a dimensions using a range-based for loop. */
465 struct iterator {
466 int val;
467 int operator*() const {
468 return val;
469 }
470 bool operator!=(const iterator &other) const {
471 return val != other.val;
472 }
474 val++;
475 return *this;
476 }
477 };
478
479 /** An iterator that points to the min coordinate */
481 return {min()};
482 }
483
484 /** An iterator that points to one past the max coordinate */
486 return {min() + extent()};
487 }
488
490 : d(dim) {
491 }
492 };
493
494 /** Access the shape of the buffer */
496 assert(i >= 0 && i < this->dimensions());
497 return Dimension(buf.dim[i]);
498 }
499
500 /** Access to the mins, strides, extents. Will be deprecated. Do not use. */
501 // @{
502 int min(int i) const {
503 return dim(i).min();
504 }
505 int extent(int i) const {
506 return dim(i).extent();
507 }
508 int stride(int i) const {
509 return dim(i).stride();
510 }
511 // @}
512
513 /** The total number of elements this buffer represents. Equal to
514 * the product of the extents */
515 size_t number_of_elements() const {
516 return buf.number_of_elements();
517 }
518
519 /** Get the dimensionality of the buffer. */
520 int dimensions() const {
521 return buf.dimensions;
522 }
523
524 /** Get the type of the elements. */
526 return buf.type;
527 }
528
529 /** A pointer to the element with the lowest address. If all
530 * strides are positive, equal to the host pointer. */
531 T *begin() const {
532 assert(buf.host != nullptr); // Cannot call begin() on an unallocated Buffer.
533 return (T *)buf.begin();
534 }
535
536 /** A pointer to one beyond the element with the highest address. */
537 T *end() const {
538 assert(buf.host != nullptr); // Cannot call end() on an unallocated Buffer.
539 return (T *)buf.end();
540 }
541
542 /** The total number of bytes spanned by the data in memory. */
543 size_t size_in_bytes() const {
544 return buf.size_in_bytes();
545 }
546
547 /** Reset the Buffer to be equivalent to a default-constructed Buffer
548 * of the same static type (if any); Buffer<void> will have its runtime
549 * type reset to uint8. */
550 void reset() {
551 *this = Buffer();
552 }
553
555 : shape() {
556 buf.type = static_halide_type();
557 make_shape_storage(0);
558 }
559
560 /** Make a Buffer from a halide_buffer_t */
561 explicit Buffer(const halide_buffer_t &buf,
563 assert(T_is_void || buf.type == static_halide_type());
564 initialize_from_buffer(buf, ownership);
565 }
566
567 /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
568 template<typename T2, int D2>
569 friend class Buffer;
570
571private:
572 template<typename T2, int D2>
573 static void static_assert_can_convert_from() {
574 static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
575 "Can't convert from a Buffer<const T> to a Buffer<T>");
576 static_assert(std::is_same<typename std::remove_const<T>::type,
577 typename std::remove_const<T2>::type>::value ||
578 T_is_void || Buffer<T2, D2>::T_is_void,
579 "type mismatch constructing Buffer");
580 }
581
582public:
583 /** Determine if if an Buffer<T, D> can be constructed from some other Buffer type.
584 * If this can be determined at compile time, fail with a static assert; otherwise
585 * return a boolean based on runtime typing. */
586 template<typename T2, int D2>
587 static bool can_convert_from(const Buffer<T2, D2> &other) {
588 static_assert_can_convert_from<T2, D2>();
589 if (Buffer<T2, D2>::T_is_void && !T_is_void) {
590 return other.type() == static_halide_type();
591 }
592 return true;
593 }
594
595 /** Fail an assertion at runtime or compile-time if an Buffer<T, D>
596 * cannot be constructed from some other Buffer type. */
597 template<typename T2, int D2>
598 static void assert_can_convert_from(const Buffer<T2, D2> &other) {
599 // Explicitly call static_assert_can_convert_from() here so
600 // that we always get compile-time checking, even if compiling with
601 // assertions disabled.
602 static_assert_can_convert_from<T2, D2>();
603 assert(can_convert_from(other));
604 }
605
606 /** Copy constructor. Does not copy underlying data. */
607 Buffer(const Buffer<T, D> &other)
608 : buf(other.buf),
609 alloc(other.alloc) {
610 other.incref();
611 dev_ref_count = other.dev_ref_count;
612 copy_shape_from(other.buf);
613 }
614
615 /** Construct a Buffer from a Buffer of different dimensionality
616 * and type. Asserts that the type matches (at runtime, if one of
617 * the types is void). Note that this constructor is
618 * implicit. This, for example, lets you pass things like
619 * Buffer<T> or Buffer<const void> to functions expected
620 * Buffer<const T>. */
621 template<typename T2, int D2>
622 Buffer(const Buffer<T2, D2> &other)
623 : buf(other.buf),
624 alloc(other.alloc) {
626 other.incref();
627 dev_ref_count = other.dev_ref_count;
628 copy_shape_from(other.buf);
629 }
630
631 /** Move constructor */
632 Buffer(Buffer<T, D> &&other) noexcept
633 : buf(other.buf),
634 alloc(other.alloc),
635 dev_ref_count(other.dev_ref_count) {
636 other.dev_ref_count = nullptr;
637 other.alloc = nullptr;
638 move_shape_from(std::forward<Buffer<T, D>>(other));
639 other.buf = halide_buffer_t();
640 }
641
642 /** Move-construct a Buffer from a Buffer of different
643 * dimensionality and type. Asserts that the types match (at
644 * runtime if one of the types is void). */
645 template<typename T2, int D2>
647 : buf(other.buf),
648 alloc(other.alloc),
649 dev_ref_count(other.dev_ref_count) {
651 other.dev_ref_count = nullptr;
652 other.alloc = nullptr;
653 move_shape_from(std::forward<Buffer<T2, D2>>(other));
654 other.buf = halide_buffer_t();
655 }
656
657 /** Assign from another Buffer of possibly-different
658 * dimensionality and type. Asserts that the types match (at
659 * runtime if one of the types is void). */
660 template<typename T2, int D2>
662 if ((const void *)this == (const void *)&other) {
663 return *this;
664 }
666 other.incref();
667 decref();
668 dev_ref_count = other.dev_ref_count;
669 alloc = other.alloc;
670 free_shape_storage();
671 buf = other.buf;
672 copy_shape_from(other.buf);
673 return *this;
674 }
675
676 /** Standard assignment operator */
678 // The cast to void* here is just to satisfy clang-tidy
679 if ((const void *)this == (const void *)&other) {
680 return *this;
681 }
682 other.incref();
683 decref();
684 dev_ref_count = other.dev_ref_count;
685 alloc = other.alloc;
686 free_shape_storage();
687 buf = other.buf;
688 copy_shape_from(other.buf);
689 return *this;
690 }
691
692 /** Move from another Buffer of possibly-different
693 * dimensionality and type. Asserts that the types match (at
694 * runtime if one of the types is void). */
695 template<typename T2, int D2>
698 decref();
699 alloc = other.alloc;
700 other.alloc = nullptr;
701 dev_ref_count = other.dev_ref_count;
702 other.dev_ref_count = nullptr;
703 free_shape_storage();
704 buf = other.buf;
705 move_shape_from(std::forward<Buffer<T2, D2>>(other));
706 other.buf = halide_buffer_t();
707 return *this;
708 }
709
710 /** Standard move-assignment operator */
712 decref();
713 alloc = other.alloc;
714 other.alloc = nullptr;
715 dev_ref_count = other.dev_ref_count;
716 other.dev_ref_count = nullptr;
717 free_shape_storage();
718 buf = other.buf;
719 move_shape_from(std::forward<Buffer<T, D>>(other));
720 other.buf = halide_buffer_t();
721 return *this;
722 }
723
724 /** Check the product of the extents fits in memory. */
726 size_t size = type().bytes();
727 for (int i = 0; i < dimensions(); i++) {
728 size *= dim(i).extent();
729 }
730 // We allow 2^31 or 2^63 bytes, so drop the top bit.
731 size = (size << 1) >> 1;
732 for (int i = 0; i < dimensions(); i++) {
733 size /= dim(i).extent();
734 }
735 assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
736 }
737
738 /** Allocate memory for this Buffer. Drops the reference to any
739 * owned memory. */
740 void allocate(void *(*allocate_fn)(size_t) = nullptr,
741 void (*deallocate_fn)(void *) = nullptr) {
742 if (!allocate_fn) {
743 allocate_fn = malloc;
744 }
745 if (!deallocate_fn) {
746 deallocate_fn = free;
747 }
748
749 // Drop any existing allocation
750 deallocate();
751
752 // Conservatively align images to 128 bytes. This is enough
753 // alignment for all the platforms we might use.
754 size_t size = size_in_bytes();
755 const size_t alignment = 128;
756 size = (size + alignment - 1) & ~(alignment - 1);
757 void *alloc_storage = allocate_fn(size + sizeof(AllocationHeader) + alignment - 1);
758 alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
759 uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
760 buf.host = (uint8_t *)((uintptr_t)(unaligned_ptr + alignment - 1) & ~(alignment - 1));
761 }
762
763 /** Drop reference to any owned host or device memory, possibly
764 * freeing it, if this buffer held the last reference to
765 * it. Retains the shape of the buffer. Does nothing if this
766 * buffer did not allocate its own memory. */
767 void deallocate() {
768 decref();
769 }
770
771 /** Drop reference to any owned device memory, possibly freeing it
772 * if this buffer held the last reference to it. Asserts that
773 * device_dirty is false. */
775 decref(true);
776 }
777
778 /** Allocate a new image of the given size with a runtime
779 * type. Only used when you do know what size you want but you
780 * don't know statically what type the elements are. Pass zeroes
781 * to make a buffer suitable for bounds query calls. */
782 template<typename... Args,
783 typename = typename std::enable_if<AllInts<Args...>::value>::type>
784 Buffer(halide_type_t t, int first, Args... rest) {
785 if (!T_is_void) {
786 assert(static_halide_type() == t);
787 }
788 int extents[] = {first, (int)rest...};
789 buf.type = t;
790 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
791 make_shape_storage(buf_dimensions);
792 initialize_shape(extents);
793 if (!Internal::any_zero(extents)) {
795 allocate();
796 }
797 }
798
799 /** Allocate a new image of the given size. Pass zeroes to make a
800 * buffer suitable for bounds query calls. */
801 // @{
802
803 // The overload with one argument is 'explicit', so that
804 // (say) int is not implicitly convertible to Buffer<int>
805 explicit Buffer(int first) {
806 static_assert(!T_is_void,
807 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
808 int extents[] = {first};
809 buf.type = static_halide_type();
810 constexpr int buf_dimensions = 1;
811 make_shape_storage(buf_dimensions);
812 initialize_shape(extents);
813 if (first != 0) {
815 allocate();
816 }
817 }
818
819 template<typename... Args,
820 typename = typename std::enable_if<AllInts<Args...>::value>::type>
821 Buffer(int first, int second, Args... rest) {
822 static_assert(!T_is_void,
823 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
824 int extents[] = {first, second, (int)rest...};
825 buf.type = static_halide_type();
826 constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
827 make_shape_storage(buf_dimensions);
828 initialize_shape(extents);
829 if (!Internal::any_zero(extents)) {
831 allocate();
832 }
833 }
834 // @}
835
836 /** Allocate a new image of unknown type using a vector of ints as the size. */
837 Buffer(halide_type_t t, const std::vector<int> &sizes) {
838 if (!T_is_void) {
839 assert(static_halide_type() == t);
840 }
841 buf.type = t;
842 make_shape_storage((int)sizes.size());
843 initialize_shape(sizes);
844 if (!Internal::any_zero(sizes)) {
846 allocate();
847 }
848 }
849
850 /** Allocate a new image of known type using a vector of ints as the size. */
851 explicit Buffer(const std::vector<int> &sizes)
852 : Buffer(static_halide_type(), sizes) {
853 }
854
855private:
// Build a reordered copy of sizes: entry i of the result is
// sizes[order[i]]. Both vectors must have the same length; an index in
// order that falls outside sizes makes std::vector::at throw.
static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
    assert(order.size() == sizes.size());
    const size_t count = sizes.size();
    std::vector<int> ordered_sizes;
    ordered_sizes.reserve(count);
    for (size_t i = 0; i < count; ++i) {
        ordered_sizes.push_back(sizes.at(order[i]));
    }
    return ordered_sizes;
}
865
866public:
867 /** Allocate a new image of unknown type using a vector of ints as the size and
868 * a vector of indices indicating the storage order for each dimension. The
869 * length of the sizes vector and the storage-order vector must match. For instance,
870 * to allocate an interleaved RGB buffer, you would pass {2, 0, 1} for storage_order. */
871 Buffer(halide_type_t t, const std::vector<int> &sizes, const std::vector<int> &storage_order)
872 : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
873 transpose(storage_order);
874 }
875
876 Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
877 : Buffer(static_halide_type(), sizes, storage_order) {
878 }
879
880 /** Make an Buffer that refers to a statically sized array. Does not
881 * take ownership of the data, and does not set the host_dirty flag. */
882 template<typename Array, size_t N>
883 explicit Buffer(Array (&vals)[N]) {
884 const int buf_dimensions = dimensionality_of_array(vals);
885 buf.type = scalar_type_of_array(vals);
886 buf.host = (uint8_t *)vals;
887 make_shape_storage(buf_dimensions);
888 initialize_shape_from_array_shape(buf.dimensions - 1, vals);
889 }
890
891 /** Initialize an Buffer of runtime type from a pointer and some
892 * sizes. Assumes dense row-major packing and a min coordinate of
893 * zero. Does not take ownership of the data and does not set the
894 * host_dirty flag. */
895 template<typename... Args,
896 typename = typename std::enable_if<AllInts<Args...>::value>::type>
897 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
898 if (!T_is_void) {
899 assert(static_halide_type() == t);
900 }
901 int extents[] = {first, (int)rest...};
902 buf.type = t;
903 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
904 buf.host = (uint8_t *)const_cast<void *>(data);
905 make_shape_storage(buf_dimensions);
906 initialize_shape(extents);
907 }
908
909 /** Initialize an Buffer from a pointer and some sizes. Assumes
910 * dense row-major packing and a min coordinate of zero. Does not
911 * take ownership of the data and does not set the host_dirty flag. */
912 template<typename... Args,
913 typename = typename std::enable_if<AllInts<Args...>::value>::type>
914 explicit Buffer(T *data, int first, Args &&...rest) {
915 int extents[] = {first, (int)rest...};
916 buf.type = static_halide_type();
917 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
918 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
919 make_shape_storage(buf_dimensions);
920 initialize_shape(extents);
921 }
922
923 /** Initialize an Buffer from a pointer and a vector of
924 * sizes. Assumes dense row-major packing and a min coordinate of
925 * zero. Does not take ownership of the data and does not set the
926 * host_dirty flag. */
927 explicit Buffer(T *data, const std::vector<int> &sizes) {
928 buf.type = static_halide_type();
929 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
930 make_shape_storage((int)sizes.size());
931 initialize_shape(sizes);
932 }
933
934 /** Initialize an Buffer of runtime type from a pointer and a
935 * vector of sizes. Assumes dense row-major packing and a min
936 * coordinate of zero. Does not take ownership of the data and
937 * does not set the host_dirty flag. */
938 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
939 if (!T_is_void) {
940 assert(static_halide_type() == t);
941 }
942 buf.type = t;
943 buf.host = (uint8_t *)const_cast<void *>(data);
944 make_shape_storage((int)sizes.size());
945 initialize_shape(sizes);
946 }
947
948 /** Initialize a Buffer from a pointer to the min coordinate and
949 * an array describing the shape. Does not take ownership of the
950 * data, and does not set the host_dirty flag. */
951 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
952 if (!T_is_void) {
953 assert(static_halide_type() == t);
954 }
955 buf.type = t;
956 buf.host = (uint8_t *)const_cast<void *>(data);
957 make_shape_storage(d);
958 for (int i = 0; i < d; i++) {
959 buf.dim[i] = shape[i];
960 }
961 }
962
963 /** Initialize a Buffer from a pointer to the min coordinate and
964 * a vector describing the shape. Does not take ownership of the
965 * data, and does not set the host_dirty flag. */
966 explicit inline Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
967 const std::vector<halide_dimension_t> &shape)
968 : Buffer(t, data, (int)shape.size(), shape.data()) {
969 }
970
971 /** Initialize a Buffer from a pointer to the min coordinate and
972 * an array describing the shape. Does not take ownership of the
973 * data and does not set the host_dirty flag. */
974 explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
975 buf.type = static_halide_type();
976 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
977 make_shape_storage(d);
978 for (int i = 0; i < d; i++) {
979 buf.dim[i] = shape[i];
980 }
981 }
982
983 /** Initialize a Buffer from a pointer to the min coordinate and
984 * a vector describing the shape. Does not take ownership of the
985 * data, and does not set the host_dirty flag. */
986 explicit inline Buffer(T *data, const std::vector<halide_dimension_t> &shape)
987 : Buffer(data, (int)shape.size(), shape.data()) {
988 }
989
990 /** Destructor. Will release any underlying owned allocation if
991 * this is the last reference to it. Will assert fail if there are
992 * weak references to this Buffer outstanding. */
994 free_shape_storage();
995 decref();
996 }
997
998 /** Get a pointer to the raw halide_buffer_t this wraps. */
999 // @{
1001 return &buf;
1002 }
1003
1005 return &buf;
1006 }
1007 // @}
1008
1009 /** Provide a cast operator to halide_buffer_t *, so that
1010 * instances can be passed directly to Halide filters. */
1011 operator halide_buffer_t *() {
1012 return &buf;
1013 }
1014
1015 /** Return a typed reference to this Buffer. Useful for converting
1016 * a reference to a Buffer<void> to a reference to, for example, a
1017 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1018 * Does a runtime assert if the source buffer type is void. */
1019 template<typename T2>
1022 return *((Buffer<T2, D> *)this);
1023 }
1024
1025 /** Return a const typed reference to this Buffer. Useful for
1026 * converting a const reference to one Buffer type to a const
1027 * reference to another Buffer type. Does a runtime assert if the
1028 * source buffer type is void. */
1029 template<typename T2>
1032 return *((const Buffer<T2, D> *)this);
1033 }
1034
1035 /** Returns this rval Buffer with a different type attached. Does
1036 * a dynamic type check if the source type is void. */
1037 template<typename T2>
1040 return *((Buffer<T2, D> *)this);
1041 }
1042
1043 /** as_const() is syntactic sugar for .as<const T>(), to avoid the need
1044 * to recapitulate the type argument. */
1045 // @{
1048 // Note that we can skip the assert_can_convert_from(), since T -> const T
1049 // conversion is always legal.
1050 return *((Buffer<typename std::add_const<T>::type, D> *)this);
1051 }
1052
1055 return *((const Buffer<typename std::add_const<T>::type, D> *)this);
1056 }
1057
1060 return *((Buffer<typename std::add_const<T>::type, D> *)this);
1061 }
1062 // @}
1063
1064 /** Conventional names for the first three dimensions. */
1065 // @{
1066 int width() const {
1067 return (dimensions() > 0) ? dim(0).extent() : 1;
1068 }
1069 int height() const {
1070 return (dimensions() > 1) ? dim(1).extent() : 1;
1071 }
1072 int channels() const {
1073 return (dimensions() > 2) ? dim(2).extent() : 1;
1074 }
1075 // @}
1076
1077 /** Conventional names for the min and max value of each dimension */
1078 // @{
1079 int left() const {
1080 return dim(0).min();
1081 }
1082
1083 int right() const {
1084 return dim(0).max();
1085 }
1086
1087 int top() const {
1088 return dim(1).min();
1089 }
1090
1091 int bottom() const {
1092 return dim(1).max();
1093 }
1094 // @}
1095
1096 /** Make a new image which is a deep copy of this image. Use crop
1097 * or slice followed by copy to make a copy of only a portion of
1098 * the image. The new image uses the same memory layout as the
1099 * original, with holes compacted away. Note that the returned
1100 * Buffer is always of a non-const type T (ie:
1101 *
1102 * Buffer<const T>.copy() -> Buffer<T> rather than Buffer<const T>
1103 *
1104 * which is always safe, since we are making a deep copy. (The caller
1105 * can easily cast it back to Buffer<const T> if desired, which is
1106 * always safe and free.)
1107 */
1108 Buffer<not_const_T, D> copy(void *(*allocate_fn)(size_t) = nullptr,
1109 void (*deallocate_fn)(void *) = nullptr) const {
1110 Buffer<not_const_T, D> dst = Buffer<not_const_T, D>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
1111 dst.copy_from(*this);
1112 return dst;
1113 }
1114
1115 /** Like copy(), but the copy is created in interleaved memory layout
1116 * (vs. keeping the same memory layout as the original). Requires that 'this'
1117 * has exactly 3 dimensions.
1118 */
1119 Buffer<not_const_T, D> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
1120 void (*deallocate_fn)(void *) = nullptr) const {
1121 assert(dimensions() == 3);
1123 dst.set_min(min(0), min(1), min(2));
1124 dst.allocate(allocate_fn, deallocate_fn);
1125 dst.copy_from(*this);
1126 return dst;
1127 }
1128
1129 /** Like copy(), but the copy is created in planar memory layout
1130 * (vs. keeping the same memory layout as the original).
1131 */
1132 Buffer<not_const_T, D> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
1133 void (*deallocate_fn)(void *) = nullptr) const {
1134 std::vector<int> mins, extents;
1135 const int dims = dimensions();
1136 mins.reserve(dims);
1137 extents.reserve(dims);
1138 for (int d = 0; d < dims; ++d) {
1139 mins.push_back(dim(d).min());
1140 extents.push_back(dim(d).extent());
1141 }
1143 dst.set_min(mins);
1144 dst.allocate(allocate_fn, deallocate_fn);
1145 dst.copy_from(*this);
1146 return dst;
1147 }
1148
1149 /** Make a copy of the Buffer which shares the underlying host and/or device
1150 * allocations as the existing Buffer. This is purely syntactic sugar for
1151 * cases where you have a const reference to a Buffer but need a temporary
1152 * non-const copy (e.g. to make a call into AOT-generated Halide code), and want a terse
1153 * inline way to create a temporary. \code
1154 * void call_my_func(const Buffer<const uint8_t>& input) {
1155 * my_func(input.alias(), output);
1156 * }\endcode
1157 */
1158 inline Buffer<T, D> alias() const {
1159 return *this;
1160 }
1161
1162 /** Fill a Buffer with the values at the same coordinates in
1163 * another Buffer. Restricts itself to coordinates contained
1164 * within the intersection of the two buffers. If the two Buffers
1165 * are not in the same coordinate system, you will need to
1166 * translate the argument Buffer first. E.g. if you're blitting a
1167 * sprite onto a framebuffer, you'll want to translate the sprite
1168 * to the correct location first like so: \code
1169 * framebuffer.copy_from(sprite.translated({x, y})); \endcode
1170 */
1171 template<typename T2, int D2>
1173 static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
1174 assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
1175 assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");
1176
1177 Buffer<T, D> dst(*this);
1178
1179 assert(src.dimensions() == dst.dimensions());
1180
1181 // Trim the copy to the region in common
1182 for (int i = 0; i < dimensions(); i++) {
1183 int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
1184 int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
1185 if (max_coord < min_coord) {
1186 // The buffers do not overlap.
1187 return;
1188 }
1189 dst.crop(i, min_coord, max_coord - min_coord + 1);
1190 src.crop(i, min_coord, max_coord - min_coord + 1);
1191 }
1192
1193 // If T is void, we need to do runtime dispatch to an
1194 // appropriately-typed lambda. We're copying, so we only care
1195 // about the element size. (If not, this should optimize away
1196 // into a static dispatch to the right-sized copy.)
1197 if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
1198 using MemType = uint8_t;
1199 auto &typed_dst = (Buffer<MemType, D> &)dst;
1200 auto &typed_src = (Buffer<const MemType, D> &)src;
1201 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1202 } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
1203 using MemType = uint16_t;
1204 auto &typed_dst = (Buffer<MemType, D> &)dst;
1205 auto &typed_src = (Buffer<const MemType, D> &)src;
1206 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1207 } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
1208 using MemType = uint32_t;
1209 auto &typed_dst = (Buffer<MemType, D> &)dst;
1210 auto &typed_src = (Buffer<const MemType, D> &)src;
1211 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1212 } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
1213 using MemType = uint64_t;
1214 auto &typed_dst = (Buffer<MemType, D> &)dst;
1215 auto &typed_src = (Buffer<const MemType, D> &)src;
1216 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1217 } else {
1218 assert(false && "type().bytes() must be 1, 2, 4, or 8");
1219 }
1221 }
1222
1223 /** Make an image that refers to a sub-range of this image along
1224 * the given dimension. Asserts that the crop region is within
1225 * the existing bounds: you cannot "crop outwards", even if you know there
1226 * is valid Buffer storage (e.g. because you already cropped inwards). */
1227 Buffer<T, D> cropped(int d, int min, int extent) const {
1228 // Make a fresh copy of the underlying buffer (but not a fresh
1229 // copy of the allocation, if there is one).
1230 Buffer<T, D> im = *this;
1231
1232 // This guarantees the prexisting device ref is dropped if the
1233 // device_crop call fails and maintains the buffer in a consistent
1234 // state.
1235 im.device_deallocate();
1236
1237 im.crop_host(d, min, extent);
1238 if (buf.device_interface != nullptr) {
1239 complete_device_crop(im);
1240 }
1241 return im;
1242 }
1243
1244 /** Crop an image in-place along the given dimension. This does
1245 * not move any data around in memory - it just changes the min
1246 * and extent of the given dimension. */
1247 void crop(int d, int min, int extent) {
1248 // An optimization for non-device buffers. For the device case,
1249 // a temp buffer is required, so reuse the not-in-place version.
1250 // TODO(zalman|abadams): Are nop crops common enough to special
1251 // case the device part of the if to do nothing?
1252 if (buf.device_interface != nullptr) {
1253 *this = cropped(d, min, extent);
1254 } else {
1255 crop_host(d, min, extent);
1256 }
1257 }
1258
1259 /** Make an image that refers to a sub-rectangle of this image along
1260 * the first N dimensions. Asserts that the crop region is within
1261 * the existing bounds. The cropped image may drop any device handle
1262 * if the device_interface cannot accomplish the crop in-place. */
1263 Buffer<T, D> cropped(const std::vector<std::pair<int, int>> &rect) const {
1264 // Make a fresh copy of the underlying buffer (but not a fresh
1265 // copy of the allocation, if there is one).
1266 Buffer<T, D> im = *this;
1267
1268 // This guarantees the prexisting device ref is dropped if the
1269 // device_crop call fails and maintains the buffer in a consistent
1270 // state.
1271 im.device_deallocate();
1272
1273 im.crop_host(rect);
1274 if (buf.device_interface != nullptr) {
1275 complete_device_crop(im);
1276 }
1277 return im;
1278 }
1279
1280 /** Crop an image in-place along the first N dimensions. This does
1281 * not move any data around in memory, nor does it free memory. It
1282 * just rewrites the min/extent of each dimension to refer to a
1283 * subregion of the same allocation. */
1284 void crop(const std::vector<std::pair<int, int>> &rect) {
1285 // An optimization for non-device buffers. For the device case,
1286 // a temp buffer is required, so reuse the not-in-place version.
1287 // TODO(zalman|abadams): Are nop crops common enough to special
1288 // case the device part of the if to do nothing?
1289 if (buf.device_interface != nullptr) {
1290 *this = cropped(rect);
1291 } else {
1292 crop_host(rect);
1293 }
1294 }
1295
1296 /** Make an image which refers to the same data using
1297 * translated coordinates in the given dimension. Positive values
1298 * move the image data to the right or down relative to the
1299 * coordinate system. Drops any device handle. */
1300 Buffer<T, D> translated(int d, int dx) const {
1301 Buffer<T, D> im = *this;
1302 im.translate(d, dx);
1303 return im;
1304 }
1305
1306 /** Translate an image in-place along one dimension by changing
1307 * how it is indexed. Does not move any data around in memory. */
1308 void translate(int d, int delta) {
1309 assert(d >= 0 && d < this->dimensions());
1311 buf.dim[d].min += delta;
1312 }
1313
1314 /** Make an image which refers to the same data translated along
1315 * the first N dimensions. */
1316 Buffer<T, D> translated(const std::vector<int> &delta) const {
1317 Buffer<T, D> im = *this;
1318 im.translate(delta);
1319 return im;
1320 }
1321
1322 /** Translate an image along the first N dimensions by changing
1323 * how it is indexed. Does not move any data around in memory. */
1324 void translate(const std::vector<int> &delta) {
1326 assert(delta.size() <= static_cast<decltype(delta.size())>(std::numeric_limits<int>::max()));
1327 int limit = (int)delta.size();
1328 assert(limit <= dimensions());
1329 for (int i = 0; i < limit; i++) {
1330 translate(i, delta[i]);
1331 }
1332 }
1333
1334 /** Set the min coordinate of an image in the first N dimensions. */
1335 // @{
1336 void set_min(const std::vector<int> &mins) {
1337 assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
1339 for (size_t i = 0; i < mins.size(); i++) {
1340 buf.dim[i].min = mins[i];
1341 }
1342 }
1343
template<typename... Args>
void set_min(Args... args) {
    // Pack the coordinates into a vector and forward to the vector overload.
    const std::vector<int> mins{args...};
    set_min(mins);
}
1348 // @}
1349
1350 /** Test if a given coordinate is within the bounds of an image. */
1351 // @{
1352 bool contains(const std::vector<int> &coords) const {
1353 assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
1354 for (size_t i = 0; i < coords.size(); i++) {
1355 if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
1356 return false;
1357 }
1358 }
1359 return true;
1360 }
1361
1362 template<typename... Args>
1363 bool contains(Args... args) const {
1364 return contains(std::vector<int>{args...});
1365 }
1366 // @}
1367
1368 /** Make a buffer which refers to the same data in the same layout
1369 * using a swapped indexing order for the dimensions given. So
1370 * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more
1371 * strongly that A.address_of(i, j) == B.address_of(j, i). */
1372 Buffer<T, D> transposed(int d1, int d2) const {
1373 Buffer<T, D> im = *this;
1374 im.transpose(d1, d2);
1375 return im;
1376 }
1377
1378 /** Transpose a buffer in-place by changing how it is indexed. For
1379 * example, transpose(0, 1) on a two-dimensional buffer means that
1380 * the value referred to by coordinates (i, j) is now reached at
1381 * the coordinates (j, i), and vice versa. This is done by
1382 * reordering the per-dimension metadata rather than by moving
1383 * data around in memory, so other views of the same memory will
1384 * not see the data as having been transposed. */
1385 void transpose(int d1, int d2) {
1386 assert(d1 >= 0 && d1 < this->dimensions());
1387 assert(d2 >= 0 && d2 < this->dimensions());
1388 std::swap(buf.dim[d1], buf.dim[d2]);
1389 }
1390
1391 /** A generalized transpose: instead of swapping two dimensions,
1392 * pass a vector that lists each dimension index exactly once, in
1393 * the desired order. This does not move any data around in memory
1394 * - it just permutes how it is indexed. */
1395 void transpose(const std::vector<int> &order) {
1396 assert((int)order.size() == dimensions());
1397 if (dimensions() < 2) {
1398 // My, that was easy
1399 return;
1400 }
1401
1402 std::vector<int> order_sorted = order;
1403 for (size_t i = 1; i < order_sorted.size(); i++) {
1404 for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
1405 std::swap(order_sorted[j], order_sorted[j - 1]);
1406 transpose(j, j - 1);
1407 }
1408 }
1409 }
1410
1411 /** Make a buffer which refers to the same data in the same
1412 * layout using a different ordering of the dimensions. */
1413 Buffer<T, D> transposed(const std::vector<int> &order) const {
1414 Buffer<T, D> im = *this;
1415 im.transpose(order);
1416 return im;
1417 }
1418
1419 /** Make a lower-dimensional buffer that refers to one slice of
1420 * this buffer. */
1421 Buffer<T, D> sliced(int d, int pos) const {
1422 Buffer<T, D> im = *this;
1423
1424 // This guarantees the prexisting device ref is dropped if the
1425 // device_slice call fails and maintains the buffer in a consistent
1426 // state.
1427 im.device_deallocate();
1428
1429 im.slice_host(d, pos);
1430 if (buf.device_interface != nullptr) {
1431 complete_device_slice(im, d, pos);
1432 }
1433 return im;
1434 }
1435
1436 /** Make a lower-dimensional buffer that refers to one slice of this
1437 * buffer at the dimension's minimum. */
1438 inline Buffer<T, D> sliced(int d) const {
1439 return sliced(d, dim(d).min());
1440 }
1441
1442 /** Rewrite the buffer to refer to a single lower-dimensional
1443 * slice of itself along the given dimension at the given
1444 * coordinate. Does not move any data around or free the original
1445 * memory, so other views of the same data are unaffected. */
1446 void slice(int d, int pos) {
1447 // An optimization for non-device buffers. For the device case,
1448 // a temp buffer is required, so reuse the not-in-place version.
1449 // TODO(zalman|abadams): Are nop slices common enough to special
1450 // case the device part of the if to do nothing?
1451 if (buf.device_interface != nullptr) {
1452 *this = sliced(d, pos);
1453 } else {
1454 slice_host(d, pos);
1455 }
1456 }
1457
1458 /** Slice a buffer in-place at the dimension's minimum. */
1459 inline void slice(int d) {
1460 slice(d, dim(d).min());
1461 }
1462
1463 /** Make a new buffer that views this buffer as a single slice in a
1464 * higher-dimensional space. The new dimension has extent one and
1465 * the given min. This operation is the opposite of slice. As an
1466 * example, the following condition is true:
1467 *
1468 \code
1469 im2 = im.embedded(1, 17);
1470 &im(x, y, c) == &im2(x, 17, y, c);
1471 \endcode
1472 */
1473 Buffer<T, D> embedded(int d, int pos = 0) const {
1474 Buffer<T, D> im(*this);
1475 im.embed(d, pos);
1476 return im;
1477 }
1478
1479 /** Embed a buffer in-place, increasing the
1480 * dimensionality. */
1481 void embed(int d, int pos = 0) {
1482 assert(d >= 0 && d <= dimensions());
1483 add_dimension();
1484 translate(dimensions() - 1, pos);
1485 for (int i = dimensions() - 1; i > d; i--) {
1486 transpose(i, i - 1);
1487 }
1488 }
1489
1490 /** Add a new dimension with a min of zero and an extent of
1491 * one. The stride is the extent of the outermost dimension times
1492 * its stride. The new dimension is the last dimension. This is a
1493 * special case of embed. */
1495 const int dims = buf.dimensions;
1496 buf.dimensions++;
1497 if (buf.dim != shape) {
1498 // We're already on the heap. Reallocate.
1499 halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
1500 for (int i = 0; i < dims; i++) {
1501 new_shape[i] = buf.dim[i];
1502 }
1503 delete[] buf.dim;
1504 buf.dim = new_shape;
1505 } else if (dims == D) {
1506 // Transition from the in-class storage to the heap
1507 make_shape_storage(buf.dimensions);
1508 for (int i = 0; i < dims; i++) {
1509 buf.dim[i] = shape[i];
1510 }
1511 } else {
1512 // We still fit in the class
1513 }
1514 buf.dim[dims] = {0, 1, 0};
1515 if (dims == 0) {
1516 buf.dim[dims].stride = 1;
1517 } else {
1518 buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
1519 }
1520 }
1521
1522 /** Add a new dimension with a min of zero, an extent of one, and
1523 * the specified stride. The new dimension is the last
1524 * dimension. This is a special case of embed. */
1526 add_dimension();
1527 buf.dim[buf.dimensions - 1].stride = s;
1528 }
1529
1530 /** Methods for managing any GPU allocation. */
1531 // @{
1532 // Set the host dirty flag. Called by every operator()
1533 // access. Must be inlined so it can be hoisted out of loops.
1535 void set_host_dirty(bool v = true) {
1536 assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
1537 buf.set_host_dirty(v);
1538 }
1539
1540 // Check if the device allocation is dirty. Called by
1541 // set_host_dirty, which is called by every accessor. Must be
1542 // inlined so it can be hoisted out of loops.
1544 bool device_dirty() const {
1545 return buf.device_dirty();
1546 }
1547
1548 bool host_dirty() const {
1549 return buf.host_dirty();
1550 }
1551
1552 void set_device_dirty(bool v = true) {
1553 assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
1554 buf.set_device_dirty(v);
1555 }
1556
1557 int copy_to_host(void *ctx = nullptr) {
1558 if (device_dirty()) {
1559 return buf.device_interface->copy_to_host(ctx, &buf);
1560 }
1561 return 0;
1562 }
1563
1564 int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1565 if (host_dirty()) {
1566 return device_interface->copy_to_device(ctx, &buf, device_interface);
1567 }
1568 return 0;
1569 }
1570
1571 int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1572 return device_interface->device_malloc(ctx, &buf, device_interface);
1573 }
1574
1575 int device_free(void *ctx = nullptr) {
1576 if (dev_ref_count) {
1577 assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
1578 "Can't call device_free on an unmanaged or wrapped native device handle. "
1579 "Free the source allocation or call device_detach_native instead.");
1580 // Multiple people may be holding onto this dev field
1581 assert(dev_ref_count->count == 1 &&
1582 "Multiple Halide::Runtime::Buffer objects share this device "
1583 "allocation. Freeing it would create dangling references. "
1584 "Don't call device_free on Halide buffers that you have copied or "
1585 "passed by value.");
1586 }
1587 int ret = 0;
1588 if (buf.device_interface) {
1589 ret = buf.device_interface->device_free(ctx, &buf);
1590 }
1591 if (dev_ref_count) {
1592 delete dev_ref_count;
1593 dev_ref_count = nullptr;
1594 }
1595 return ret;
1596 }
1597
1598 int device_wrap_native(const struct halide_device_interface_t *device_interface,
1599 uint64_t handle, void *ctx = nullptr) {
1600 assert(device_interface);
1601 dev_ref_count = new DeviceRefCount;
1603 return device_interface->wrap_native(ctx, &buf, handle, device_interface);
1604 }
1605
1606 int device_detach_native(void *ctx = nullptr) {
1607 assert(dev_ref_count &&
1609 "Only call device_detach_native on buffers wrapping a native "
1610 "device handle via device_wrap_native. This buffer was allocated "
1611 "using device_malloc, or is unmanaged. "
1612 "Call device_free or free the original allocation instead.");
1613 // Multiple people may be holding onto this dev field
1614 assert(dev_ref_count->count == 1 &&
1615 "Multiple Halide::Runtime::Buffer objects share this device "
1616 "allocation. Freeing it could create dangling references. "
1617 "Don't call device_detach_native on Halide buffers that you "
1618 "have copied or passed by value.");
1619 int ret = 0;
1620 if (buf.device_interface) {
1621 ret = buf.device_interface->detach_native(ctx, &buf);
1622 }
1623 delete dev_ref_count;
1624 dev_ref_count = nullptr;
1625 return ret;
1626 }
1627
1628 int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1629 return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
1630 }
1631
1632 int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1633 if (dev_ref_count) {
1635 "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1636 "Free the source allocation or call device_detach_native instead.");
1637 // Multiple people may be holding onto this dev field
1638 assert(dev_ref_count->count == 1 &&
1639 "Multiple Halide::Runtime::Buffer objects share this device "
1640 "allocation. Freeing it would create dangling references. "
1641 "Don't call device_and_host_free on Halide buffers that you have copied or "
1642 "passed by value.");
1643 }
1644 int ret = 0;
1645 if (buf.device_interface) {
1646 ret = buf.device_interface->device_and_host_free(ctx, &buf);
1647 }
1648 if (dev_ref_count) {
1649 delete dev_ref_count;
1650 dev_ref_count = nullptr;
1651 }
1652 return ret;
1653 }
1654
1655 int device_sync(void *ctx = nullptr) {
1656 return buf.device_sync(ctx);
1657 }
1658
1660 return buf.device != 0;
1661 }
1662
1663 /** Return the method by which the device field is managed. */
1665 if (dev_ref_count == nullptr) {
1667 }
1668 return dev_ref_count->ownership;
1669 }
1670 // @}
1671
1672 /** If you use the (x, y, c) indexing convention, then Halide
1673 * Buffers are stored planar by default. This function constructs
1674 * an interleaved RGB or RGBA image that can still be indexed
1675 * using (x, y, c). Passing it to a generator requires that the
1676 * generator has been compiled with support for interleaved (also
1677 * known as packed or chunky) memory layouts. */
1680 // Note that this is equivalent to calling transpose({2, 0, 1}),
1681 // but slightly more efficient.
1682 im.transpose(0, 1);
1683 im.transpose(1, 2);
1684 return im;
1685 }
1686
1687 /** If you use the (x, y, c) indexing convention, then Halide
1688 * Buffers are stored planar by default. This function constructs
1689 * an interleaved RGB or RGBA image that can still be indexed
1690 * using (x, y, c). Passing it to a generator requires that the
1691 * generator has been compiled with support for interleaved (also
1692 * known as packed or chunky) memory layouts. */
1695 }
1696
1697 /** Wrap an existing interleaved image. */
1701 im.transpose(0, 1);
1702 im.transpose(1, 2);
1703 return im;
1704 }
1705
1706 /** Wrap an existing interleaved image. */
1709 }
1710
1711 /** Make a zero-dimensional Buffer */
1714 buf.slice(0, 0);
1715 return buf;
1716 }
1717
1718 /** Make a zero-dimensional Buffer */
1720 Buffer<T, 1> buf(1);
1721 buf.slice(0, 0);
1722 return buf;
1723 }
1724
1725 /** Make a zero-dimensional Buffer that points to non-owned, existing data */
1727 Buffer<T, 1> buf(data, 1);
1728 buf.slice(0, 0);
1729 return buf;
1730 }
1731
1732 /** Make a buffer with the same shape and memory nesting order as
1733 * another buffer. It may have a different type. */
1734 template<typename T2, int D2>
1736 void *(*allocate_fn)(size_t) = nullptr,
1737 void (*deallocate_fn)(void *) = nullptr) {
1738
1739 const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
1740 return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
1741 allocate_fn, deallocate_fn);
1742 }
1743
1744private:
1745 static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
1746 int dimensions,
1747 halide_dimension_t *shape,
1748 void *(*allocate_fn)(size_t),
1749 void (*deallocate_fn)(void *)) {
1750 // Reorder the dimensions of src to have strides in increasing order
1751 std::vector<int> swaps;
1752 for (int i = dimensions - 1; i > 0; i--) {
1753 for (int j = i; j > 0; j--) {
1754 if (shape[j - 1].stride > shape[j].stride) {
1755 std::swap(shape[j - 1], shape[j]);
1756 swaps.push_back(j);
1757 }
1758 }
1759 }
1760
1761 // Rewrite the strides to be dense (this messes up src, which
1762 // is why we took it by value).
1763 for (int i = 0; i < dimensions; i++) {
1764 if (i == 0) {
1765 shape[i].stride = 1;
1766 } else {
1767 shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
1768 }
1769 }
1770
1771 // Undo the dimension reordering
1772 while (!swaps.empty()) {
1773 int j = swaps.back();
1774 std::swap(shape[j - 1], shape[j]);
1775 swaps.pop_back();
1776 }
1777
1778 // Use an explicit runtime type, and make dst a Buffer<void>, to allow
1779 // using this method with Buffer<void> for either src or dst.
1780 Buffer<> dst(dst_type, nullptr, dimensions, shape);
1781 dst.allocate(allocate_fn, deallocate_fn);
1782
1783 return dst;
1784 }
1785
1786 template<typename... Args>
1788 ptrdiff_t
1789 offset_of(int d, int first, Args... rest) const {
1790 return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
1791 }
1792
1794 ptrdiff_t offset_of(int d) const {
1795 return 0;
1796 }
1797
1798 template<typename... Args>
1800 storage_T *
1801 address_of(Args... args) const {
1802 if (T_is_void) {
1803 return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
1804 } else {
1805 return (storage_T *)(this->buf.host) + offset_of(0, args...);
1806 }
1807 }
1808
1810 ptrdiff_t offset_of(const int *pos) const {
1811 ptrdiff_t offset = 0;
1812 for (int i = this->dimensions() - 1; i >= 0; i--) {
1813 offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
1814 }
1815 return offset;
1816 }
1817
1819 storage_T *address_of(const int *pos) const {
1820 if (T_is_void) {
1821 return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
1822 } else {
1823 return (storage_T *)this->buf.host + offset_of(pos);
1824 }
1825 }
1826
1827public:
1828 /** Get a pointer to the address of the min coordinate. */
1829 T *data() const {
1830 return (T *)(this->buf.host);
1831 }
1832
1833 /** Access elements. Use im(...) to get a reference to an element,
1834 * and use &im(...) to get the address of an element. If you pass
1835 * fewer arguments than the buffer has dimensions, the rest are
1836 * treated as their min coordinate. The non-const versions set the
1837 * host_dirty flag to true.
1838 */
1839 //@{
1840 template<typename... Args,
1841 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1842 HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
1843 static_assert(!T_is_void,
1844 "Cannot use operator() on Buffer<void> types");
1845 assert(!device_dirty());
1846 return *((const not_void_T *)(address_of(first, rest...)));
1847 }
1848
1850 const not_void_T &
1851 operator()() const {
1852 static_assert(!T_is_void,
1853 "Cannot use operator() on Buffer<void> types");
1854 assert(!device_dirty());
1855 return *((const not_void_T *)(data()));
1856 }
1857
1859 const not_void_T &
1860 operator()(const int *pos) const {
1861 static_assert(!T_is_void,
1862 "Cannot use operator() on Buffer<void> types");
1863 assert(!device_dirty());
1864 return *((const not_void_T *)(address_of(pos)));
1865 }
1866
1867 template<typename... Args,
1868 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1870 not_void_T &
1871 operator()(int first, Args... rest) {
1872 static_assert(!T_is_void,
1873 "Cannot use operator() on Buffer<void> types");
1875 return *((not_void_T *)(address_of(first, rest...)));
1876 }
1877
1879 not_void_T &
1881 static_assert(!T_is_void,
1882 "Cannot use operator() on Buffer<void> types");
1884 return *((not_void_T *)(data()));
1885 }
1886
1888 not_void_T &
1889 operator()(const int *pos) {
1890 static_assert(!T_is_void,
1891 "Cannot use operator() on Buffer<void> types");
1893 return *((not_void_T *)(address_of(pos)));
1894 }
1895 // @}
1896
1897 /** Tests that all values in this buffer are equal to val. */
1898 bool all_equal(not_void_T val) const {
1899 bool all_equal = true;
1900 for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
1901 return all_equal;
1902 }
1903
1904 Buffer<T, D> &fill(not_void_T val) {
1906 for_each_value([=](T &v) { v = val; });
1907 return *this;
1908 }
1909
1910private:
1911 /** Helper functions for for_each_value. */
1912 // @{
/** One loop level of a for_each_value traversal over N buffers. */
template<int N>
struct for_each_value_task_dim {
    /** Number of iterations of this loop level. */
    std::ptrdiff_t extent;
    /** Per-buffer pointer increment applied after each iteration. */
    std::ptrdiff_t stride[N];
};
1918
1919 // Given an array of strides, and a bunch of pointers to pointers
1920 // (all of different types), advance the pointers using the
1921 // strides.
1922 template<typename Ptr, typename... Ptrs>
1923 HALIDE_ALWAYS_INLINE static void advance_ptrs(const std::ptrdiff_t *stride, Ptr &ptr, Ptrs &...ptrs) {
1924 ptr += *stride;
1925 advance_ptrs(stride + 1, ptrs...);
1926 }
1927
1929 static void advance_ptrs(const std::ptrdiff_t *) {
1930 }
1931
1932 template<typename Fn, typename Ptr, typename... Ptrs>
1933 HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
1934 const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
1935 if (d == 0) {
1936 if (innermost_strides_are_one) {
1937 Ptr end = ptr + t[0].extent;
1938 while (ptr != end) {
1939 f(*ptr++, (*ptrs++)...);
1940 }
1941 } else {
1942 for (std::ptrdiff_t i = t[0].extent; i != 0; i--) {
1943 f(*ptr, (*ptrs)...);
1944 advance_ptrs(t[0].stride, ptr, ptrs...);
1945 }
1946 }
1947 } else {
1948 for (std::ptrdiff_t i = t[d].extent; i != 0; i--) {
1949 for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
1950 advance_ptrs(t[d].stride, ptr, ptrs...);
1951 }
1952 }
1953 }
1954
1955 template<int N>
1956 HALIDE_NEVER_INLINE static bool for_each_value_prep(for_each_value_task_dim<N> *t,
1957 const halide_buffer_t **buffers) {
1958 // Check the buffers all have clean host allocations
1959 for (int i = 0; i < N; i++) {
1960 if (buffers[i]->device) {
1961 assert(buffers[i]->host &&
1962 "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
1963 assert(!buffers[i]->device_dirty() &&
1964 "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
1965 } else {
1966 assert(buffers[i]->host &&
1967 "Buffer passed to for_each_value has no host or device allocation");
1968 }
1969 }
1970
1971 const int dimensions = buffers[0]->dimensions;
1972
1973 // Extract the strides in all the dimensions
1974 for (int i = 0; i < dimensions; i++) {
1975 for (int j = 0; j < N; j++) {
1976 assert(buffers[j]->dimensions == dimensions);
1977 assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
1978 buffers[j]->dim[i].min == buffers[0]->dim[i].min);
1979 const int s = buffers[j]->dim[i].stride;
1980 t[i].stride[j] = s;
1981 }
1982 t[i].extent = buffers[0]->dim[i].extent;
1983
1984 // Order the dimensions by stride, so that the traversal is cache-coherent.
1985 // Use the last dimension for this, because this is the source in copies.
1986 // It appears to be better to optimize read order than write order.
1987 for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
1988 std::swap(t[j], t[j - 1]);
1989 }
1990 }
1991
1992 // flatten dimensions where possible to make a larger inner
1993 // loop for autovectorization.
1994 int d = dimensions;
1995 for (int i = 1; i < d; i++) {
1996 bool flat = true;
1997 for (int j = 0; j < N; j++) {
1998 flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
1999 }
2000 if (flat) {
2001 t[i - 1].extent *= t[i].extent;
2002 for (int j = i; j < d; j++) {
2003 t[j] = t[j + 1];
2004 }
2005 i--;
2006 d--;
2007 t[d].extent = 1;
2008 }
2009 }
2010
2011 bool innermost_strides_are_one = true;
2012 if (dimensions > 0) {
2013 for (int i = 0; i < N; i++) {
2014 innermost_strides_are_one &= (t[0].stride[i] == 1);
2015 }
2016 }
2017
2018 return innermost_strides_are_one;
2019 }
2020
2021 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2022 void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
2023 if (dimensions() > 0) {
2024 Buffer<>::for_each_value_task_dim<N> *t =
2025 (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA((dimensions() + 1) * sizeof(for_each_value_task_dim<N>));
2026 // Move the preparatory code into a non-templated helper to
2027 // save code size.
2028 const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
2029 bool innermost_strides_are_one = Buffer<>::for_each_value_prep(t, buffers);
2030
2031 Buffer<>::for_each_value_helper(f, dimensions() - 1,
2032 innermost_strides_are_one,
2033 t,
2034 data(), (other_buffers.data())...);
2035 } else {
2036 f(*data(), (*other_buffers.data())...);
2037 }
2038 }
2039 // @}
2040
2041public:
2042 /** Call a function on every value in the buffer, and the
2043 * corresponding values in some number of other buffers of the
2044 * same size. The function should take a reference, const
2045 * reference, or value of the correct type for each buffer. This
2046 * effectively lifts a function of scalars to an element-wise
2047 * function of buffers. This produces code that the compiler can
2048 * autovectorize. This is slightly cheaper than for_each_element,
2049 * because it does not need to track the coordinates.
2050 *
2051 * Note that constness of Buffers is preserved: a const Buffer<T> (for either
2052 * 'this' or the other-buffers arguments) will allow mutation of the
2053 * buffer contents, while a Buffer<const T> will not. Attempting to specify
2054 * a mutable reference for the lambda argument of a Buffer<const T>
2055 * will result in a compilation error. */
2056 // @{
2057 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2058 HALIDE_ALWAYS_INLINE const Buffer<T, D> &for_each_value(Fn &&f, Args &&...other_buffers) const {
2059 for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2060 return *this;
2061 }
2062
2063 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2065 Buffer<T, D> &
2066 for_each_value(Fn &&f, Args &&...other_buffers) {
2067 for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2068 return *this;
2069 }
2070 // @}
2071
2072private:
2073 // Helper functions for for_each_element
/** Inclusive coordinate range of one dimension iterated by
 * for_each_element. */
struct for_each_element_task_dim {
    int min;  // first coordinate visited
    int max;  // last coordinate visited (inclusive)
};
2077
2078 /** If f is callable with this many args, call it. The first
2079 * argument is just to make the overloads distinct. Actual
2080 * overload selection is done using the enable_if. */
2081 template<typename Fn,
2082 typename... Args,
2083 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2084 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
2085 f(args...);
2086 }
2087
2088 /** If the above overload is impossible, we add an outer loop over
2089 * an additional argument and try again. */
2090 template<typename Fn,
2091 typename... Args>
2092 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
2093 for (int i = t[d].min; i <= t[d].max; i++) {
2094 for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
2095 }
2096 }
2097
2098 /** Determine the minimum number of arguments a callable can take
2099 * using the same trick. */
2100 template<typename Fn,
2101 typename... Args,
2102 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2103 HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
2104 return (int)(sizeof...(Args));
2105 }
2106
2107 /** The recursive version is only enabled up to a recursion limit
2108 * of 256. This catches callables that aren't callable with any
2109 * number of ints. */
2110 template<typename Fn,
2111 typename... Args>
2112 HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
2113 static_assert(sizeof...(args) <= 256,
2114 "Callable passed to for_each_element must accept either a const int *,"
2115 " or up to 256 ints. No such operator found. Expect infinite template recursion.");
2116 return num_args(0, std::forward<Fn>(f), 0, args...);
2117 }
2118
2119 /** A version where the callable takes a position array instead,
2120 * with compile-time recursion on the dimensionality. This
2121 * overload is preferred to the one below using the same int vs
2122 * double trick as above, but is impossible once d hits -1 using
2123 * std::enable_if. */
2124 template<int d,
2125 typename Fn,
2126 typename = typename std::enable_if<(d >= 0)>::type>
2127 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2128 for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2129 for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
2130 }
2131 }
2132
2133 /** Base case for recursion above. */
2134 template<int d,
2135 typename Fn,
2136 typename = typename std::enable_if<(d < 0)>::type>
2137 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2138 f(pos);
2139 }
2140
2141 /** A run-time-recursive version (instead of
2142 * compile-time-recursive) that requires the callable to take a
2143 * pointer to a position array instead. Dispatches to the
2144 * compile-time-recursive version once the dimensionality gets
2145 * small. */
2146 template<typename Fn>
2147 static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2148 if (d == -1) {
2149 f(pos);
2150 } else if (d == 0) {
2151 // Once the dimensionality gets small enough, dispatch to
2152 // a compile-time-recursive version for better codegen of
2153 // the inner loops.
2154 for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2155 } else if (d == 1) {
2156 for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2157 } else if (d == 2) {
2158 for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2159 } else if (d == 3) {
2160 for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2161 } else {
2162 for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2163 for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2164 }
2165 }
2166 }
2167
2168 /** We now have two overloads for for_each_element. This one
2169 * triggers if the callable takes a const int *.
2170 */
2171 template<typename Fn,
2172 typename = decltype(std::declval<Fn>()((const int *)nullptr))>
2173 static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
2174 int *pos = (int *)HALIDE_ALLOCA(dims * sizeof(int));
2175 for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
2176 }
2177
2178 /** This one triggers otherwise. It treats the callable as
2179 * something that takes some number of ints. */
2180 template<typename Fn>
2181 HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
2182 int args = num_args(0, std::forward<Fn>(f));
2183 assert(dims >= args);
2184 for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
2185 }
2186
2187 template<typename Fn>
2188 void for_each_element_impl(Fn &&f) const {
2189 for_each_element_task_dim *t =
2190 (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
2191 for (int i = 0; i < dimensions(); i++) {
2192 t[i].min = dim(i).min();
2193 t[i].max = dim(i).max();
2194 }
2195 for_each_element(0, dimensions(), t, std::forward<Fn>(f));
2196 }
2197
2198public:
2199 /** Call a function at each site in a buffer. This is likely to be
2200 * much slower than using Halide code to populate a buffer, but is
2201 * convenient for tests. If the function has more arguments than the
2202 * buffer has dimensions, the remaining arguments will be zero. If it
2203 * has fewer arguments than the buffer has dimensions then the last
2204 * few dimensions of the buffer are not iterated over. For example,
2205 * the following code exploits this to set a floating point RGB image
2206 * to red:
2207
2208 \code
2209 Buffer<float, 3> im(100, 100, 3);
2210 im.for_each_element([&](int x, int y) {
2211 im(x, y, 0) = 1.0f;
2212 im(x, y, 1) = 0.0f;
2213 im(x, y, 2) = 0.0f;
2214 });
2215 \endcode
2216
2217 * The compiled code is equivalent to writing a nested for loop,
2218 * and compilers are capable of optimizing it in the same way.
2219 *
2220 * If the callable can be called with an int * as the sole argument,
2221 * that version is called instead. Each location in the buffer is
2222 * passed to it in a coordinate array. This version is higher-overhead
2223 * than the variadic version, but is useful for writing generic code
2224 * that accepts buffers of arbitrary dimensionality. For example, the
2225 * following sets the value at all sites in an arbitrary-dimensional
2226 * buffer to their first coordinate:
2227
2228 \code
2229 im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
2230 \endcode
2231
2232 * It is also possible to use for_each_element to iterate over entire
2233 * rows or columns by cropping the buffer to a single column or row
2234 * respectively and iterating over elements of the result. For example,
2235 * to set the diagonal of the image to 1 by iterating over the columns:
2236
2237 \code
2238 Buffer<float, 3> im(100, 100, 3);
2239 im.sliced(1, 0).for_each_element([&](int x, int c) {
2240 im(x, x, c) = 1.0f;
2241 });
2242 \endcode
2243
2244 * Or, assuming the memory layout is known to be dense per row, one can
2245 * memset each row of an image like so:
2246
2247 \code
2248 Buffer<float, 3> im(100, 100, 3);
2249 im.sliced(0, 0).for_each_element([&](int y, int c) {
2250 memset(&im(0, y, c), 0, sizeof(float) * im.width());
2251 });
2252 \endcode
2253
2254 */
2255 // @{
2256 template<typename Fn>
2258 for_each_element_impl(f);
2259 return *this;
2260 }
2261
2262 template<typename Fn>
2264 Buffer<T, D> &
2266 for_each_element_impl(f);
2267 return *this;
2268 }
2269 // @}
2270
2271private:
2272 template<typename Fn>
2273 struct FillHelper {
2274 Fn f;
2276
2277 template<typename... Args,
2278 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2279 void operator()(Args... args) {
2280 (*buf)(args...) = f(args...);
2281 }
2282
2283 FillHelper(Fn &&f, Buffer<T, D> *buf)
2284 : f(std::forward<Fn>(f)), buf(buf) {
2285 }
2286 };
2287
2288public:
2289 /** Fill a buffer by evaluating a callable at every site. The
2290 * callable should look much like a callable passed to
2291 * for_each_element, but it should return the value that should be
2292 * stored to the coordinate corresponding to the arguments. */
2293 template<typename Fn,
2294 typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2296 // We'll go via for_each_element. We need a variadic wrapper lambda.
2297 FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
2298 return for_each_element(wrapper);
2299 }
2300
2301 /** Check if an input buffer passed extern stage is a querying
2302 * bounds. Compared to doing the host pointer check directly,
2303 * this both adds clarity to code and will facilitate moving to
2304 * another representation for bounds query arguments. */
2305 bool is_bounds_query() const {
2306 return buf.is_bounds_query();
2307 }
2308
2309 /** Convenient check to verify that all of the interesting bytes in the Buffer
2310 * are initialized under MSAN. Note that by default, we use for_each_value() here so that
2311 * we skip any unused padding that isn't part of the Buffer; this isn't efficient,
2312 * but in MSAN mode, it doesn't matter. (Pass true for the flag to force check
2313 * the entire Buffer storage.) */
2314 void msan_check_mem_is_initialized(bool entire = false) const {
2315#if defined(__has_feature)
2316#if __has_feature(memory_sanitizer)
2317 if (entire) {
2318 __msan_check_mem_is_initialized(data(), size_in_bytes());
2319 } else {
2320 for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); ; });
2321 }
2322#endif
2323#endif
2324 }
2325};
2326
2327} // namespace Runtime
2328} // namespace Halide
2329
2330#undef HALIDE_ALLOCA
2331
2332#endif // HALIDE_RUNTIME_BUFFER_H
#define HALIDE_ALLOCA
Definition: HalideBuffer.h:30
This file declares the routines used by Halide internally in its runtime.
#define HALIDE_NEVER_INLINE
Definition: HalideRuntime.h:39
#define HALIDE_ALWAYS_INLINE
Definition: HalideRuntime.h:38
struct halide_buffer_t halide_buffer_t
The raw representation of an image passed around by generated Halide code.
A Halide::Buffer is a named shared reference to a Halide::Runtime::Buffer.
Definition: Buffer.h:115
Read-only access to the shape.
Definition: HalideBuffer.h:438
HALIDE_ALWAYS_INLINE int stride() const
The number of elements in memory you have to step over to increment this coordinate by one.
Definition: HalideBuffer.h:449
HALIDE_ALWAYS_INLINE int extent() const
The extent of the image along this dimension.
Definition: HalideBuffer.h:454
HALIDE_ALWAYS_INLINE iterator begin() const
An iterator that points to the min coordinate.
Definition: HalideBuffer.h:480
Dimension(const halide_dimension_t &dim)
Definition: HalideBuffer.h:489
HALIDE_ALWAYS_INLINE int min() const
The lowest coordinate in this dimension.
Definition: HalideBuffer.h:443
HALIDE_ALWAYS_INLINE int max() const
The highest coordinate in this dimension.
Definition: HalideBuffer.h:459
HALIDE_ALWAYS_INLINE iterator end() const
An iterator that points to one past the max coordinate.
Definition: HalideBuffer.h:485
A templated Buffer class that wraps halide_buffer_t and adds functionality.
Definition: HalideBuffer.h:131
Buffer(halide_type_t t, const std::vector< int > &sizes)
Allocate a new image of unknown type using a vector of ints as the size.
Definition: HalideBuffer.h:837
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, int d, const halide_dimension_t *shape)
Initialize an Buffer from a pointer to the min coordinate and an array describing the shape.
Definition: HalideBuffer.h:951
int width() const
Conventional names for the first three dimensions.
static Buffer< void, D > make_interleaved(halide_type_t t, int width, int height, int channels)
If you use the (x, y, c) indexing convention, then Halide Buffers are stored planar by default.
void translate(int d, int delta)
Translate an image in-place along one dimension by changing how it is indexed.
int dimensions() const
Get the dimensionality of the buffer.
Definition: HalideBuffer.h:520
bool owns_host_memory() const
Does this Buffer own the host memory it refers to?
Definition: HalideBuffer.h:179
halide_buffer_t * raw_buffer()
Get a pointer to the raw halide_buffer_t this wraps.
Buffer< T, D > & operator=(const Buffer< T, D > &other)
Standard assignment operator.
Definition: HalideBuffer.h:677
Buffer(halide_type_t t, const std::vector< int > &sizes, const std::vector< int > &storage_order)
Allocate a new image of unknown type using a vector of ints as the size and a vector of indices indic...
Definition: HalideBuffer.h:871
Buffer< T, D > & operator=(Buffer< T, D > &&other) noexcept
Standard move-assignment operator.
Definition: HalideBuffer.h:711
Buffer(Buffer< T, D > &&other) noexcept
Move constructor.
Definition: HalideBuffer.h:632
HALIDE_ALWAYS_INLINE Buffer< T, D > & for_each_value(Fn &&f, Args &&...other_buffers)
Buffer(const Buffer< T2, D2 > &other)
Construct a Buffer from a Buffer of different dimensionality and type.
Definition: HalideBuffer.h:622
Buffer(int first)
Allocate a new image of the given size.
Definition: HalideBuffer.h:805
Buffer(T *data, const std::vector< halide_dimension_t > &shape)
Initialize a Buffer from a pointer to the min coordinate and a vector describing the shape.
Definition: HalideBuffer.h:986
int extent(int i) const
Definition: HalideBuffer.h:505
Buffer(Buffer< T2, D2 > &&other)
Move-construct a Buffer from a Buffer of different dimensionality and type.
Definition: HalideBuffer.h:646
Buffer< T, D > embedded(int d, int pos=0) const
Make a new buffer that views this buffer as a single slice in a higher-dimensional space.
Buffer< T, D > translated(const std::vector< int > &delta) const
Make an image which refers to the same data translated along the first N dimensions.
void slice(int d, int pos)
Rewrite the buffer to refer to a single lower-dimensional slice of itself along the given dimension a...
void transpose(const std::vector< int > &order)
A generalized transpose: instead of swapping two dimensions, pass a vector that lists each dimension ...
Buffer< T, D > cropped(const std::vector< std::pair< int, int > > &rect) const
Make an image that refers to a sub-rectangle of this image along the first N dimensions.
int device_detach_native(void *ctx=nullptr)
int device_free(void *ctx=nullptr)
Buffer< T, D > & fill(not_void_T val)
Buffer(T *data, const std::vector< int > &sizes)
Initialize an Buffer from a pointer and a vector of sizes.
Definition: HalideBuffer.h:927
void set_device_dirty(bool v=true)
static Buffer< add_const_if_T_is_const< void >, D > make_interleaved(halide_type_t t, T *data, int width, int height, int channels)
Wrap an existing interleaved image.
void copy_from(Buffer< T2, D2 > src)
Fill a Buffer with the values at the same coordinates in another Buffer.
HALIDE_ALWAYS_INLINE Buffer< T2, D > & as() &
Return a typed reference to this Buffer.
static void assert_can_convert_from(const Buffer< T2, D2 > &other)
Fail an assertion at runtime or compile-time if an Buffer<T, D> cannot be constructed from some other...
Definition: HalideBuffer.h:598
void translate(const std::vector< int > &delta)
Translate an image along the first N dimensions by changing how it is indexed.
Buffer< not_const_T, D > copy_to_planar(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Like copy(), but the copy is created in planar memory layout (vs.
Buffer(T *data, int first, Args &&...rest)
Initialize an Buffer from a pointer and some sizes.
Definition: HalideBuffer.h:914
Buffer< T, D > transposed(int d1, int d2) const
Make a buffer which refers to the same data in the same layout using a swapped indexing order for the...
Buffer(const Buffer< T, D > &other)
Copy constructor.
Definition: HalideBuffer.h:607
friend class Buffer
Give Buffers access to the members of Buffers of different dimensionalities and types.
Definition: HalideBuffer.h:569
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, int first, Args &&...rest)
Initialize an Buffer of runtime type from a pointer and some sizes.
Definition: HalideBuffer.h:897
void crop(int d, int min, int extent)
Crop an image in-place along the given dimension.
static Buffer< T, D > make_scalar(T *data)
Make a zero-dimensional Buffer that points to non-owned, existing data.
Buffer(const std::vector< int > &sizes)
Allocate a new image of known type using a vector of ints as the size.
Definition: HalideBuffer.h:851
Buffer< T, D > & operator=(const Buffer< T2, D2 > &other)
Assign from another Buffer of possibly-different dimensionality and type.
Definition: HalideBuffer.h:661
HALIDE_ALWAYS_INLINE const not_void_T & operator()(int first, Args... rest) const
Access elements.
HALIDE_ALWAYS_INLINE Buffer< typename std::add_const< T >::type, D > & as_const() &
as_const() is syntactic sugar for .as<const T>(), to avoid the need to recapitulate the type argument...
void crop(const std::vector< std::pair< int, int > > &rect)
Crop an image in-place along the first N dimensions.
bool all_equal(not_void_T val) const
Tests that all values in this buffer are equal to val.
int device_wrap_native(const struct halide_device_interface_t *device_interface, uint64_t handle, void *ctx=nullptr)
void deallocate()
Drop reference to any owned host or device memory, possibly freeing it, if this buffer held the last ...
Definition: HalideBuffer.h:767
static Buffer< add_const_if_T_is_const< void >, D > make_scalar(halide_type_t t)
Make a zero-dimensional Buffer.
int left() const
Conventional names for the min and max value of each dimension.
Buffer< T, D > & fill(Fn &&f)
Fill a buffer by evaluating a callable at every site.
HALIDE_ALWAYS_INLINE void set_host_dirty(bool v=true)
Methods for managing any GPU allocation.
static constexpr bool has_static_halide_type
True if the Halide type is not void (or const void).
Definition: HalideBuffer.h:170
T * begin() const
A pointer to the element with the lowest address.
Definition: HalideBuffer.h:531
Buffer(const std::vector< int > &sizes, const std::vector< int > &storage_order)
Definition: HalideBuffer.h:876
bool has_device_allocation() const
void allocate(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr)
Allocate memory for this Buffer.
Definition: HalideBuffer.h:740
int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
static halide_type_t static_halide_type()
Get the Halide type of T.
Definition: HalideBuffer.h:174
Buffer(const halide_buffer_t &buf, BufferDeviceOwnership ownership=BufferDeviceOwnership::Unmanaged)
Make a Buffer from a halide_buffer_t.
Definition: HalideBuffer.h:561
Buffer(Array(&vals)[N])
Make an Buffer that refers to a statically sized array.
Definition: HalideBuffer.h:883
HALIDE_ALWAYS_INLINE bool device_dirty() const
Buffer(T *data, int d, const halide_dimension_t *shape)
Initialize an Buffer from a pointer to the min coordinate and an array describing the shape.
Definition: HalideBuffer.h:974
size_t number_of_elements() const
The total number of elements this buffer represents.
Definition: HalideBuffer.h:515
T * end() const
A pointer to one beyond the element with the highest address.
Definition: HalideBuffer.h:537
halide_type_t type() const
Get the type of the elements.
Definition: HalideBuffer.h:525
int stride(int i) const
Definition: HalideBuffer.h:508
static Buffer< T, D > make_interleaved(int width, int height, int channels)
If you use the (x, y, c) indexing convention, then Halide Buffers are stored planar by default.
void transpose(int d1, int d2)
Transpose a buffer in-place by changing how it is indexed.
HALIDE_ALWAYS_INLINE Buffer< T, D > & for_each_element(Fn &&f)
int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
HALIDE_ALWAYS_INLINE const Buffer< T, D > & for_each_element(Fn &&f) const
Call a function at each site in a buffer.
Buffer< T, D > & operator=(Buffer< T2, D2 > &&other)
Move from another Buffer of possibly-different dimensionality and type.
Definition: HalideBuffer.h:696
static bool can_convert_from(const Buffer< T2, D2 > &other)
Determine if a Buffer<T, D> can be constructed from some other Buffer type.
Definition: HalideBuffer.h:587
HALIDE_ALWAYS_INLINE not_void_T & operator()()
HALIDE_ALWAYS_INLINE const Buffer< T, D > & for_each_value(Fn &&f, Args &&...other_buffers) const
Call a function on every value in the buffer, and the corresponding values in some number of other bu...
static Buffer< T, D > make_scalar()
Make a zero-dimensional Buffer.
void add_dimension()
Add a new dimension with a min of zero and an extent of one.
void msan_check_mem_is_initialized(bool entire=false) const
Convenient check to verify that all of the interesting bytes in the Buffer are initialized under MSAN...
HALIDE_ALWAYS_INLINE not_void_T & operator()(int first, Args... rest)
void slice(int d)
Slice a buffer in-place at the dimension's minimum.
static Buffer< T, D > make_interleaved(T *data, int width, int height, int channels)
Wrap an existing interleaved image.
void check_overflow()
Check the product of the extents fits in memory.
Definition: HalideBuffer.h:725
void reset()
Reset the Buffer to be equivalent to a default-constructed Buffer of the same static type (if any); B...
Definition: HalideBuffer.h:550
HALIDE_ALWAYS_INLINE Buffer< typename std::add_const< T >::type, D > as_const() &&
T * data() const
Get a pointer to the address of the min coordinate.
Buffer< T, D > translated(int d, int dx) const
Make an image which refers to the same data using translated coordinates in the given dimension.
HALIDE_ALWAYS_INLINE Buffer< T2, D > as() &&
Returns this rval Buffer with a different type attached.
static Buffer< T, D > make_with_shape_of(Buffer< T2, D2 > src, void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr)
Make a buffer with the same shape and memory nesting order as another buffer.
size_t size_in_bytes() const
The total number of bytes spanned by the data in memory.
Definition: HalideBuffer.h:543
Buffer< not_const_T, D > copy_to_interleaved(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Like copy(), but the copy is created in interleaved memory layout (vs.
bool contains(Args... args) const
HALIDE_ALWAYS_INLINE const not_void_T & operator()(const int *pos) const
HALIDE_ALWAYS_INLINE Dimension dim(int i) const
Access the shape of the buffer.
Definition: HalideBuffer.h:495
int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, const std::vector< halide_dimension_t > &shape)
Initialize a Buffer from a pointer to the min coordinate and a vector describing the shape.
Definition: HalideBuffer.h:966
Buffer< T, D > alias() const
Make a copy of the Buffer which shares the underlying host and/or device allocations as the existing ...
Buffer(halide_type_t t, int first, Args... rest)
Allocate a new image of the given size with a runtime type.
Definition: HalideBuffer.h:784
int device_sync(void *ctx=nullptr)
Buffer< not_const_T, D > copy(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Make a new image which is a deep copy of this image.
const halide_buffer_t * raw_buffer() const
BufferDeviceOwnership device_ownership() const
Return the method by which the device field is managed.
bool is_bounds_query() const
Check if an input buffer passed to an extern stage is a bounds query.
Buffer< T, D > sliced(int d) const
Make a lower-dimensional buffer that refers to one slice of this buffer at the dimension's minimum.
void embed(int d, int pos=0)
Embed a buffer in-place, increasing the dimensionality.
Buffer(int first, int second, Args... rest)
Definition: HalideBuffer.h:821
Buffer< T, D > transposed(const std::vector< int > &order) const
Make a buffer which refers to the same data in the same layout using a different ordering of the dime...
Buffer< T, D > sliced(int d, int pos) const
Make a lower-dimensional buffer that refers to one slice of this buffer.
void add_dimension_with_stride(int s)
Add a new dimension with a min of zero, an extent of one, and the specified stride.
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, const std::vector< int > &sizes)
Initialize an Buffer of runtime type from a pointer and a vector of sizes.
Definition: HalideBuffer.h:938
HALIDE_ALWAYS_INLINE not_void_T & operator()(const int *pos)
Buffer< T, D > cropped(int d, int min, int extent) const
Make an image that refers to a sub-range of this image along the given dimension.
HALIDE_ALWAYS_INLINE const Buffer< T2, D > & as() const &
Return a const typed reference to this Buffer.
HALIDE_ALWAYS_INLINE const Buffer< typename std::add_const< T >::type, D > & as_const() const &
int copy_to_host(void *ctx=nullptr)
bool contains(const std::vector< int > &coords) const
Test if a given coordinate is within the bounds of an image.
void set_min(Args... args)
int min(int i) const
Access to the mins, strides, extents.
Definition: HalideBuffer.h:502
void device_deallocate()
Drop reference to any owned device memory, possibly freeing it if this buffer held the last reference...
Definition: HalideBuffer.h:774
HALIDE_ALWAYS_INLINE const not_void_T & operator()() const
void set_min(const std::vector< int > &mins)
Set the min coordinate of an image in the first N dimensions.
bool any_zero(const Container &c)
Definition: HalideBuffer.h:70
BufferDeviceOwnership
This indicates how to deallocate the device for a Halide::Runtime::Buffer.
Definition: HalideBuffer.h:93
@ AllocatedDeviceAndHost
Call device_and_host_free when DevRefCount goes to zero.
@ WrappedNative
halide_device_detach_native will be called when device ref count goes to zero
@ Unmanaged
No free routine will be called when device ref count goes to zero
@ Cropped
Call halide_device_release_crop when DevRefCount goes to zero.
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
Definition: Func.h:595
Expr max(const FuncRef &a, const FuncRef &b)
Definition: Func.h:598
char * buf
Definition: printer.h:32
char * dst
Definition: printer.h:32
unsigned __INT64_TYPE__ uint64_t
void * malloc(size_t)
unsigned __INT8_TYPE__ uint8_t
__PTRDIFF_TYPE__ ptrdiff_t
unsigned __INT16_TYPE__ uint16_t
void * memcpy(void *s1, const void *s2, size_t n)
unsigned __INT32_TYPE__ uint32_t
void free(void *)
A struct acting as a header for allocations owned by the Buffer class itself.
Definition: HalideBuffer.h:82
AllocationHeader(void(*deallocate_fn)(void *))
Definition: HalideBuffer.h:87
An iterator class, so that you can iterate over coordinates in a dimension using a range-based for loop.
Definition: HalideBuffer.h:465
bool operator!=(const iterator &other) const
Definition: HalideBuffer.h:470
A similar struct for managing device allocations.
Definition: HalideBuffer.h:102
BufferDeviceOwnership ownership
Definition: HalideBuffer.h:106
The raw representation of an image passed around by generated Halide code.
int32_t dimensions
The dimensionality of the buffer.
halide_dimension_t * dim
The shape of the buffer.
uint64_t device
A device-handle for e.g. GPU memory used to back this buffer.
uint8_t * host
A pointer to the start of the data in main memory.
Each GPU API provides a halide_device_interface_t struct pointing to the code that manages device all...
int(* device_and_host_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
int(* wrap_native)(void *user_context, struct halide_buffer_t *buf, uint64_t handle, const struct halide_device_interface_t *device_interface)
int(* copy_to_device)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
int(* device_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
A runtime tag for a type in the halide type system.