OpenShot Library | libopenshot 0.6.0
Loading...
Searching...
No Matches
FFmpegReader.cpp
Go to the documentation of this file.
1
12// Copyright (c) 2008-2024 OpenShot Studios, LLC, Fabrice Bellard
13//
14// SPDX-License-Identifier: LGPL-3.0-or-later
15
16#include <thread> // for std::this_thread::sleep_for
17#include <chrono> // for std::chrono::milliseconds
18#include <algorithm>
19#include <cmath>
20#include <sstream>
21#include <unistd.h>
22
23#include "FFmpegUtilities.h"
24#include "effects/CropHelpers.h"
25
26#include "FFmpegReader.h"
27#include "Exceptions.h"
28#include "MemoryTrim.h"
29#include "Timeline.h"
30#include "ZmqLogger.h"
31
32#define ENABLE_VAAPI 0
33
34#if USE_HW_ACCEL
35#define MAX_SUPPORTED_WIDTH 1950
36#define MAX_SUPPORTED_HEIGHT 1100
37
38#if ENABLE_VAAPI
39#include "libavutil/hwcontext_vaapi.h"
40
// Local mirror of libavcodec's private VAAPI decode context, used below to
// reach va_config when querying hardware frame constraints.
// NOTE(review): this layout must match the exact FFmpeg version being built
// against — it is an internal struct with no ABI guarantee; verify on upgrade.
typedef struct VAAPIDecodeContext {
	VAProfile va_profile;
	VAEntrypoint va_entrypoint;
	VAConfigID va_config;
	VAContextID va_context;

#if FF_API_STRUCT_VAAPI_CONTEXT
	// FF_DISABLE_DEPRECATION_WARNINGS
	int have_old_context;
	struct vaapi_context *old_context;
	AVBufferRef *device_ref;
	// FF_ENABLE_DEPRECATION_WARNINGS
#endif

	AVHWDeviceContext *device;       // device this decoder runs on
	AVVAAPIDeviceContext *hwctx;     // VAAPI-specific device data

	AVHWFramesContext *frames;       // pool of hardware surfaces
	AVVAAPIFramesContext *hwfc;      // VAAPI-specific frame data

	enum AVPixelFormat surface_format;  // pixel format of decode surfaces
	int surface_count;                  // number of allocated surfaces
	} VAAPIDecodeContext;
64#endif // ENABLE_VAAPI
65#endif // USE_HW_ACCEL
66
67
68using namespace openshot;
69
// Process-wide flag: non-zero while hardware-accelerated decoding is enabled.
int hw_de_on = 0;
#if USE_HW_ACCEL
	// Hardware pixel format / device type chosen by get_hw_dec_format();
	// globals because the get_format callback has no user-data pointer.
	AVPixelFormat hw_de_av_pix_fmt_global = AV_PIX_FMT_NONE;
	AVHWDeviceType hw_de_av_device_type_global = AV_HWDEVICE_TYPE_NONE;
#endif
75
76// Normalize deprecated JPEG-range YUVJ formats before creating swscale contexts.
77// swscale expects non-YUVJ formats plus explicit color-range metadata.
78static AVPixelFormat NormalizeDeprecatedPixFmt(AVPixelFormat pix_fmt, bool& is_full_range) {
79 switch (pix_fmt) {
80 case AV_PIX_FMT_YUVJ420P:
81 is_full_range = true;
82 return AV_PIX_FMT_YUV420P;
83 case AV_PIX_FMT_YUVJ422P:
84 is_full_range = true;
85 return AV_PIX_FMT_YUV422P;
86 case AV_PIX_FMT_YUVJ444P:
87 is_full_range = true;
88 return AV_PIX_FMT_YUV444P;
89 case AV_PIX_FMT_YUVJ440P:
90 is_full_range = true;
91 return AV_PIX_FMT_YUV440P;
92#ifdef AV_PIX_FMT_YUVJ411P
93 case AV_PIX_FMT_YUVJ411P:
94 is_full_range = true;
95 return AV_PIX_FMT_YUV411P;
96#endif
97 default:
98 return pix_fmt;
99 }
100}
101
// Convenience constructor: delegates to the full constructor with the
// default duration strategy (prefer the video stream's reported duration).
FFmpegReader::FFmpegReader(const std::string &path, bool inspect_reader)
		: FFmpegReader(path, DurationStrategy::VideoPreferred, inspect_reader) {}
104
// Full constructor: initializes all members, sizes the caches, and (when
// inspect_reader is true) opens and immediately closes the file so the
// FileInfo attributes (width, height, fps, duration, ...) are populated.
// NOTE(review): this listing has numbering gaps (original lines 117-118 and
// 129 appear dropped) — e.g. init_final_cache_frames is computed but the
// matching final_cache.SetMaxBytesFromInfo() call is absent here; verify
// against the upstream file.
FFmpegReader::FFmpegReader(const std::string &path, DurationStrategy duration_strategy, bool inspect_reader)
		: last_frame(0), is_seeking(0), seeking_pts(0), seeking_frame(0), seek_count(0), NO_PTS_OFFSET(-99999),
		  path(path), is_video_seek(true), check_interlace(false), check_fps(false), enable_seek(true), is_open(false),
		  seek_audio_frame_found(0), seek_video_frame_found(0),
		  last_seek_max_frame(-1), seek_stagnant_count(0),
		  is_duration_known(false), largest_frame_processed(0),
		  current_video_frame(0), packet(NULL), duration_strategy(duration_strategy),
		  audio_pts(0), video_pts(0), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
		  pStream(NULL), aStream(NULL), pFrame(NULL), previous_packet_location{-1,0},
		  hold_packet(false) {

	// Initialize FFMpeg, and register all formats and codecs

	// Init timestamp offsets (NO_PTS_OFFSET acts as a "not yet scanned" sentinel)
	pts_offset_seconds = NO_PTS_OFFSET;
	video_pts_seconds = NO_PTS_OFFSET;
	audio_pts_seconds = NO_PTS_OFFSET;

	// Init cache (sized before the file is opened, using default FileInfo values)
	const int init_working_cache_frames = std::max(Settings::Instance()->CACHE_MIN_FRAMES, OPEN_MP_NUM_PROCESSORS * 4);
	const int init_final_cache_frames = std::max(Settings::Instance()->CACHE_MIN_FRAMES, OPEN_MP_NUM_PROCESSORS * 4);
	working_cache.SetMaxBytesFromInfo(init_working_cache_frames, info.width, info.height, info.sample_rate, info.channels);

	// Open and Close the reader, to populate its attributes (such as height, width, etc...)
	if (inspect_reader) {
		Open();
		Close();
	}
}
137
139 if (is_open)
140 // Auto close reader if not already done
141 Close();
142}
143
144// This struct holds the associated video frame and starting sample # for an audio packet.
145bool AudioLocation::is_near(AudioLocation location, int samples_per_frame, int64_t amount) {
146 // Is frame even close to this one?
147 if (abs(location.frame - frame) >= 2)
148 // This is too far away to be considered
149 return false;
150
151 // Note that samples_per_frame can vary slightly frame to frame when the
152 // audio sampling rate is not an integer multiple of the video fps.
153 int64_t diff = samples_per_frame * (location.frame - frame) + location.sample_start - sample_start;
154 if (abs(diff) <= amount)
155 // close
156 return true;
157
158 // not close
159 return false;
160}
161
162#if USE_HW_ACCEL
163
164// Get hardware pix format
165static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts)
166{
167 const enum AVPixelFormat *p;
168
169 // Prefer only the format matching the selected hardware decoder
171
172 for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
173 switch (*p) {
174#if defined(__linux__)
175 // Linux pix formats
176 case AV_PIX_FMT_VAAPI:
177 if (selected == 1) {
178 hw_de_av_pix_fmt_global = AV_PIX_FMT_VAAPI;
179 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_VAAPI;
180 return *p;
181 }
182 break;
183 case AV_PIX_FMT_VDPAU:
184 if (selected == 6) {
185 hw_de_av_pix_fmt_global = AV_PIX_FMT_VDPAU;
186 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_VDPAU;
187 return *p;
188 }
189 break;
190#endif
191#if defined(_WIN32)
192 // Windows pix formats
193 case AV_PIX_FMT_DXVA2_VLD:
194 if (selected == 3) {
195 hw_de_av_pix_fmt_global = AV_PIX_FMT_DXVA2_VLD;
196 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_DXVA2;
197 return *p;
198 }
199 break;
200 case AV_PIX_FMT_D3D11:
201 if (selected == 4) {
202 hw_de_av_pix_fmt_global = AV_PIX_FMT_D3D11;
203 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_D3D11VA;
204 return *p;
205 }
206 break;
207#endif
208#if defined(__APPLE__)
209 // Apple pix formats
210 case AV_PIX_FMT_VIDEOTOOLBOX:
211 if (selected == 5) {
212 hw_de_av_pix_fmt_global = AV_PIX_FMT_VIDEOTOOLBOX;
213 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
214 return *p;
215 }
216 break;
217#endif
218 // Cross-platform pix formats
219 case AV_PIX_FMT_CUDA:
220 if (selected == 2) {
221 hw_de_av_pix_fmt_global = AV_PIX_FMT_CUDA;
222 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_CUDA;
223 return *p;
224 }
225 break;
226 case AV_PIX_FMT_QSV:
227 if (selected == 7) {
228 hw_de_av_pix_fmt_global = AV_PIX_FMT_QSV;
229 hw_de_av_device_type_global = AV_HWDEVICE_TYPE_QSV;
230 return *p;
231 }
232 break;
233 default:
234 // This is only here to silence unused-enum warnings
235 break;
236 }
237 }
238 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::get_hw_dec_format (Unable to decode this file using hardware decode)");
239 return AV_PIX_FMT_NONE;
240}
241
242int FFmpegReader::IsHardwareDecodeSupported(int codecid)
243{
244 int ret;
245 switch (codecid) {
246 case AV_CODEC_ID_H264:
247 case AV_CODEC_ID_MPEG2VIDEO:
248 case AV_CODEC_ID_VC1:
249 case AV_CODEC_ID_WMV1:
250 case AV_CODEC_ID_WMV2:
251 case AV_CODEC_ID_WMV3:
252 ret = 1;
253 break;
254 default :
255 ret = 0;
256 break;
257 }
258 return ret;
259}
260#endif // USE_HW_ACCEL
261
	// Open reader if not already open
	// NOTE(review): this listing has numbering gaps — the function signature
	// (original line 262) and several statements (e.g. lines 354, 364, 385,
	// 405, 537/539, 618, 626, 640, 644, 657, 665-669, 734) appear dropped.
	// Where a gap makes a variable read uninitialized it is flagged below;
	// verify the whole function against the upstream file.
	if (!is_open) {
		// Prevent async calls to the following code
		const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

		// Initialize format context
		pFormatCtx = NULL;
		{
			// Hardware decode is enabled only when not forced to software AND
			// the user setting selects a hardware decoder (non-zero).
			hw_de_on = (!force_sw_decode && openshot::Settings::Instance()->HARDWARE_DECODER != 0 ? 1 : 0);
			hw_decode_failed = false;
			hw_decode_error_count = 0;
			hw_decode_succeeded = false;
			ZmqLogger::Instance()->AppendDebugMethod("Decode hardware acceleration settings", "hw_de_on", hw_de_on, "HARDWARE_DECODER", openshot::Settings::Instance()->HARDWARE_DECODER);
		}

		// Open video file
		if (avformat_open_input(&pFormatCtx, path.c_str(), NULL, NULL) != 0)
			throw InvalidFile("FFmpegReader could not open media file.", path);

		// Retrieve stream information
		if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
			throw NoStreamsFound("No streams found in file.", path);

		videoStream = -1;
		audioStream = -1;

		// Init end-of-file detection variables
		packet_status.reset(true);

		// Loop through each stream, and identify the video and audio stream index
		// (only the FIRST stream of each type is used).
		for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
			// Is this a video stream?
			if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
				videoStream = i;
				packet_status.video_eof = false;
				packet_status.packets_eof = false;
				packet_status.end_of_file = false;
			}
			// Is this an audio stream?
			if (AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {
				audioStream = i;
				packet_status.audio_eof = false;
				packet_status.packets_eof = false;
				packet_status.end_of_file = false;
			}
		}
		if (videoStream == -1 && audioStream == -1)
			throw NoStreamsFound("No video or audio streams found in this file.", path);

		// Is there a video stream?
		if (videoStream != -1) {
			// Set the stream index
			info.video_stream_index = videoStream;

			// Set the codec and codec context pointers
			pStream = pFormatCtx->streams[videoStream];

			// Find the codec ID from stream
			const AVCodecID codecId = AV_FIND_DECODER_CODEC_ID(pStream);

			// Get codec and codec context from stream
			const AVCodec *pCodec = avcodec_find_decoder(codecId);
			AVDictionary *opts = NULL;
			int retry_decode_open = 2;
			// If hw accel is selected but hardware cannot handle repeat with software decoding
			do {
				pCodecCtx = AV_GET_CODEC_CONTEXT(pStream, pCodec);
#if USE_HW_ACCEL
				if (hw_de_on && (retry_decode_open==2)) {
					// Up to here no decision is made if hardware or software decode
					hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);
				}
#endif
				retry_decode_open = 0;

				// Set number of threads equal to number of processors (not to exceed 16)
				pCodecCtx->thread_count = std::min(FF_VIDEO_NUM_PROCESSORS, 16);

				if (pCodec == NULL) {
					throw InvalidCodec("A valid video codec could not be found for this file.", path);
				}

				// Init options
				av_dict_set(&opts, "strict", "experimental", 0);
#if USE_HW_ACCEL
				if (hw_de_on && hw_de_supported) {
					// Open Hardware Acceleration
					int i_decoder_hw = 0;
					char adapter[256];
					char *adapter_ptr = NULL;
					int adapter_num;
					// NOTE(review): adapter_num (and presumably i_decoder_hw)
					// are logged/read below without visible initialization —
					// assignments appear dropped from this listing; verify.
					ZmqLogger::Instance()->AppendDebugMethod("Hardware decoding device number", "adapter_num", adapter_num);

					// Set hardware pix format (callback)
					pCodecCtx->get_format = get_hw_dec_format;

					if (adapter_num < 3 && adapter_num >=0) {
#if defined(__linux__)
						// DRM render nodes start at /dev/dri/renderD128
						snprintf(adapter,sizeof(adapter),"/dev/dri/renderD%d", adapter_num+128);
						adapter_ptr = adapter;
						switch (i_decoder_hw) {
							case 1:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
								break;
							case 2:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
								break;
							case 6:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
								break;
							case 7:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
								break;
							default:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
								break;
						}

#elif defined(_WIN32)
						adapter_ptr = NULL;
						switch (i_decoder_hw) {
							case 2:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
								break;
							case 3:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
								break;
							case 4:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
								break;
							case 7:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
								break;
							default:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
								break;
						}
#elif defined(__APPLE__)
						adapter_ptr = NULL;
						switch (i_decoder_hw) {
							case 5:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
								break;
							case 7:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
								break;
							default:
								hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
								break;
						}
#endif

					} else {
						adapter_ptr = NULL; // Just to be sure
					}

					// Check if it is there and writable
#if defined(__linux__)
					if( adapter_ptr != NULL && access( adapter_ptr, W_OK ) == 0 ) {
#elif defined(_WIN32)
					if( adapter_ptr != NULL ) {
#elif defined(__APPLE__)
					if( adapter_ptr != NULL ) {
#endif
						ZmqLogger::Instance()->AppendDebugMethod("Decode Device present using device");
					}
					else {
						adapter_ptr = NULL; // use default
						ZmqLogger::Instance()->AppendDebugMethod("Decode Device not present using default");
					}

					hw_device_ctx = NULL;
					// Here the first hardware initialisations are made
					if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
						const char* hw_name = av_hwdevice_get_type_name(hw_de_av_device_type);
						std::string hw_msg = "HW decode active: ";
						hw_msg += (hw_name ? hw_name : "unknown");
						ZmqLogger::Instance()->Log(hw_msg);
						if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {
							throw InvalidCodec("Hardware device reference create failed.", path);
						}

						/*
						av_buffer_unref(&ist->hw_frames_ctx);
						ist->hw_frames_ctx = av_hwframe_ctx_alloc(hw_device_ctx);
						if (!ist->hw_frames_ctx) {
							av_log(avctx, AV_LOG_ERROR, "Error creating a CUDA frames context\n");
							return AVERROR(ENOMEM);
						}

						frames_ctx = (AVHWFramesContext*)ist->hw_frames_ctx->data;

						frames_ctx->format = AV_PIX_FMT_CUDA;
						frames_ctx->sw_format = avctx->sw_pix_fmt;
						frames_ctx->width = avctx->width;
						frames_ctx->height = avctx->height;

						av_log(avctx, AV_LOG_DEBUG, "Initializing CUDA frames context: sw_format = %s, width = %d, height = %d\n",
							av_get_pix_fmt_name(frames_ctx->sw_format), frames_ctx->width, frames_ctx->height);


						ret = av_hwframe_ctx_init(pCodecCtx->hw_device_ctx);
						ret = av_hwframe_ctx_init(ist->hw_frames_ctx);
						if (ret < 0) {
							av_log(avctx, AV_LOG_ERROR, "Error initializing a CUDA frame pool\n");
							return ret;
						}
						*/
					}
					else {
						ZmqLogger::Instance()->Log("HW decode active: no (falling back to software)");
						throw InvalidCodec("Hardware device create failed.", path);
					}
				}
#endif // USE_HW_ACCEL

				// Disable per-frame threading for album arts
				// Using FF_THREAD_FRAME adds one frame decoding delay per thread,
				// but there's only one frame in this case.
				if (HasAlbumArt())
				{
					pCodecCtx->thread_type &= ~FF_THREAD_FRAME;
				}

				// Open video codec
				int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &opts);
				if (avcodec_return < 0) {
					std::stringstream avcodec_error_msg;
					avcodec_error_msg << "A video codec was found, but could not be opened. Error: " << av_err2string(avcodec_return);
					throw InvalidCodec(avcodec_error_msg.str(), path);
				}

#if USE_HW_ACCEL
				if (hw_de_on && hw_de_supported) {
					// Query frame-size constraints; if the video is too large
					// for the hardware, fall back to software (retry loop).
					AVHWFramesConstraints *constraints = NULL;
					void *hwconfig = NULL;
					hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);

// TODO: needs va_config!
#if ENABLE_VAAPI
					((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
					constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx,hwconfig);
#endif // ENABLE_VAAPI
					if (constraints) {
						if (pCodecCtx->coded_width < constraints->min_width ||
								pCodecCtx->coded_height < constraints->min_height ||
								pCodecCtx->coded_width > constraints->max_width ||
								pCodecCtx->coded_height > constraints->max_height) {
							ZmqLogger::Instance()->AppendDebugMethod("DIMENSIONS ARE TOO LARGE for hardware acceleration\n");
							hw_de_supported = 0;
							retry_decode_open = 1;
							AV_FREE_CONTEXT(pCodecCtx);
							if (hw_device_ctx) {
								av_buffer_unref(&hw_device_ctx);
								hw_device_ctx = NULL;
							}
						}
						else {
							// All is just peachy
							ZmqLogger::Instance()->AppendDebugMethod("\nDecode hardware acceleration is used\n", "Min width :", constraints->min_width, "Min Height :", constraints->min_height, "MaxWidth :", constraints->max_width, "MaxHeight :", constraints->max_height, "Frame width :", pCodecCtx->coded_width, "Frame height :", pCodecCtx->coded_height);
							retry_decode_open = 0;
						}
						av_hwframe_constraints_free(&constraints);
						if (hwconfig) {
							av_freep(&hwconfig);
						}
					}
					else {
						int max_h, max_w;
						// NOTE(review): the assignments of max_h/max_w (the
						// MAX_SUPPORTED_* defaults) appear dropped from this
						// listing — as shown they are read uninitialized; verify.
						//max_h = ((getenv( "LIMIT_HEIGHT_MAX" )==NULL) ? MAX_SUPPORTED_HEIGHT : atoi(getenv( "LIMIT_HEIGHT_MAX" )));
						//max_w = ((getenv( "LIMIT_WIDTH_MAX" )==NULL) ? MAX_SUPPORTED_WIDTH : atoi(getenv( "LIMIT_WIDTH_MAX" )));
						ZmqLogger::Instance()->AppendDebugMethod("Constraints could not be found using default limit\n");
						//cerr << "Constraints could not be found using default limit\n";
						if (pCodecCtx->coded_width < 0 ||
								pCodecCtx->coded_height < 0 ||
								pCodecCtx->coded_width > max_w ||
								pCodecCtx->coded_height > max_h ) {
							ZmqLogger::Instance()->AppendDebugMethod("DIMENSIONS ARE TOO LARGE for hardware acceleration\n", "Max Width :", max_w, "Max Height :", max_h, "Frame width :", pCodecCtx->coded_width, "Frame height :", pCodecCtx->coded_height);
							hw_de_supported = 0;
							retry_decode_open = 1;
							AV_FREE_CONTEXT(pCodecCtx);
							if (hw_device_ctx) {
								av_buffer_unref(&hw_device_ctx);
								hw_device_ctx = NULL;
							}
						}
						else {
							ZmqLogger::Instance()->AppendDebugMethod("\nDecode hardware acceleration is used\n", "Max Width :", max_w, "Max Height :", max_h, "Frame width :", pCodecCtx->coded_width, "Frame height :", pCodecCtx->coded_height);
							retry_decode_open = 0;
						}
					}
				} // if hw_de_on && hw_de_supported
				else {
					ZmqLogger::Instance()->AppendDebugMethod("\nDecode in software is used\n");
				}
#else
				retry_decode_open = 0;
#endif // USE_HW_ACCEL
			} while (retry_decode_open); // retry_decode_open
			// Free options
			av_dict_free(&opts);

			// Update the File Info struct with video details (if a video stream is found)
			UpdateVideoInfo();
		}

		// Is there an audio stream?
		if (audioStream != -1) {
			// Set the stream index
			info.audio_stream_index = audioStream;

			// Get a pointer to the codec context for the audio stream
			aStream = pFormatCtx->streams[audioStream];

			// Find the codec ID from stream
			AVCodecID codecId = AV_FIND_DECODER_CODEC_ID(aStream);

			// Get codec and codec context from stream
			const AVCodec *aCodec = avcodec_find_decoder(codecId);
			aCodecCtx = AV_GET_CODEC_CONTEXT(aStream, aCodec);

			// Audio encoding does not typically use more than 2 threads (most codecs use 1 thread)
			aCodecCtx->thread_count = std::min(FF_AUDIO_NUM_PROCESSORS, 2);

			bool audio_opened = false;
			if (aCodec != NULL) {
				// Init options
				AVDictionary *opts = NULL;
				av_dict_set(&opts, "strict", "experimental", 0);

				// Open audio codec
				audio_opened = (avcodec_open2(aCodecCtx, aCodec, &opts) >= 0);

				// Free options
				av_dict_free(&opts);
			}

			if (audio_opened) {
				// Update the File Info struct with audio details (if an audio stream is found)
				UpdateAudioInfo();

				// Disable malformed audio stream metadata (prevents divide-by-zero / invalid resampling math)
				const bool invalid_audio_info =
						(info.channels <= 0) ||
						(info.sample_rate <= 0) ||
						(info.audio_timebase.num <= 0) ||
						(info.audio_timebase.den <= 0) ||
						(aCodecCtx->sample_fmt == AV_SAMPLE_FMT_NONE);
				if (invalid_audio_info) {
					// NOTE(review): the logger call opening this argument list
					// appears dropped from the listing (gap at line 618); verify.
						"FFmpegReader::Open (Disable invalid audio stream)",
						"channels", info.channels,
						"sample_rate", info.sample_rate,
						"audio_timebase.num", info.audio_timebase.num,
						"audio_timebase.den", info.audio_timebase.den,
						"sample_fmt", static_cast<int>(aCodecCtx ? aCodecCtx->sample_fmt : AV_SAMPLE_FMT_NONE));
					info.has_audio = false;
					audioStream = -1;
					packet_status.audio_eof = true;
					if (aCodecCtx) {
						if (avcodec_is_open(aCodecCtx)) {
							avcodec_flush_buffers(aCodecCtx);
						}
						AV_FREE_CONTEXT(aCodecCtx);
						aCodecCtx = nullptr;
					}
					aStream = nullptr;
				}
			} else {
				// Keep decoding video, but disable bad/unsupported audio stream.
				// NOTE(review): the logger call opening this argument list
				// appears dropped from the listing (gap at line 640); verify.
					"FFmpegReader::Open (Audio codec unavailable; disabling audio)",
					"audioStream", audioStream);
				info.has_audio = false;
				audioStream = -1;
				packet_status.audio_eof = true;
				if (aCodecCtx) {
					AV_FREE_CONTEXT(aCodecCtx);
					aCodecCtx = nullptr;
				}
				aStream = nullptr;
			}
		}

		// Guard invalid frame-rate / timebase values from malformed streams.
		if (info.fps.num <= 0 || info.fps.den <= 0) {
				"FFmpegReader::Open (Invalid FPS detected; applying fallback)",
				"fps.num", info.fps.num,
				"fps.den", info.fps.den);
			info.fps.num = 30;
			info.fps.den = 1;
		}
		if (info.video_timebase.num <= 0 || info.video_timebase.den <= 0) {
				"FFmpegReader::Open (Invalid video_timebase detected; applying fallback)",
				"video_timebase.num", info.video_timebase.num,
				"video_timebase.den", info.video_timebase.den);
		}

		// Add format metadata (if any)
		AVDictionaryEntry *tag = NULL;
		while ((tag = av_dict_get(pFormatCtx->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
			QString str_key = tag->key;
			QString str_value = tag->value;
			info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
		}

		// Process video stream side data (rotation, spherical metadata, etc)
		for (unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
			AVStream* st = pFormatCtx->streams[i];
			if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
				// Only inspect the first video stream
				for (int j = 0; j < st->nb_side_data; j++) {
					AVPacketSideData *sd = &st->side_data[j];

					// Handle rotation metadata (unchanged)
					if (sd->type == AV_PKT_DATA_DISPLAYMATRIX &&
						sd->size >= 9 * sizeof(int32_t) &&
						!info.metadata.count("rotate"))
					{
						double rotation = -av_display_rotation_get(
								reinterpret_cast<int32_t *>(sd->data));
						if (std::isnan(rotation)) rotation = 0;
						info.metadata["rotate"] = std::to_string(rotation);
					}
					// Handle spherical video metadata
					else if (sd->type == AV_PKT_DATA_SPHERICAL) {
						// Always mark as spherical
						info.metadata["spherical"] = "1";

						// Cast the raw bytes to an AVSphericalMapping
						const AVSphericalMapping* map =
								reinterpret_cast<const AVSphericalMapping*>(sd->data);

						// Projection enum → string
						const char* proj_name = av_spherical_projection_name(map->projection);
						info.metadata["spherical_projection"] = proj_name
								? proj_name
								: "unknown";

						// Convert 16.16 fixed-point to float degrees
						auto to_deg = [](int32_t v){
							return (double)v / 65536.0;
						};
						info.metadata["spherical_yaw"] = std::to_string(to_deg(map->yaw));
						info.metadata["spherical_pitch"] = std::to_string(to_deg(map->pitch));
						info.metadata["spherical_roll"] = std::to_string(to_deg(map->roll));
					}
				}
				break;
			}
		}

		// Init previous audio location to zero
		previous_packet_location.frame = -1;
		previous_packet_location.sample_start = 0;

		// Adjust cache size based on size of frame and audio
		const int working_cache_frames = std::max(Settings::Instance()->CACHE_MIN_FRAMES, int(OPEN_MP_NUM_PROCESSORS * info.fps.ToDouble() * 2));
		const int final_cache_frames = std::max(Settings::Instance()->CACHE_MIN_FRAMES, OPEN_MP_NUM_PROCESSORS * 2);
		working_cache.SetMaxBytesFromInfo(working_cache_frames, info.width, info.height, info.sample_rate, info.channels);

		// Scan PTS for any offsets (i.e. non-zero starting streams). At least 1 stream must start at zero timestamp.
		// This method allows us to shift timestamps to ensure at least 1 stream is starting at zero.
		UpdatePTSOffset();

		// Override an invalid framerate
		if (info.fps.ToFloat() > 240.0f || (info.fps.num <= 0 || info.fps.den <= 0) || info.video_length <= 0) {
			// Calculate FPS, duration, video bit rate, and video length manually
			// by scanning through all the video stream packets
			CheckFPS();
		}

		// Mark as "open"
		is_open = true;

		// Seek back to beginning of file (if not already seeking)
		if (!is_seeking) {
			Seek(1);
		}
	}
}
756
	// Close all objects, if reader is 'open'
	// NOTE(review): the function signature (original line 757) and one
	// statement near "Clear final cache" (line 829 — presumably the
	// final_cache clear) appear dropped from this listing; verify upstream.
	if (is_open) {
		// Prevent async calls to the following code
		const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);

		// Mark as "closed"
		is_open = false;

		// Keep track of most recent packet
		AVPacket *recent_packet = packet;

		// Drain any packets from the decoder (bounded by max_attempts so a
		// stuck decoder cannot hang Close() forever)
		packet = NULL;
		int attempts = 0;
		int max_attempts = 128;
		while (packet_status.packets_decoded() < packet_status.packets_read() && attempts < max_attempts) {
			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::Close (Drain decoder loop)",
					"packets_read", packet_status.packets_read(),
					"packets_decoded", packet_status.packets_decoded(),
					"attempts", attempts);
			if (packet_status.video_decoded < packet_status.video_read) {
				ProcessVideoPacket(info.video_length);
			}
			if (packet_status.audio_decoded < packet_status.audio_read) {
				ProcessAudioPacket(info.video_length);
			}
			attempts++;
		}

		// Remove packet
		if (recent_packet) {
			RemoveAVPacket(recent_packet);
		}

		// Close the video codec (flush first, then free context, hw device,
		// swscale context and the cached RGB frame)
		if (info.has_video) {
			if(avcodec_is_open(pCodecCtx)) {
				avcodec_flush_buffers(pCodecCtx);
			}
			AV_FREE_CONTEXT(pCodecCtx);
#if USE_HW_ACCEL
			if (hw_de_on) {
				if (hw_device_ctx) {
					av_buffer_unref(&hw_device_ctx);
					hw_device_ctx = NULL;
				}
			}
#endif // USE_HW_ACCEL
			if (img_convert_ctx) {
				sws_freeContext(img_convert_ctx);
				img_convert_ctx = nullptr;
			}
			if (pFrameRGB_cached) {
				AV_FREE_FRAME(&pFrameRGB_cached);
			}
		}

		// Close the audio codec (flush, free context, release resampler)
		if (info.has_audio) {
			if(avcodec_is_open(aCodecCtx)) {
				avcodec_flush_buffers(aCodecCtx);
			}
			AV_FREE_CONTEXT(aCodecCtx);
			if (avr_ctx) {
				SWR_CLOSE(avr_ctx);
				SWR_FREE(&avr_ctx);
				avr_ctx = nullptr;
			}
		}

		// Clear final cache
		working_cache.Clear();

		// Close the video file
		avformat_close_input(&pFormatCtx);
		av_freep(&pFormatCtx);

		// Do not trim here; trimming is handled on explicit cache clears

		// Reset some variables
		last_frame = 0;
		hold_packet = false;
		largest_frame_processed = 0;
		seek_audio_frame_found = 0;
		seek_video_frame_found = 0;
		current_video_frame = 0;
		last_video_frame.reset();
		last_final_video_frame.reset();
	}
}
849
850bool FFmpegReader::HasAlbumArt() {
851 // Check if the video stream we use is an attached picture
852 // This won't return true if the file has a cover image as a secondary stream
853 // like an MKV file with an attached image file
854 return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
855 && (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);
856}
857
// Select the file duration (seconds) according to duration_strategy:
// prefer the configured stream's reported duration, then the container's,
// then an inferred (bitrate-based) estimate; 0.0 when nothing is usable.
double FFmpegReader::PickDurationSeconds() const {
	auto has_value = [](double value) { return value > 0.0; };

	switch (duration_strategy) {
		// NOTE(review): the case labels for the video-preferred and
		// audio-preferred strategies appear dropped from this listing
		// (numbering gaps at original lines 862/870/878); as shown the
		// switch body is unreachable straight-line code — verify upstream.
		if (has_value(video_stream_duration_seconds))
			return video_stream_duration_seconds;
		if (has_value(audio_stream_duration_seconds))
			return audio_stream_duration_seconds;
		if (has_value(format_duration_seconds))
			return format_duration_seconds;
		break;
		if (has_value(audio_stream_duration_seconds))
			return audio_stream_duration_seconds;
		if (has_value(video_stream_duration_seconds))
			return video_stream_duration_seconds;
		if (has_value(format_duration_seconds))
			return format_duration_seconds;
		break;
		default:
		{
			// Longest-of-all fallback strategy
			double longest = 0.0;
			if (has_value(video_stream_duration_seconds))
				longest = std::max(longest, video_stream_duration_seconds);
			if (has_value(audio_stream_duration_seconds))
				longest = std::max(longest, audio_stream_duration_seconds);
			if (has_value(format_duration_seconds))
				longest = std::max(longest, format_duration_seconds);
			if (has_value(longest))
				return longest;
		}
		break;
	}

	// Final fallbacks shared by every strategy
	if (has_value(format_duration_seconds))
		return format_duration_seconds;
	if (has_value(inferred_duration_seconds))
		return inferred_duration_seconds;

	return 0.0;
}
901
902void FFmpegReader::ApplyDurationStrategy() {
903 const double fps_value = info.fps.ToDouble();
904 const double chosen_seconds = PickDurationSeconds();
905
906 if (chosen_seconds <= 0.0 || fps_value <= 0.0) {
907 info.duration = 0.0f;
908 info.video_length = 0;
909 is_duration_known = false;
910 return;
911 }
912
913 const int64_t frames = static_cast<int64_t>(std::llround(chosen_seconds * fps_value));
914 if (frames <= 0) {
915 info.duration = 0.0f;
916 info.video_length = 0;
917 is_duration_known = false;
918 return;
919 }
920
921 info.video_length = frames;
922 info.duration = static_cast<float>(static_cast<double>(frames) / fps_value);
923 is_duration_known = true;
924}
925
// Populate the FileInfo struct with the audio stream's properties (channels,
// layout, sample rate, bit rate, timebase) and record candidate durations.
// NOTE(review): this listing has numbering gaps (original lines 969/971,
// 1006, 1016-1017 appear dropped) — the channel-layout guesses and the
// video_timebase numerator / timeline max-size assignments are missing
// bodies below; verify against the upstream file.
void FFmpegReader::UpdateAudioInfo() {
	const int codec_channels =
#if HAVE_CH_LAYOUT
			AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.nb_channels;
#else
			AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels;
#endif

	// Set default audio channel layout (if needed)
#if HAVE_CH_LAYOUT
	if (codec_channels > 0 &&
			!av_channel_layout_check(&(AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout)))
		AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout = (AVChannelLayout) AV_CHANNEL_LAYOUT_STEREO;
#else
	if (codec_channels > 0 && AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout == 0)
		AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout = av_get_default_channel_layout(AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels);
#endif

	if (info.sample_rate > 0) {
		// Skip init - if info struct already populated
		return;
	}

	// Record a candidate duration, keeping the largest positive value seen.
	auto record_duration = [](double &target, double seconds) {
		if (seconds > 0.0)
			target = std::max(target, seconds);
	};

	// Set values of FileInfo struct
	info.has_audio = true;
	info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
	info.acodec = aCodecCtx->codec->name;
#if HAVE_CH_LAYOUT
	info.channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.nb_channels;
	info.channel_layout = (ChannelLayout) AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.u.mask;
#else
	info.channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels;
	info.channel_layout = (ChannelLayout) AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout;
#endif

	// If channel layout is not set, guess based on the number of channels
	// NOTE(review): the assignment bodies of these branches appear dropped
	// from this listing (gaps at lines 969/971); as shown they are no-ops.
	if (info.channel_layout == 0) {
		if (info.channels == 1) {
		} else if (info.channels == 2) {
		}
	}

	info.sample_rate = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->sample_rate;
	info.audio_bit_rate = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->bit_rate;
	if (info.audio_bit_rate <= 0) {
		// Get bitrate from format
		info.audio_bit_rate = pFormatCtx->bit_rate;
	}

	// Set audio timebase
	info.audio_timebase.num = aStream->time_base.num;
	info.audio_timebase.den = aStream->time_base.den;

	// Get timebase of audio stream (if valid) and greater than the current duration
	if (aStream->duration > 0) {
		record_duration(audio_stream_duration_seconds, aStream->duration * info.audio_timebase.ToDouble());
	}
	if (pFormatCtx->duration > 0) {
		// Use the format's duration when stream duration is missing or shorter
		record_duration(format_duration_seconds, static_cast<double>(pFormatCtx->duration) / AV_TIME_BASE);
	}

	// Calculate duration from filesize and bitrate (if any)
	if (info.duration <= 0.0f && info.video_bit_rate > 0 && info.file_size > 0) {
		// Estimate from bitrate, total bytes, and framerate
		record_duration(inferred_duration_seconds, static_cast<double>(info.file_size) / info.video_bit_rate);
	}

	// Set video timebase (if no video stream was found)
	if (!info.has_video) {
		// Set a few important default video settings (so audio can be divided into frames)
		info.fps.num = 30;
		info.fps.den = 1;
		info.video_timebase.den = 30;
		info.width = 720;
		info.height = 480;

		// Use timeline to set correct width & height (if any)
		Clip *parent = static_cast<Clip *>(ParentClip());
		if (parent) {
			if (parent->ParentTimeline()) {
				// Set max width/height based on parent clip's timeline (if attached to a timeline)
			}
		}
	}

	// Resolve final duration/length from the recorded candidates
	ApplyDurationStrategy();

	// Add audio metadata (if any found)
	AVDictionaryEntry *tag = NULL;
	while ((tag = av_dict_get(aStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
		QString str_key = tag->key;
		QString str_value = tag->value;
		info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
	}
}
1032
1033void FFmpegReader::UpdateVideoInfo() {
1034 if (info.vcodec.length() > 0) {
1035 // Skip init - if info struct already populated
1036 return;
1037 }
1038
1039 auto record_duration = [](double &target, double seconds) {
1040 if (seconds > 0.0)
1041 target = std::max(target, seconds);
1042 };
1043
1044 // Set values of FileInfo struct
1045 info.has_video = true;
1046 info.file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
1047 info.height = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->height;
1048 info.width = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->width;
1049 info.vcodec = pCodecCtx->codec->name;
1050 info.video_bit_rate = (pFormatCtx->bit_rate / 8);
1051
1052 // Frame rate from the container and codec
1053 AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);
1054 if (!check_fps) {
1055 info.fps.num = framerate.num;
1056 info.fps.den = framerate.den;
1057 }
1058
1059 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::UpdateVideoInfo", "info.fps.num", info.fps.num, "info.fps.den", info.fps.den);
1060
1061 // TODO: remove excessive debug info in the next releases
1062 // The debug info below is just for comparison and troubleshooting on users side during the transition period
1063 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::UpdateVideoInfo (pStream->avg_frame_rate)", "num", pStream->avg_frame_rate.num, "den", pStream->avg_frame_rate.den);
1064
1065 if (pStream->sample_aspect_ratio.num != 0) {
1066 info.pixel_ratio.num = pStream->sample_aspect_ratio.num;
1067 info.pixel_ratio.den = pStream->sample_aspect_ratio.den;
1068 } else if (AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->sample_aspect_ratio.num != 0) {
1069 info.pixel_ratio.num = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->sample_aspect_ratio.num;
1070 info.pixel_ratio.den = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->sample_aspect_ratio.den;
1071 } else {
1072 info.pixel_ratio.num = 1;
1073 info.pixel_ratio.den = 1;
1074 }
1075 info.pixel_format = AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx);
1076
1077 // Calculate the DAR (display aspect ratio)
1079
1080 // Reduce size fraction
1081 size.Reduce();
1082
1083 // Set the ratio based on the reduced fraction
1084 info.display_ratio.num = size.num;
1085 info.display_ratio.den = size.den;
1086
1087 // Get scan type and order from codec context/params
1088 if (!check_interlace) {
1089 check_interlace = true;
1090 AVFieldOrder field_order = AV_GET_CODEC_ATTRIBUTES(pStream, pCodecCtx)->field_order;
1091 switch(field_order) {
1092 case AV_FIELD_PROGRESSIVE:
1093 info.interlaced_frame = false;
1094 break;
1095 case AV_FIELD_TT:
1096 case AV_FIELD_TB:
1097 info.interlaced_frame = true;
1098 info.top_field_first = true;
1099 break;
1100 case AV_FIELD_BT:
1101 case AV_FIELD_BB:
1102 info.interlaced_frame = true;
1103 info.top_field_first = false;
1104 break;
1105 case AV_FIELD_UNKNOWN:
1106 // Check again later?
1107 check_interlace = false;
1108 break;
1109 }
1110 // check_interlace will prevent these checks being repeated,
1111 // unless it was cleared because we got an AV_FIELD_UNKNOWN response.
1112 }
1113
1114 // Set the video timebase
1115 info.video_timebase.num = pStream->time_base.num;
1116 info.video_timebase.den = pStream->time_base.den;
1117
1118 // Set the duration in seconds, and video length (# of frames)
1119 record_duration(video_stream_duration_seconds, pStream->duration * info.video_timebase.ToDouble());
1120
1121 // Check for valid duration (if found)
1122 if (pFormatCtx->duration >= 0) {
1123 // Use the format's duration as another candidate
1124 record_duration(format_duration_seconds, static_cast<double>(pFormatCtx->duration) / AV_TIME_BASE);
1125 }
1126
1127 // Calculate duration from filesize and bitrate (if any)
1128 if (info.video_bit_rate > 0 && info.file_size > 0) {
1129 // Estimate from bitrate, total bytes, and framerate
1130 record_duration(inferred_duration_seconds, static_cast<double>(info.file_size) / info.video_bit_rate);
1131 }
1132
1133 // Certain "image" formats do not have a valid duration
1134 if (video_stream_duration_seconds <= 0.0 && format_duration_seconds <= 0.0 &&
1135 pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {
1136 // Force an "image" duration
1137 record_duration(video_stream_duration_seconds, 60 * 60 * 1); // 1 hour duration
1138 info.has_single_image = true;
1139 }
1140 // Static GIFs can have no usable duration; fall back to a small default
1141 if (video_stream_duration_seconds <= 0.0 && format_duration_seconds <= 0.0 &&
1142 pFormatCtx && pFormatCtx->iformat && strcmp(pFormatCtx->iformat->name, "gif") == 0) {
1143 record_duration(video_stream_duration_seconds, 60 * 60 * 1); // 1 hour duration
1144 info.has_single_image = true;
1145 }
1146
1147 ApplyDurationStrategy();
1148
1149 // Normalize FFmpeg-decoded still images (e.g. JPG/JPEG) to match image-reader behavior.
1150 // This keeps timing/flags consistent regardless of which reader path was used.
1151 if (!info.has_single_image) {
1152 const AVCodecID codec_id = AV_FIND_DECODER_CODEC_ID(pStream);
1153 const bool likely_still_codec =
1154 codec_id == AV_CODEC_ID_MJPEG ||
1155 codec_id == AV_CODEC_ID_PNG ||
1156 codec_id == AV_CODEC_ID_BMP ||
1157 codec_id == AV_CODEC_ID_TIFF ||
1158 codec_id == AV_CODEC_ID_WEBP ||
1159 codec_id == AV_CODEC_ID_JPEG2000;
1160 const bool likely_image_demuxer =
1161 pFormatCtx && pFormatCtx->iformat && pFormatCtx->iformat->name &&
1162 strstr(pFormatCtx->iformat->name, "image2");
1163 const bool has_attached_pic = HasAlbumArt();
1164 const bool single_frame_stream =
1165 (pStream && pStream->nb_frames > 0 && pStream->nb_frames <= 1);
1166 const bool single_frame_clip = info.video_length <= 1;
1167
1168 const bool is_still_image_video =
1169 has_attached_pic ||
1170 ((single_frame_stream || single_frame_clip) &&
1171 (likely_still_codec || likely_image_demuxer));
1172
1173 if (is_still_image_video) {
1174 info.has_single_image = true;
1175
1176 // Only force long duration for standalone images. For audio + attached-art
1177 // files, keep stream-derived duration so the cover image spans the audio.
1178 if (audioStream < 0) {
1179 record_duration(video_stream_duration_seconds, 60 * 60 * 1); // 1 hour duration
1180 }
1181
1182 ApplyDurationStrategy();
1183 }
1184 }
1185
1186 // Add video metadata (if any)
1187 AVDictionaryEntry *tag = NULL;
1188 while ((tag = av_dict_get(pStream->metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
1189 QString str_key = tag->key;
1190 QString str_value = tag->value;
1191 info.metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
1192 }
1193}
1194
1196 return this->is_duration_known;
1197}
1198
1199std::shared_ptr<Frame> FFmpegReader::GetFrame(int64_t requested_frame) {
1200 last_seek_max_frame = -1;
1201 seek_stagnant_count = 0;
1202 // Check for open reader (or throw exception)
1203 if (!is_open)
1204 throw ReaderClosed("The FFmpegReader is closed. Call Open() before calling this method.", path);
1205
1206 // Adjust for a requested frame that is too small or too large
1207 if (requested_frame < 1)
1208 requested_frame = 1;
1209 if (requested_frame > info.video_length && is_duration_known)
1210 requested_frame = info.video_length;
1211 if (info.has_video && info.video_length == 0)
1212 // Invalid duration of video file
1213 throw InvalidFile("Could not detect the duration of the video or audio stream.", path);
1214
1215 // Debug output
1216 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetFrame", "requested_frame", requested_frame, "last_frame", last_frame);
1217
1218 // Check the cache for this frame
1219 std::shared_ptr<Frame> frame = final_cache.GetFrame(requested_frame);
1220 if (frame) {
1221 // Debug output
1222 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetFrame", "returned cached frame", requested_frame);
1223 // Return the cached frame
1224 return frame;
1225 } else {
1226
1227 // Prevent async calls to the remainder of this code
1228 const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);
1229
1230 // Check the cache a 2nd time (due to the potential previous lock)
1231 frame = final_cache.GetFrame(requested_frame);
1232 if (frame) {
1233 // Debug output
1234 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetFrame", "returned cached frame on 2nd look", requested_frame);
1235 } else {
1236 // Frame is not in cache
1237 // Reset seek count
1238 seek_count = 0;
1239
1240 // Are we within X frames of the requested frame?
1241 int64_t diff = requested_frame - last_frame;
1242 if (diff >= 1 && diff <= 20) {
1243 // Continue walking the stream
1244 frame = ReadStream(requested_frame);
1245 } else {
1246 // Greater than 30 frames away, or backwards, we need to seek to the nearest key frame
1247 if (enable_seek) {
1248 // Only seek if enabled
1249 Seek(requested_frame);
1250
1251 } else if (!enable_seek && diff < 0) {
1252 // Start over, since we can't seek, and the requested frame is smaller than our position
1253 // Since we are seeking to frame 1, this actually just closes/re-opens the reader
1254 Seek(1);
1255 }
1256
1257 // Then continue walking the stream
1258 frame = ReadStream(requested_frame);
1259 }
1260 }
1261 return frame;
1262 }
1263}
1264
1265// Read the stream until we find the requested Frame
1266std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
1267 // Allocate video frame
1268 bool check_seek = false;
1269 int packet_error = -1;
1270 int64_t no_progress_count = 0;
1271 int64_t prev_packets_read = packet_status.packets_read();
1272 int64_t prev_packets_decoded = packet_status.packets_decoded();
1273 int64_t prev_video_decoded = packet_status.video_decoded;
1274 double prev_video_pts_seconds = video_pts_seconds;
1275
1276 // Debug output
1277 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream", "requested_frame", requested_frame);
1278
1279 // Loop through the stream until the correct frame is found
1280 while (true) {
1281 // Check if working frames are 'finished'
1282 if (!is_seeking) {
1283 // Check for final frames
1284 CheckWorkingFrames(requested_frame);
1285 }
1286
1287 // Check if requested 'final' frame is available (and break out of loop if found)
1288 bool is_cache_found = (final_cache.GetFrame(requested_frame) != NULL);
1289 if (is_cache_found) {
1290 break;
1291 }
1292
1293 if (!hold_packet || !packet) {
1294 // Get the next packet
1295 packet_error = GetNextPacket();
1296 if (packet_error < 0 && !packet) {
1297 // No more packets to be found
1298 packet_status.packets_eof = true;
1299 }
1300 }
1301
1302 // Debug output
1303 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream (GetNextPacket)", "requested_frame", requested_frame,"packets_read", packet_status.packets_read(), "packets_decoded", packet_status.packets_decoded(), "is_seeking", is_seeking);
1304
1305 // Check the status of a seek (if any)
1306 if (is_seeking) {
1307 check_seek = CheckSeek();
1308 } else {
1309 check_seek = false;
1310 }
1311
1312 if (check_seek) {
1313 // Packet may become NULL on Close inside Seek if CheckSeek returns false
1314 // Jump to the next iteration of this loop
1315 continue;
1316 }
1317
1318 // Video packet
1319 if ((info.has_video && packet && packet->stream_index == videoStream) ||
1320 (info.has_video && packet_status.video_decoded < packet_status.video_read) ||
1321 (info.has_video && !packet && !packet_status.video_eof)) {
1322 // Process Video Packet
1323 ProcessVideoPacket(requested_frame);
1324 if (ReopenWithoutHardwareDecode(requested_frame)) {
1325 continue;
1326 }
1327 }
1328 // Audio packet
1329 if ((info.has_audio && packet && packet->stream_index == audioStream) ||
1330 (info.has_audio && !packet && packet_status.audio_decoded < packet_status.audio_read) ||
1331 (info.has_audio && !packet && !packet_status.audio_eof)) {
1332 // Process Audio Packet
1333 ProcessAudioPacket(requested_frame);
1334 }
1335
1336 // Remove unused packets (sometimes we purposely ignore video or audio packets,
1337 // if the has_video or has_audio properties are manually overridden)
1338 if ((!info.has_video && packet && packet->stream_index == videoStream) ||
1339 (!info.has_audio && packet && packet->stream_index == audioStream)) {
1340 // Keep track of deleted packet counts
1341 if (packet->stream_index == videoStream) {
1342 packet_status.video_decoded++;
1343 } else if (packet->stream_index == audioStream) {
1344 packet_status.audio_decoded++;
1345 }
1346
1347 // Remove unused packets (sometimes we purposely ignore video or audio packets,
1348 // if the has_video or has_audio properties are manually overridden)
1349 RemoveAVPacket(packet);
1350 packet = NULL;
1351 }
1352
1353 // Determine end-of-stream (waiting until final decoder threads finish)
1354 // Force end-of-stream in some situations
1355 packet_status.end_of_file = packet_status.packets_eof && packet_status.video_eof && packet_status.audio_eof;
1356 if ((packet_status.packets_eof && packet_status.packets_read() == packet_status.packets_decoded()) || packet_status.end_of_file) {
1357 // Force EOF (end of file) variables to true, if decoder does not support EOF detection.
1358 // If we have no more packets, and all known packets have been decoded
1359 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream (force EOF)", "packets_read", packet_status.packets_read(), "packets_decoded", packet_status.packets_decoded(), "packets_eof", packet_status.packets_eof, "video_eof", packet_status.video_eof, "audio_eof", packet_status.audio_eof, "end_of_file", packet_status.end_of_file);
1360 if (!packet_status.video_eof) {
1361 packet_status.video_eof = true;
1362 }
1363 if (!packet_status.audio_eof) {
1364 packet_status.audio_eof = true;
1365 }
1366 packet_status.end_of_file = true;
1367 break;
1368 }
1369
1370 // Detect decoder stalls with no progress at EOF and force completion so
1371 // missing frames can be finalized from prior image data.
1372 const bool has_progress =
1373 (packet_status.packets_read() != prev_packets_read) ||
1374 (packet_status.packets_decoded() != prev_packets_decoded) ||
1375 (packet_status.video_decoded != prev_video_decoded) ||
1376 (video_pts_seconds != prev_video_pts_seconds);
1377
1378 if (has_progress) {
1379 no_progress_count = 0;
1380 } else {
1381 no_progress_count++;
1382 if (no_progress_count >= 2000
1383 && packet_status.packets_eof
1384 && !packet
1385 && !hold_packet) {
1386 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream (force EOF after stall)",
1387 "requested_frame", requested_frame,
1388 "no_progress_count", no_progress_count,
1389 "packets_read", packet_status.packets_read(),
1390 "packets_decoded", packet_status.packets_decoded(),
1391 "video_decoded", packet_status.video_decoded,
1392 "audio_decoded", packet_status.audio_decoded);
1393 packet_status.video_eof = true;
1394 packet_status.audio_eof = true;
1395 packet_status.end_of_file = true;
1396 break;
1397 }
1398 }
1399 prev_packets_read = packet_status.packets_read();
1400 prev_packets_decoded = packet_status.packets_decoded();
1401 prev_video_decoded = packet_status.video_decoded;
1402 prev_video_pts_seconds = video_pts_seconds;
1403 } // end while
1404
1405 // Debug output
1406 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ReadStream (Completed)",
1407 "packets_read", packet_status.packets_read(),
1408 "packets_decoded", packet_status.packets_decoded(),
1409 "end_of_file", packet_status.end_of_file,
1410 "largest_frame_processed", largest_frame_processed,
1411 "Working Cache Count", working_cache.Count());
1412
1413 // Have we reached end-of-stream (or the final frame)?
1414 if (!packet_status.end_of_file && requested_frame >= info.video_length) {
1415 // Force end-of-stream
1416 packet_status.end_of_file = true;
1417 }
1418 if (packet_status.end_of_file) {
1419 // Mark any other working frames as 'finished'
1420 CheckWorkingFrames(requested_frame);
1421 }
1422
1423 // Return requested frame (if found)
1424 std::shared_ptr<Frame> frame = final_cache.GetFrame(requested_frame);
1425 if (frame)
1426 // Return prepared frame
1427 return frame;
1428 else {
1429
1430 // Check if largest frame is still cached
1431 frame = final_cache.GetFrame(largest_frame_processed);
1432 int samples_in_frame = Frame::GetSamplesPerFrame(requested_frame, info.fps,
1434 if (frame) {
1435 // Copy and return the largest processed frame (assuming it was the last in the video file)
1436 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1437
1438 // Use solid color (if no image data found)
1439 if (!frame->has_image_data) {
1440 // Use solid black frame if no image data available
1441 f->AddColor(info.width, info.height, "#000");
1442 }
1443 // Silence audio data (if any), since we are repeating the last frame
1444 frame->AddAudioSilence(samples_in_frame);
1445
1446 return frame;
1447 } else {
1448 // The largest processed frame is no longer in cache. Prefer the most recent
1449 // finalized image first, then decoded image, to avoid black flashes.
1450 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1451 if (last_final_video_frame && last_final_video_frame->has_image_data
1452 && last_final_video_frame->number <= requested_frame) {
1453 f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
1454 } else if (last_video_frame && last_video_frame->has_image_data
1455 && last_video_frame->number <= requested_frame) {
1456 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
1457 } else {
1458 f->AddColor(info.width, info.height, "#000");
1459 }
1460 f->AddAudioSilence(samples_in_frame);
1461 return f;
1462 }
1463 }
1464
1465}
1466
1467// Get the next packet (if any)
1468int FFmpegReader::GetNextPacket() {
1469 int found_packet = 0;
1470 AVPacket *next_packet;
1471 next_packet = new AVPacket();
1472 found_packet = av_read_frame(pFormatCtx, next_packet);
1473
1474 if (packet) {
1475 // Remove previous packet before getting next one
1476 RemoveAVPacket(packet);
1477 packet = NULL;
1478 }
1479 if (found_packet >= 0) {
1480 // Update current packet pointer
1481 packet = next_packet;
1482
1483 // Keep track of packet stats
1484 if (packet->stream_index == videoStream) {
1485 packet_status.video_read++;
1486 } else if (packet->stream_index == audioStream) {
1487 packet_status.audio_read++;
1488 }
1489 } else {
1490 // No more packets found
1491 delete next_packet;
1492 packet = NULL;
1493 }
1494 // Return if packet was found (or error number)
1495 return found_packet;
1496}
1497
1498// Get an AVFrame (if any)
1499bool FFmpegReader::GetAVFrame() {
1500 int frameFinished = 0;
1501 auto note_hw_decode_failure = [&](int err, const char* stage) {
1502#if USE_HW_ACCEL
1503 if (!hw_de_on || !hw_de_supported || force_sw_decode) {
1504 return;
1505 }
1506 if (err == AVERROR_INVALIDDATA && packet_status.video_decoded == 0) {
1507 hw_decode_error_count++;
1509 std::string("FFmpegReader::GetAVFrame (hardware decode failure candidate during ") + stage + ")",
1510 "error_count", hw_decode_error_count,
1511 "error", err);
1512 if (hw_decode_error_count >= 3) {
1513 hw_decode_failed = true;
1514 }
1515 }
1516#else
1517 (void) err;
1518 (void) stage;
1519#endif
1520 };
1521
1522 // Decode video frame
1523 AVFrame *next_frame = AV_ALLOCATE_FRAME();
1524
1525#if IS_FFMPEG_3_2
1526 int send_packet_err = 0;
1527 int64_t send_packet_pts = 0;
1528 if ((packet && packet->stream_index == videoStream) || !packet) {
1529 send_packet_err = avcodec_send_packet(pCodecCtx, packet);
1530
1531 if (packet && send_packet_err >= 0) {
1532 send_packet_pts = GetPacketPTS();
1533 hold_packet = false;
1534 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet succeeded)", "send_packet_err", send_packet_err, "send_packet_pts", send_packet_pts);
1535 }
1536 }
1537
1538 #if USE_HW_ACCEL
1539 // Get the format from the variables set in get_hw_dec_format
1540 hw_de_av_pix_fmt = hw_de_av_pix_fmt_global;
1541 hw_de_av_device_type = hw_de_av_device_type_global;
1542 #endif // USE_HW_ACCEL
1543 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1544 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) + "])", "send_packet_err", send_packet_err, "send_packet_pts", send_packet_pts);
1545 note_hw_decode_failure(send_packet_err, "send_packet");
1546 if (send_packet_err == AVERROR(EAGAIN)) {
1547 hold_packet = true;
1548 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame()", "send_packet_pts", send_packet_pts);
1549 }
1550 if (send_packet_err == AVERROR(EINVAL)) {
1551 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush", "send_packet_pts", send_packet_pts);
1552 }
1553 if (send_packet_err == AVERROR(ENOMEM)) {
1554 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors", "send_packet_pts", send_packet_pts);
1555 }
1556 }
1557
1558 // Always try and receive a packet, if not EOF.
1559 // Even if the above avcodec_send_packet failed to send,
1560 // we might still need to receive a packet.
1561 int receive_frame_err = 0;
1562 AVFrame *decoded_frame = next_frame;
1563 AVFrame *next_frame2;
1564#if USE_HW_ACCEL
1565 if (hw_de_on && hw_de_supported) {
1566 next_frame2 = AV_ALLOCATE_FRAME();
1567 }
1568 else
1569#endif // USE_HW_ACCEL
1570 {
1571 next_frame2 = next_frame;
1572 }
1573 pFrame = AV_ALLOCATE_FRAME();
1574 while (receive_frame_err >= 0) {
1575 receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);
1576
1577 if (receive_frame_err != 0) {
1578 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [\" + av_err2string(receive_frame_err) + \"])", "receive_frame_err", receive_frame_err, "send_packet_pts", send_packet_pts);
1579 note_hw_decode_failure(receive_frame_err, "receive_frame");
1580
1581 if (receive_frame_err == AVERROR_EOF) {
1583 "FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)", "send_packet_pts", send_packet_pts);
1584 avcodec_flush_buffers(pCodecCtx);
1585 packet_status.video_eof = true;
1586 }
1587 if (receive_frame_err == AVERROR(EINVAL)) {
1589 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)", "send_packet_pts", send_packet_pts);
1590 avcodec_flush_buffers(pCodecCtx);
1591 }
1592 if (receive_frame_err == AVERROR(EAGAIN)) {
1594 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)", "send_packet_pts", send_packet_pts);
1595 }
1596 if (receive_frame_err == AVERROR_INPUT_CHANGED) {
1598 "FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)", "send_packet_pts", send_packet_pts);
1599 }
1600
1601 // Break out of decoding loop
1602 // Nothing ready for decoding yet
1603 break;
1604 }
1605
1606#if USE_HW_ACCEL
1607 if (hw_de_on && hw_de_supported) {
1608 int err;
1609 if (next_frame2->format == hw_de_av_pix_fmt) {
1610 if ((err = av_hwframe_transfer_data(next_frame, next_frame2, 0)) < 0) {
1612 "FFmpegReader::GetAVFrame (Failed to transfer data to output frame)",
1613 "hw_de_on", hw_de_on,
1614 "error", err);
1615 note_hw_decode_failure(AVERROR_INVALIDDATA, "hwframe_transfer");
1616 break;
1617 }
1618 if ((err = av_frame_copy_props(next_frame, next_frame2)) < 0) {
1620 "FFmpegReader::GetAVFrame (Failed to copy props to output frame)",
1621 "hw_de_on", hw_de_on,
1622 "error", err);
1623 note_hw_decode_failure(AVERROR_INVALIDDATA, "hwframe_copy_props");
1624 break;
1625 }
1626 if (next_frame->format == AV_PIX_FMT_NONE) {
1627 next_frame->format = pCodecCtx->sw_pix_fmt;
1628 }
1629 if (next_frame->width <= 0) {
1630 next_frame->width = next_frame2->width;
1631 }
1632 if (next_frame->height <= 0) {
1633 next_frame->height = next_frame2->height;
1634 }
1635 decoded_frame = next_frame;
1636 } else {
1637 // Some hardware decoders can still return software-readable frames.
1638 decoded_frame = next_frame2;
1639 }
1640 }
1641 else
1642#endif // USE_HW_ACCEL
1643 { // No hardware acceleration used -> no copy from GPU memory needed
1644 decoded_frame = next_frame2;
1645 }
1646
1647 if (!decoded_frame->data[0]) {
1649 "FFmpegReader::GetAVFrame (Decoded frame missing image data)",
1650 "format", decoded_frame->format,
1651 "width", decoded_frame->width,
1652 "height", decoded_frame->height);
1653 note_hw_decode_failure(AVERROR_INVALIDDATA, "decoded_frame_empty");
1654 break;
1655 }
1656
1657 // TODO also handle possible further frames
1658 // Use only the first frame like avcodec_decode_video2
1659 frameFinished = 1;
1660 hw_decode_error_count = 0;
1661#if USE_HW_ACCEL
1662 if (hw_de_on && hw_de_supported && !force_sw_decode) {
1663 hw_decode_succeeded = true;
1664 }
1665#endif
1666 packet_status.video_decoded++;
1667
1668 // Allocate image (align 32 for simd)
1669 AVPixelFormat decoded_pix_fmt = (AVPixelFormat)(decoded_frame->format);
1670 if (decoded_pix_fmt == AV_PIX_FMT_NONE)
1671 decoded_pix_fmt = (AVPixelFormat)(pStream->codecpar->format);
1672 if (AV_ALLOCATE_IMAGE(pFrame, decoded_pix_fmt, info.width, info.height) <= 0) {
1673 throw OutOfMemory("Failed to allocate image buffer", path);
1674 }
1675 av_image_copy(pFrame->data, pFrame->linesize, (const uint8_t**)decoded_frame->data, decoded_frame->linesize,
1676 decoded_pix_fmt, info.width, info.height);
1677 pFrame->format = decoded_pix_fmt;
1678 pFrame->width = info.width;
1679 pFrame->height = info.height;
1680 pFrame->color_range = decoded_frame->color_range;
1681 pFrame->colorspace = decoded_frame->colorspace;
1682 pFrame->color_primaries = decoded_frame->color_primaries;
1683 pFrame->color_trc = decoded_frame->color_trc;
1684 pFrame->chroma_location = decoded_frame->chroma_location;
1685
1686 // Get display PTS from video frame, often different than packet->pts.
1687 // Sending packets to the decoder (i.e. packet->pts) is async,
1688 // and retrieving packets from the decoder (frame->pts) is async. In most decoders
1689 // sending and retrieving are separated by multiple calls to this method.
1690 if (decoded_frame->pts != AV_NOPTS_VALUE) {
1691 // This is the current decoded frame (and should be the pts used) for
1692 // processing this data
1693 video_pts = decoded_frame->pts;
1694 } else if (decoded_frame->pkt_dts != AV_NOPTS_VALUE) {
1695 // Some videos only set this timestamp (fallback)
1696 video_pts = decoded_frame->pkt_dts;
1697 }
1698
1700 "FFmpegReader::GetAVFrame (Successful frame received)", "video_pts", video_pts, "send_packet_pts", send_packet_pts);
1701
1702 // break out of loop after each successful image returned
1703 break;
1704 }
1705#if USE_HW_ACCEL
1706 if (hw_de_on && hw_de_supported && next_frame2 != next_frame) {
1707 AV_FREE_FRAME(&next_frame2);
1708 }
1709 #endif // USE_HW_ACCEL
1710#else
1711 avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);
1712
1713 // always allocate pFrame (because we do that in the ffmpeg >= 3.2 as well); it will always be freed later
1714 pFrame = AV_ALLOCATE_FRAME();
1715
1716 // is frame finished
1717 if (frameFinished) {
1718 // AVFrames are clobbered on the each call to avcodec_decode_video, so we
1719 // must make a copy of the image data before this method is called again.
1720 avpicture_alloc((AVPicture *) pFrame, pCodecCtx->pix_fmt, info.width, info.height);
1721 av_picture_copy((AVPicture *) pFrame, (AVPicture *) next_frame, pCodecCtx->pix_fmt, info.width,
1722 info.height);
1723 }
1724#endif // IS_FFMPEG_3_2
1725
1726 // deallocate the frame
1727 AV_FREE_FRAME(&next_frame);
1728
1729 // Did we get a video frame?
1730 return frameFinished;
1731}
1732
1733bool FFmpegReader::ReopenWithoutHardwareDecode(int64_t requested_frame) {
1734#if USE_HW_ACCEL
1735 if (!hw_decode_failed || force_sw_decode) {
1736 return false;
1737 }
1738
1740 "FFmpegReader::ReopenWithoutHardwareDecode (falling back to software decode)",
1741 "requested_frame", requested_frame,
1742 "video_packets_read", packet_status.video_read,
1743 "video_packets_decoded", packet_status.video_decoded,
1744 "hw_decode_error_count", hw_decode_error_count);
1745
1746 force_sw_decode = true;
1747 hw_decode_failed = false;
1748 hw_decode_error_count = 0;
1749
1750 Close();
1751 Open();
1752 Seek(requested_frame);
1753 return true;
1754#else
1755 (void) requested_frame;
1756 return false;
1757#endif
1758}
1759
1761#if USE_HW_ACCEL
1762 return hw_decode_succeeded;
1763#else
1764 return false;
1765#endif
1766}
1767
// Check the current seek position and determine if we need to seek again
// Returns true while a seek is still in progress (caller should keep reading),
// false once the seek has landed "before" the requested frame (or no seek is active).
bool FFmpegReader::CheckSeek() {
	// Are we seeking for a specific frame?
	if (is_seeking) {
		// Max # of Seek() retries before falling back to the alternate strategies below
		const int64_t kSeekRetryMax = 5;
		// # of consecutive no-progress retries that triggers the "stagnant" strategy
		const int kSeekStagnantMax = 2;

		// Determine if both an audio and video packet have been decoded since the seek happened.
		// If not, allow the ReadStream method to keep looping
		if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))
			return false;

		// Check for both streams
		if ((info.has_video && !seek_video_frame_found) || (info.has_audio && !seek_audio_frame_found))
			return false;

		// Determine max seeked frame
		int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);
		// Track stagnant seek results (no progress between retries)
		if (max_seeked_frame == last_seek_max_frame) {
			seek_stagnant_count++;
		} else {
			last_seek_max_frame = max_seeked_frame;
			seek_stagnant_count = 0;
		}

		// determine if we are "before" the requested frame
		if (max_seeked_frame >= seeking_frame) {
			// SEEKED TOO FAR
			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckSeek (Too far, seek again)",
													 "is_video_seek", is_video_seek,
													 "max_seeked_frame", max_seeked_frame,
													 "seeking_frame", seeking_frame,
													 "seeking_pts", seeking_pts,
													 "seek_video_frame_found", seek_video_frame_found,
													 "seek_audio_frame_found", seek_audio_frame_found);

			// Seek again... to the nearest Keyframe
			if (seek_count < kSeekRetryMax) {
				// Quadratic back-off: each retry targets 10*seek_count^2 frames earlier
				Seek(seeking_frame - (10 * seek_count * seek_count));
			} else if (seek_stagnant_count >= kSeekStagnantMax) {
				// Stagnant seek: force a much earlier target and keep seeking.
				Seek(seeking_frame - (10 * kSeekRetryMax * kSeekRetryMax));
			} else {
				// Retry budget exhausted: keep seeking from a conservative offset.
				Seek(seeking_frame - (10 * seek_count * seek_count));
			}
		} else {
			// SEEK WORKED
			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckSeek (Successful)",
													 "is_video_seek", is_video_seek,
													 "packet->pts", GetPacketPTS(),
													 "seeking_pts", seeking_pts,
													 "seeking_frame", seeking_frame,
													 "seek_video_frame_found", seek_video_frame_found,
													 "seek_audio_frame_found", seek_audio_frame_found);

			// Seek worked, and we are "before" the requested frame
			is_seeking = false;
			seeking_frame = 0;
			seeking_pts = -1;
		}
	}

	// return the pts to seek to (if any)
	return is_seeking;
}
1835
1836// Process a video packet
1837void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
1838 // Get the AVFrame from the current packet
1839 // This sets the video_pts to the correct timestamp
1840 int frame_finished = GetAVFrame();
1841
1842 // Check if the AVFrame is finished and set it
1843 if (!frame_finished) {
1844 // No AVFrame decoded yet, bail out
1845 if (pFrame) {
1846 RemoveAVFrame(pFrame);
1847 }
1848 return;
1849 }
1850
1851 // Calculate current frame #
1852 int64_t current_frame = ConvertVideoPTStoFrame(video_pts);
1853
1854 // Track 1st video packet after a successful seek
1855 if (!seek_video_frame_found && is_seeking)
1856 seek_video_frame_found = current_frame;
1857
1858 // Create or get the existing frame object. Requested frame needs to be created
1859 // in working_cache at least once. Seek can clear the working_cache, so we must
1860 // add the requested frame back to the working_cache here. If it already exists,
1861 // it will be moved to the top of the working_cache.
1862 working_cache.Add(CreateFrame(requested_frame));
1863
1864 // Debug output
1865 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessVideoPacket (Before)", "requested_frame", requested_frame, "current_frame", current_frame);
1866
1867 // Init some things local (for OpenMP)
1868 AVPixelFormat decoded_pix_fmt = (pFrame && pFrame->format != AV_PIX_FMT_NONE)
1869 ? static_cast<AVPixelFormat>(pFrame->format)
1870 : AV_GET_CODEC_PIXEL_FORMAT(pStream, pCodecCtx);
1871 bool src_full_range = (pFrame && pFrame->color_range == AVCOL_RANGE_JPEG);
1872 AVPixelFormat src_pix_fmt = NormalizeDeprecatedPixFmt(decoded_pix_fmt, src_full_range);
1873 int src_width = (pFrame && pFrame->width > 0) ? pFrame->width : info.width;
1874 int src_height = (pFrame && pFrame->height > 0) ? pFrame->height : info.height;
1875 int height = src_height;
1876 int width = src_width;
1877 int64_t video_length = info.video_length;
1878
1879 // Create or reuse a RGB Frame (since most videos are not in RGB, we must convert it)
1880 AVFrame *pFrameRGB = pFrameRGB_cached;
1881 if (!pFrameRGB) {
1882 pFrameRGB = AV_ALLOCATE_FRAME();
1883 if (pFrameRGB == nullptr)
1884 throw OutOfMemory("Failed to allocate frame buffer", path);
1885 pFrameRGB_cached = pFrameRGB;
1886 }
1887 AV_RESET_FRAME(pFrameRGB);
1888 uint8_t *buffer = nullptr;
1889
1890 // Determine the max size of this source image (based on the timeline's size, the scaling mode,
1891 // and the scaling keyframes). This is a performance improvement, to keep the images as small as possible,
1892 // without losing quality. NOTE: We cannot go smaller than the timeline itself, or the add_layer timeline
1893 // method will scale it back to timeline size before scaling it smaller again. This needs to be fixed in
1894 // the future.
1895 int max_width = info.width;
1896 int max_height = info.height;
1897
1898 Clip *parent = static_cast<Clip *>(ParentClip());
1899 if (parent) {
1900 if (parent->ParentTimeline()) {
1901 // Set max width/height based on parent clip's timeline (if attached to a timeline)
1902 max_width = parent->ParentTimeline()->preview_width;
1903 max_height = parent->ParentTimeline()->preview_height;
1904 }
1905 if (parent->scale == SCALE_FIT || parent->scale == SCALE_STRETCH) {
1906 // Best fit or Stretch scaling (based on max timeline size * scaling keyframes)
1907 float max_scale_x = parent->scale_x.GetMaxPoint().co.Y;
1908 float max_scale_y = parent->scale_y.GetMaxPoint().co.Y;
1909 max_width = std::max(float(max_width), max_width * max_scale_x);
1910 max_height = std::max(float(max_height), max_height * max_scale_y);
1911
1912 } else if (parent->scale == SCALE_CROP) {
1913 // Cropping scale mode (based on max timeline size * cropped size * scaling keyframes)
1914 float max_scale_x = parent->scale_x.GetMaxPoint().co.Y;
1915 float max_scale_y = parent->scale_y.GetMaxPoint().co.Y;
1916 QSize width_size(max_width * max_scale_x,
1917 round(max_width / (float(info.width) / float(info.height))));
1918 QSize height_size(round(max_height / (float(info.height) / float(info.width))),
1919 max_height * max_scale_y);
1920 // respect aspect ratio
1921 if (width_size.width() >= max_width && width_size.height() >= max_height) {
1922 max_width = std::max(max_width, width_size.width());
1923 max_height = std::max(max_height, width_size.height());
1924 } else {
1925 max_width = std::max(max_width, height_size.width());
1926 max_height = std::max(max_height, height_size.height());
1927 }
1928
1929 } else {
1930 // Scale video to equivalent unscaled size
1931 // Since the preview window can change sizes, we want to always
1932 // scale against the ratio of original video size to timeline size
1933 float preview_ratio = 1.0;
1934 if (parent->ParentTimeline()) {
1935 Timeline *t = (Timeline *) parent->ParentTimeline();
1936 preview_ratio = t->preview_width / float(t->info.width);
1937 }
1938 float max_scale_x = parent->scale_x.GetMaxPoint().co.Y;
1939 float max_scale_y = parent->scale_y.GetMaxPoint().co.Y;
1940 max_width = info.width * max_scale_x * preview_ratio;
1941 max_height = info.height * max_scale_y * preview_ratio;
1942 }
1943
1944 // If a crop effect is resizing the image, request enough pixels to preserve detail
1945 ApplyCropResizeScale(parent, info.width, info.height, max_width, max_height);
1946 }
1947
1948 // Determine if image needs to be scaled (for performance reasons)
1949 int original_height = src_height;
1950 if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
1951 // Override width and height (but maintain aspect ratio)
1952 float ratio = float(width) / float(height);
1953 int possible_width = round(max_height * ratio);
1954 int possible_height = round(max_width / ratio);
1955
1956 if (possible_width <= max_width) {
1957 // use calculated width, and max_height
1958 width = possible_width;
1959 height = max_height;
1960 } else {
1961 // use max_width, and calculated height
1962 width = max_width;
1963 height = possible_height;
1964 }
1965 }
1966
1967 // Determine required buffer size and allocate buffer
1968 const int bytes_per_pixel = 4;
1969 int raw_buffer_size = (width * height * bytes_per_pixel) + 128;
1970
1971 // Aligned memory allocation (for speed)
1972 constexpr size_t ALIGNMENT = 32; // AVX2
1973 int buffer_size = ((raw_buffer_size + ALIGNMENT - 1) / ALIGNMENT) * ALIGNMENT;
1974 buffer = (unsigned char*) aligned_malloc(buffer_size, ALIGNMENT);
1975
1976 // Copy picture data from one AVFrame (or AVPicture) to another one.
1977 AV_COPY_PICTURE_DATA(pFrameRGB, buffer, PIX_FMT_RGBA, width, height);
1978
1979 int scale_mode = SWS_FAST_BILINEAR;
1980 if (openshot::Settings::Instance()->HIGH_QUALITY_SCALING) {
1981 scale_mode = SWS_BICUBIC;
1982 }
1983 img_convert_ctx = sws_getCachedContext(img_convert_ctx, src_width, src_height, src_pix_fmt, width, height, PIX_FMT_RGBA, scale_mode, NULL, NULL, NULL);
1984 if (!img_convert_ctx)
1985 throw OutOfMemory("Failed to initialize sws context", path);
1986 const int *src_coeff = sws_getCoefficients(SWS_CS_DEFAULT);
1987 const int *dst_coeff = sws_getCoefficients(SWS_CS_DEFAULT);
1988 const int dst_full_range = 1; // RGB outputs are full-range
1989 sws_setColorspaceDetails(img_convert_ctx, src_coeff, src_full_range ? 1 : 0,
1990 dst_coeff, dst_full_range, 0, 1 << 16, 1 << 16);
1991
1992 if (!pFrame || !pFrame->data[0] || pFrame->linesize[0] <= 0) {
1993#if USE_HW_ACCEL
1994 if (hw_de_on && hw_de_supported && !force_sw_decode) {
1995 hw_decode_failed = true;
1997 "FFmpegReader::ProcessVideoPacket (Invalid source frame; forcing software fallback)",
1998 "requested_frame", requested_frame,
1999 "current_frame", current_frame,
2000 "src_pix_fmt", src_pix_fmt,
2001 "src_width", src_width,
2002 "src_height", src_height);
2003 }
2004#endif
2005 if (pFrame) {
2006 RemoveAVFrame(pFrame);
2007 pFrame = NULL;
2008 }
2009 return;
2010 }
2011
2012 // Resize / Convert to RGB
2013 const int scaled_lines = sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
2014 original_height, pFrameRGB->data, pFrameRGB->linesize);
2015 if (scaled_lines <= 0) {
2016#if USE_HW_ACCEL
2017 if (hw_de_on && hw_de_supported && !force_sw_decode) {
2018 hw_decode_failed = true;
2020 "FFmpegReader::ProcessVideoPacket (sws_scale failed; forcing software fallback)",
2021 "requested_frame", requested_frame,
2022 "current_frame", current_frame,
2023 "scaled_lines", scaled_lines,
2024 "src_pix_fmt", src_pix_fmt,
2025 "src_width", src_width,
2026 "src_height", src_height);
2027 }
2028#endif
2029 free(buffer);
2030 AV_RESET_FRAME(pFrameRGB);
2031 RemoveAVFrame(pFrame);
2032 pFrame = NULL;
2033 return;
2034 }
2035
2036 // Create or get the existing frame object
2037 std::shared_ptr<Frame> f = CreateFrame(current_frame);
2038
2039 // Add Image data to frame
2040 if (!ffmpeg_has_alpha(src_pix_fmt)) {
2041 // Add image with no alpha channel, Speed optimization
2042 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);
2043 } else {
2044 // Add image with alpha channel (this will be converted to premultipled when needed, but is slower)
2045 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);
2046 }
2047
2048 // Update working cache
2049 working_cache.Add(f);
2050
2051 // Keep track of last last_video_frame
2052 last_video_frame = f;
2053
2054 // Free the RGB image
2055 AV_RESET_FRAME(pFrameRGB);
2056
2057 // Remove frame and packet
2058 RemoveAVFrame(pFrame);
2059
2060 // Get video PTS in seconds
2061 video_pts_seconds = (double(video_pts) * info.video_timebase.ToDouble()) + pts_offset_seconds;
2062
2063 // Debug output
2064 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessVideoPacket (After)", "requested_frame", requested_frame, "current_frame", current_frame, "f->number", f->number, "video_pts_seconds", video_pts_seconds);
2065}
2066
2067// Process an audio packet
2068void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
2069 AudioLocation location;
2070 // Calculate location of current audio packet
2071 if (packet && packet->pts != AV_NOPTS_VALUE) {
2072 // Determine related video frame and starting sample # from audio PTS
2073 location = GetAudioPTSLocation(packet->pts);
2074
2075 // Track 1st audio packet after a successful seek
2076 if (!seek_audio_frame_found && is_seeking)
2077 seek_audio_frame_found = location.frame;
2078 }
2079
2080 // Create or get the existing frame object. Requested frame needs to be created
2081 // in working_cache at least once. Seek can clear the working_cache, so we must
2082 // add the requested frame back to the working_cache here. If it already exists,
2083 // it will be moved to the top of the working_cache.
2084 working_cache.Add(CreateFrame(requested_frame));
2085
2086 // Debug output
2087 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (Before)",
2088 "requested_frame", requested_frame,
2089 "target_frame", location.frame,
2090 "starting_sample", location.sample_start);
2091
2092 // Init an AVFrame to hold the decoded audio samples
2093 int frame_finished = 0;
2094 AVFrame *audio_frame = AV_ALLOCATE_FRAME();
2095 AV_RESET_FRAME(audio_frame);
2096
2097 int packet_samples = 0;
2098 int data_size = 0;
2099
2100#if IS_FFMPEG_3_2
2101 int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
2102 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
2103 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (Packet not sent)");
2104 }
2105 else {
2106 int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
2107 if (receive_frame_err >= 0) {
2108 frame_finished = 1;
2109 }
2110 if (receive_frame_err == AVERROR_EOF) {
2111 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (EOF detected from decoder)");
2112 packet_status.audio_eof = true;
2113 }
2114 if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
2115 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (invalid frame received or EOF from decoder)");
2116 avcodec_flush_buffers(aCodecCtx);
2117 }
2118 if (receive_frame_err != 0) {
2119 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (frame not ready yet from decoder)");
2120 }
2121 }
2122#else
2123 int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);
2124#endif
2125
2126 if (frame_finished) {
2127 packet_status.audio_decoded++;
2128
2129 // This can be different than the current packet, so we need to look
2130 // at the current AVFrame from the audio decoder. This timestamp should
2131 // be used for the remainder of this function
2132 audio_pts = audio_frame->pts;
2133
2134 // Determine related video frame and starting sample # from audio PTS
2135 location = GetAudioPTSLocation(audio_pts);
2136
2137 // determine how many samples were decoded
2138 int plane_size = -1;
2139#if HAVE_CH_LAYOUT
2140 int nb_channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout.nb_channels;
2141#else
2142 int nb_channels = AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channels;
2143#endif
2144 data_size = av_samples_get_buffer_size(&plane_size, nb_channels,
2145 audio_frame->nb_samples, (AVSampleFormat) (AV_GET_SAMPLE_FORMAT(aStream, aCodecCtx)), 1);
2146
2147 // Calculate total number of samples
2148 packet_samples = audio_frame->nb_samples * nb_channels;
2149 } else {
2150 if (audio_frame) {
2151 // Free audio frame
2152 AV_FREE_FRAME(&audio_frame);
2153 }
2154 }
2155
2156 // Estimate the # of samples and the end of this packet's location (to prevent GAPS for the next timestamp)
2157 int pts_remaining_samples = packet_samples / info.channels; // Adjust for zero based array
2158
2159 // Bail if no samples found
2160 if (pts_remaining_samples == 0) {
2161 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (No samples, bailing)",
2162 "packet_samples", packet_samples,
2163 "info.channels", info.channels,
2164 "pts_remaining_samples", pts_remaining_samples);
2165 return;
2166 }
2167
2168 while (pts_remaining_samples) {
2169 // Get Samples per frame (for this frame number)
2170 int samples_per_frame = Frame::GetSamplesPerFrame(previous_packet_location.frame, info.fps, info.sample_rate, info.channels);
2171
2172 // Calculate # of samples to add to this frame
2173 int samples = samples_per_frame - previous_packet_location.sample_start;
2174 if (samples > pts_remaining_samples)
2175 samples = pts_remaining_samples;
2176
2177 // Decrement remaining samples
2178 pts_remaining_samples -= samples;
2179
2180 if (pts_remaining_samples > 0) {
2181 // next frame
2182 previous_packet_location.frame++;
2183 previous_packet_location.sample_start = 0;
2184 } else {
2185 // Increment sample start
2186 previous_packet_location.sample_start += samples;
2187 }
2188 }
2189
2190 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (ReSample)",
2191 "packet_samples", packet_samples,
2192 "info.channels", info.channels,
2193 "info.sample_rate", info.sample_rate,
2194 "aCodecCtx->sample_fmt", AV_GET_SAMPLE_FORMAT(aStream, aCodecCtx));
2195
2196 // Create output frame
2197 AVFrame *audio_converted = AV_ALLOCATE_FRAME();
2198 AV_RESET_FRAME(audio_converted);
2199 audio_converted->nb_samples = audio_frame->nb_samples;
2200 av_samples_alloc(audio_converted->data, audio_converted->linesize, info.channels, audio_frame->nb_samples, AV_SAMPLE_FMT_FLTP, 0);
2201
2202 SWRCONTEXT *avr = avr_ctx;
2203 // setup resample context if needed
2204 if (!avr) {
2205 avr = SWR_ALLOC();
2206#if HAVE_CH_LAYOUT
2207 av_opt_set_chlayout(avr, "in_chlayout", &AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout, 0);
2208 av_opt_set_chlayout(avr, "out_chlayout", &AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->ch_layout, 0);
2209#else
2210 av_opt_set_int(avr, "in_channel_layout", AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout, 0);
2211 av_opt_set_int(avr, "out_channel_layout", AV_GET_CODEC_ATTRIBUTES(aStream, aCodecCtx)->channel_layout, 0);
2212 av_opt_set_int(avr, "in_channels", info.channels, 0);
2213 av_opt_set_int(avr, "out_channels", info.channels, 0);
2214#endif
2215 av_opt_set_int(avr, "in_sample_fmt", AV_GET_SAMPLE_FORMAT(aStream, aCodecCtx), 0);
2216 av_opt_set_int(avr, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
2217 av_opt_set_int(avr, "in_sample_rate", info.sample_rate, 0);
2218 av_opt_set_int(avr, "out_sample_rate", info.sample_rate, 0);
2219 SWR_INIT(avr);
2220 avr_ctx = avr;
2221 }
2222
2223 // Convert audio samples
2224 int nb_samples = SWR_CONVERT(avr, // audio resample context
2225 audio_converted->data, // output data pointers
2226 audio_converted->linesize[0], // output plane size, in bytes. (0 if unknown)
2227 audio_converted->nb_samples, // maximum number of samples that the output buffer can hold
2228 audio_frame->data, // input data pointers
2229 audio_frame->linesize[0], // input plane size, in bytes (0 if unknown)
2230 audio_frame->nb_samples); // number of input samples to convert
2231
2232
2233 int64_t starting_frame_number = -1;
2234 for (int channel_filter = 0; channel_filter < info.channels; channel_filter++) {
2235 // Array of floats (to hold samples for each channel)
2236 starting_frame_number = location.frame;
2237 int channel_buffer_size = nb_samples;
2238 auto *channel_buffer = (float *) (audio_converted->data[channel_filter]);
2239
2240 // Loop through samples, and add them to the correct frames
2241 int start = location.sample_start;
2242 int remaining_samples = channel_buffer_size;
2243 while (remaining_samples > 0) {
2244 // Get Samples per frame (for this frame number)
2245 int samples_per_frame = Frame::GetSamplesPerFrame(starting_frame_number, info.fps, info.sample_rate, info.channels);
2246
2247 // Calculate # of samples to add to this frame
2248 int samples = std::fmin(samples_per_frame - start, remaining_samples);
2249
2250 // Create or get the existing frame object
2251 std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);
2252
2253 // Add samples for current channel to the frame.
2254 f->AddAudio(true, channel_filter, start, channel_buffer, samples, 1.0f);
2255
2256 // Debug output
2257 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (f->AddAudio)",
2258 "frame", starting_frame_number,
2259 "start", start,
2260 "samples", samples,
2261 "channel", channel_filter,
2262 "samples_per_frame", samples_per_frame);
2263
2264 // Add or update cache
2265 working_cache.Add(f);
2266
2267 // Decrement remaining samples
2268 remaining_samples -= samples;
2269
2270 // Increment buffer (to next set of samples)
2271 if (remaining_samples > 0)
2272 channel_buffer += samples;
2273
2274 // Increment frame number
2275 starting_frame_number++;
2276
2277 // Reset starting sample #
2278 start = 0;
2279 }
2280 }
2281
2282 // Free AVFrames
2283 av_free(audio_converted->data[0]);
2284 AV_FREE_FRAME(&audio_converted);
2285 AV_FREE_FRAME(&audio_frame);
2286
2287 // Get audio PTS in seconds
2288 audio_pts_seconds = (double(audio_pts) * info.audio_timebase.ToDouble()) + pts_offset_seconds;
2289
2290 // Debug output
2291 ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::ProcessAudioPacket (After)",
2292 "requested_frame", requested_frame,
2293 "starting_frame", location.frame,
2294 "end_frame", starting_frame_number - 1,
2295 "audio_pts_seconds", audio_pts_seconds);
2296
2297}
2298
2299
// Seek to a specific frame. This is not always frame accurate, it's more of an estimation on many codecs.
// Resets most per-read state (caches, PTS trackers) and either re-opens the
// file (for targets near frame 1) or performs an av_seek_frame to a keyframe.
void FFmpegReader::Seek(int64_t requested_frame) {
	// Adjust for a requested frame that is too small or too large
	if (requested_frame < 1)
		requested_frame = 1;
	if (requested_frame > info.video_length)
		requested_frame = info.video_length;
	if (requested_frame > largest_frame_processed && packet_status.end_of_file) {
		// Not possible to search past largest_frame once EOF is reached (no more packets)
		return;
	}

	// Debug output
	ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::Seek",
											 "requested_frame", requested_frame,
											 "seek_count", seek_count,
											 "last_frame", last_frame);

	// Clear working cache (since we are seeking to another location in the file)
	working_cache.Clear();

	// Reset the last frame variable
	video_pts = 0.0;
	video_pts_seconds = NO_PTS_OFFSET;
	audio_pts = 0.0;
	audio_pts_seconds = NO_PTS_OFFSET;
	hold_packet = false;
	last_frame = 0;
	current_video_frame = 0;
	largest_frame_processed = 0;
	last_final_video_frame.reset();
	// Remember the stream flags, since Close()/Open() below may overwrite them
	bool has_audio_override = info.has_audio;
	bool has_video_override = info.has_video;

	// Init end-of-file detection variables
	packet_status.reset(false);

	// Increment seek count
	seek_count++;

	// If seeking near frame 1, we need to close and re-open the file (this is more reliable than seeking)
	int buffer_amount = 12;
	if (requested_frame - buffer_amount < 20) {
		// prevent Open() from seeking again
		is_seeking = true;

		// Close and re-open file (basically seeking to frame 1)
		Close();
		Open();

		// Update overrides (since closing and re-opening might update these)
		info.has_audio = has_audio_override;
		info.has_video = has_video_override;

		// Not actually seeking, so clear these flags
		is_seeking = false;
		if (seek_count == 1) {
			// Don't redefine this on multiple seek attempts for a specific frame
			seeking_frame = 1;
			seeking_pts = ConvertFrameToVideoPTS(1);
		}
		seek_audio_frame_found = 0; // used to detect which frames to throw away after a seek
		seek_video_frame_found = 0; // used to detect which frames to throw away after a seek

	} else {
		// Seek to nearest key-frame (aka, i-frame)
		bool seek_worked = false;
		int64_t seek_target = 0;

		// Seek video stream (if any), except album arts
		if (!seek_worked && info.has_video && !HasAlbumArt()) {
			seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);
			if (av_seek_frame(pFormatCtx, info.video_stream_index, seek_target, AVSEEK_FLAG_BACKWARD) < 0) {
				ZmqLogger::Instance()->Log(std::string(pFormatCtx->AV_FILENAME) + ": error while seeking video stream");
			} else {
				// VIDEO SEEK
				is_video_seek = true;
				seek_worked = true;
			}
		}

		// Seek audio stream (if not already seeked... and if an audio stream is found)
		if (!seek_worked && info.has_audio) {
			seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);
			if (av_seek_frame(pFormatCtx, info.audio_stream_index, seek_target, AVSEEK_FLAG_BACKWARD) < 0) {
				ZmqLogger::Instance()->Log(std::string(pFormatCtx->AV_FILENAME) + ": error while seeking audio stream");
			} else {
				// AUDIO SEEK
				is_video_seek = false;
				seek_worked = true;
			}
		}

		// Was the seek successful?
		if (seek_worked) {
			// Flush audio buffer
			if (info.has_audio)
				avcodec_flush_buffers(aCodecCtx);

			// Flush video buffer
			if (info.has_video)
				avcodec_flush_buffers(pCodecCtx);

			// Reset previous audio location to zero
			previous_packet_location.frame = -1;
			previous_packet_location.sample_start = 0;

			// init seek flags
			is_seeking = true;
			if (seek_count == 1) {
				// Don't redefine this on multiple seek attempts for a specific frame
				seeking_pts = seek_target;
				seeking_frame = requested_frame;
			}
			seek_audio_frame_found = 0; // used to detect which frames to throw away after a seek
			seek_video_frame_found = 0; // used to detect which frames to throw away after a seek

		} else {
			// seek failed
			seeking_pts = 0;
			seeking_frame = 0;

			// prevent Open() from seeking again
			is_seeking = true;

			// Close and re-open file (basically seeking to frame 1)
			Close();
			Open();

			// Not actually seeking, so clear these flags
			is_seeking = false;

			// disable seeking for this reader (since it failed)
			enable_seek = false;

			// Update overrides (since closing and re-opening might update these)
			info.has_audio = has_audio_override;
			info.has_video = has_video_override;
		}
	}
}
2441
2442// Get the PTS for the current video packet
2443int64_t FFmpegReader::GetPacketPTS() {
2444 if (packet) {
2445 int64_t current_pts = packet->pts;
2446 if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
2447 current_pts = packet->dts;
2448
2449 // Return adjusted PTS
2450 return current_pts;
2451 } else {
2452 // No packet, return NO PTS
2453 return AV_NOPTS_VALUE;
2454 }
2455}
2456
// Update PTS Offset (if any)
// Computes pts_offset_seconds (the value that maps the first packet's PTS to
// time zero) by reading packets until a usable timestamp is found for each
// present stream. Runs only once; subsequent calls are no-ops.
void FFmpegReader::UpdatePTSOffset() {
	if (pts_offset_seconds != NO_PTS_OFFSET) {
		// Skip this method if we have already set PTS offset
		return;
	}
	pts_offset_seconds = 0.0;
	double video_pts_offset_seconds = 0.0;
	double audio_pts_offset_seconds = 0.0;

	// Pre-mark absent streams as "found" so the loop below only waits
	// for streams that actually exist
	bool has_video_pts = false;
	if (!info.has_video) {
		// Mark as checked
		has_video_pts = true;
	}
	bool has_audio_pts = false;
	if (!info.has_audio) {
		// Mark as checked
		has_audio_pts = true;
	}

	// Loop through the stream (until a packet from all streams is found)
	while (!has_video_pts || !has_audio_pts) {
		// Get the next packet (if any)
		if (GetNextPacket() < 0)
			// Break loop when no more packets found
			break;

		// Get PTS of this packet
		int64_t pts = GetPacketPTS();

		// Video packet
		if (!has_video_pts && packet->stream_index == videoStream) {
			// Get the video packet start time (in seconds)
			video_pts_offset_seconds = 0.0 - (pts * info.video_timebase.ToDouble());

			// Is timestamp close to zero (within X seconds)
			// Ignore wildly invalid timestamps (i.e. -234923423423)
			if (std::abs(video_pts_offset_seconds) <= 10.0) {
				has_video_pts = true;
			}
		}
		else if (!has_audio_pts && packet->stream_index == audioStream) {
			// Get the audio packet start time (in seconds)
			audio_pts_offset_seconds = 0.0 - (pts * info.audio_timebase.ToDouble());

			// Is timestamp close to zero (within X seconds)
			// Ignore wildly invalid timestamps (i.e. -234923423423)
			if (std::abs(audio_pts_offset_seconds) <= 10.0) {
				has_audio_pts = true;
			}
		}
	}

	// Choose timestamp origin:
	// - If video exists, anchor timeline frame mapping to video start.
	//   This avoids AAC priming / audio preroll shifting video frame 1 to frame 2.
	// - If no video exists (audio-only readers), use audio start.
	if (info.has_video && has_video_pts) {
		pts_offset_seconds = video_pts_offset_seconds;
	} else if (!info.has_video && has_audio_pts) {
		pts_offset_seconds = audio_pts_offset_seconds;
	} else if (has_video_pts && has_audio_pts) {
		// Fallback when stream flags are unusual but both timestamps exist.
		// NOTE(review): since has_video_pts is pre-set true when !info.has_video,
		// this branch appears unreachable from the two conditions above — confirm.
		pts_offset_seconds = video_pts_offset_seconds;
	}
}
2524
2525// Convert PTS into Frame Number
2526int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {
2527 // Apply PTS offset
2528 int64_t previous_video_frame = current_video_frame;
2529 const double fps_value = (info.fps.num > 0 && info.fps.den > 0) ? info.fps.ToDouble() : 30.0;
2530 const double video_timebase_value =
2533 : (1.0 / 30.0);
2534
2535 // Get the video packet start time (in seconds)
2536 double video_seconds = (double(pts) * video_timebase_value) + pts_offset_seconds;
2537
2538 // Divide by the video timebase, to get the video frame number (frame # is decimal at this point)
2539 int64_t frame = round(video_seconds * fps_value) + 1;
2540
2541 // Keep track of the expected video frame #
2542 if (current_video_frame == 0)
2543 current_video_frame = frame;
2544 else {
2545
2546 // Sometimes frames are duplicated due to identical (or similar) timestamps
2547 if (frame == previous_video_frame) {
2548 // return -1 frame number
2549 frame = -1;
2550 } else {
2551 // Increment expected frame
2552 current_video_frame++;
2553 }
2554 }
2555
2556 // Return frame #
2557 return frame;
2558}
2559
2560// Convert Frame Number into Video PTS
2561int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {
2562 const double fps_value = (info.fps.num > 0 && info.fps.den > 0) ? info.fps.ToDouble() : 30.0;
2563 const double video_timebase_value =
2566 : (1.0 / 30.0);
2567
2568 // Get timestamp of this frame (in seconds)
2569 double seconds = (double(frame_number - 1) / fps_value) + pts_offset_seconds;
2570
2571 // Calculate the # of video packets in this timestamp
2572 int64_t video_pts = round(seconds / video_timebase_value);
2573
2574 // Apply PTS offset (opposite)
2575 return video_pts;
2576}
2577
2578// Convert Frame Number into Video PTS
2579int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {
2580 const double fps_value = (info.fps.num > 0 && info.fps.den > 0) ? info.fps.ToDouble() : 30.0;
2581 const double audio_timebase_value =
2584 : (1.0 / 48000.0);
2585
2586 // Get timestamp of this frame (in seconds)
2587 double seconds = (double(frame_number - 1) / fps_value) + pts_offset_seconds;
2588
2589 // Calculate the # of audio packets in this timestamp
2590 int64_t audio_pts = round(seconds / audio_timebase_value);
2591
2592 // Apply PTS offset (opposite)
2593 return audio_pts;
2594}
2595
2596// Calculate Starting video frame and sample # for an audio PTS
// Converts an audio-stream PTS into an AudioLocation: the 1-based video
// frame the packet starts in, plus the sample offset within that frame.
// Also smooths small PTS jitter by snapping to the previous packet's
// location when the two are "near" each other.
2597AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {
2598	const double audio_timebase_value =
2601		: (1.0 / 48000.0);
2602	const double fps_value = (info.fps.num > 0 && info.fps.den > 0) ? info.fps.ToDouble() : 30.0;
2603
2604	// Get the audio packet start time (in seconds)
2605	double audio_seconds = (double(pts) * audio_timebase_value) + pts_offset_seconds;
2606
2607	// Divide by the video timebase, to get the video frame number (frame # is decimal at this point)
2608	double frame = (audio_seconds * fps_value) + 1;
2609
2610	// Frame # as a whole number (no more decimals)
2611	int64_t whole_frame = int64_t(frame);
2612
2613	// Remove the whole number, and only get the decimal of the frame
2614	double sample_start_percentage = frame - double(whole_frame);
2615
2616	// Get Samples per frame
	// NOTE(review): computed before whole_frame is clamped to >= 1 below —
	// confirm GetSamplesPerFrame tolerates non-positive frame numbers.
2617	int samples_per_frame = Frame::GetSamplesPerFrame(whole_frame, info.fps, info.sample_rate, info.channels);
2618
2619	// Calculate the sample # to start on
2620	int sample_start = round(double(samples_per_frame) * sample_start_percentage);
2621
2622	// Protect against broken (i.e. negative) timestamps
2623	if (whole_frame < 1)
2624		whole_frame = 1;
2625	if (sample_start < 0)
2626		sample_start = 0;
2627
2628	// Prepare final audio packet location
2629	AudioLocation location = {whole_frame, sample_start};
2630
2631	// Compare to previous audio packet (and fix small gaps due to varying PTS timestamps)
	// previous_packet_location.frame == -1 means "no previous packet yet".
2632	if (previous_packet_location.frame != -1) {
2633		if (location.is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
2634			int64_t orig_frame = location.frame;
2635			int orig_start = location.sample_start;
2636
2637			// Update sample start, to prevent gaps in audio
2638			location.sample_start = previous_packet_location.sample_start;
2639			location.frame = previous_packet_location.frame;
2640
2641			// Debug output
2642			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)", "Source Frame", orig_frame, "Source Audio Sample", orig_start, "Target Frame", location.frame, "Target Audio Sample", location.sample_start, "pts", pts);
2643
2644		} else {
2645			// Debug output
2646			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::GetAudioPTSLocation (Audio Gap Ignored - too big)", "Previous location frame", previous_packet_location.frame, "Target Frame", location.frame, "Target Audio Sample", location.sample_start, "pts", pts);
2647		}
2648	}
2649
2650	// Set previous location
2651	previous_packet_location = location;
2652
2653	// Return the associated video frame and starting sample #
2654	return location;
2655}
2656
2657// Create a new Frame (or return an existing one) and add it to the working queue.
2658std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {
2659 // Check working cache
2660 std::shared_ptr<Frame> output = working_cache.GetFrame(requested_frame);
2661
2662 if (!output) {
2663 // (re-)Check working cache
2664 output = working_cache.GetFrame(requested_frame);
2665 if(output) return output;
2666
2667 // Create a new frame on the working cache
2668 output = std::make_shared<Frame>(requested_frame, info.width, info.height, "#000000", Frame::GetSamplesPerFrame(requested_frame, info.fps, info.sample_rate, info.channels), info.channels);
2669 output->SetPixelRatio(info.pixel_ratio.num, info.pixel_ratio.den); // update pixel ratio
2670 output->ChannelsLayout(info.channel_layout); // update audio channel layout from the parent reader
2671 output->SampleRate(info.sample_rate); // update the frame's sample rate of the parent reader
2672
2673 working_cache.Add(output);
2674
2675 // Set the largest processed frame (if this is larger)
2676 if (requested_frame > largest_frame_processed)
2677 largest_frame_processed = requested_frame;
2678 }
2679 // Return frame
2680 return output;
2681}
2682
2683// Determine if frame is partial due to seek
2684bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {
2685
2686 // Sometimes a seek gets partial frames, and we need to remove them
2687 bool seek_trash = false;
2688 int64_t max_seeked_frame = seek_audio_frame_found; // determine max seeked frame
2689 if (seek_video_frame_found > max_seeked_frame) {
2690 max_seeked_frame = seek_video_frame_found;
2691 }
2692 if ((info.has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
2693 (info.has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {
2694 seek_trash = true;
2695 }
2696
2697 return seek_trash;
2698}
2699
2700// Check the working queue, and move finished frames to the finished queue
// A working frame becomes "final" when both its video and audio portions are
// known complete: the stream's PTS has advanced past the frame, the stream
// hit EOF, the stream is absent, or the whole file ended. Frames flagged as
// post-seek "trash" are discarded instead of finalized.
2701void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {
2702
2703	// Prevent async calls to the following code
2704	const std::lock_guard<std::recursive_mutex> lock(getFrameMutex);
2705
2706	// Get a list of current working queue frames in the cache (in-progress frames)
2707	std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.GetFrames();
2708	std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;
2709
2710	// Loop through all working queue frames (sorted by frame #)
2711	for(working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr)
2712	{
2713		// Get working frame
2714		std::shared_ptr<Frame> f = *working_itr;
2715
2716		// Was a frame found? Is frame requested yet?
2717		if (!f || f->number > requested_frame) {
2718			// If not, skip to next one
2719			continue;
2720		}
2721
2722		// Calculate PTS in seconds (of working frame), and the most recent processed pts value
2723		double frame_pts_seconds = (double(f->number - 1) / info.fps.ToDouble()) + pts_offset_seconds;
2724		double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);
2725
2726		// Determine if video and audio are ready (based on timestamps)
2727		bool is_video_ready = false;
2728		bool is_audio_ready = false;
2729		double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;
		// NOTE(review): the 1.5s "too far behind" threshold is a heuristic —
		// confirm it against typical GOP/packet spacing before tuning.
2730		if ((frame_pts_seconds <= video_pts_seconds)
2731			|| (recent_pts_diff > 1.5)
2732			|| packet_status.video_eof || packet_status.end_of_file) {
2733			// Video stream is past this frame (so it must be done)
2734			// OR video stream is too far behind, missing, or end-of-file
2735			is_video_ready = true;
2736			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckWorkingFrames (video ready)",
2737				"frame_number", f->number,
2738				"frame_pts_seconds", frame_pts_seconds,
2739				"video_pts_seconds", video_pts_seconds,
2740				"recent_pts_diff", recent_pts_diff);
2741			if (info.has_video && !f->has_image_data &&
2742				(packet_status.video_eof || packet_status.end_of_file)) {
2743				// Frame has no image data. Prefer timeline-previous frames to preserve
2744				// visual order, especially when decode/prefetch is out-of-order.
2745				std::shared_ptr<Frame> previous_frame_instance = final_cache.GetFrame(f->number - 1);
2746				if (previous_frame_instance && previous_frame_instance->has_image_data) {
2747					f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2748				}
2749
2750				// Fall back to last finalized timeline image (survives cache churn).
2751				if (!f->has_image_data
2752					&& last_final_video_frame
2753					&& last_final_video_frame->has_image_data
2754					&& last_final_video_frame->number <= f->number) {
2755					f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
2756				}
2757
2758				// Fall back to the last decoded image only when it is not from the future.
2759				if (!f->has_image_data
2760					&& last_video_frame
2761					&& last_video_frame->has_image_data
2762					&& last_video_frame->number <= f->number) {
2763					f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
2764				}
2765
2766				// Last-resort fallback if no prior image is available.
2767				if (!f->has_image_data) {
2769						"FFmpegReader::CheckWorkingFrames (no previous image found; using black frame)",
2770						"frame_number", f->number);
2771					f->AddColor("#000000");
2772				}
2773			}
2774		}
2775
		// Audio readiness uses a 1.0s margin to allow for partial audio packets.
2776		double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
2777		if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
2778			|| (recent_pts_diff > 1.5)
2779			|| packet_status.audio_eof || packet_status.end_of_file) {
2780			// Audio stream is past this frame (so it must be done)
2781			// OR audio stream is too far behind, missing, or end-of-file
2782			// Adding a bit of margin here, to allow for partial audio packets
2783			is_audio_ready = true;
2784			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckWorkingFrames (audio ready)",
2785				"frame_number", f->number,
2786				"frame_pts_seconds", frame_pts_seconds,
2787				"audio_pts_seconds", audio_pts_seconds,
2788				"audio_pts_diff", audio_pts_diff,
2789				"recent_pts_diff", recent_pts_diff);
2790		}
2791		bool is_seek_trash = IsPartialFrame(f->number);
2792
2793		// Adjust for available streams
2794		if (!info.has_video) is_video_ready = true;
2795		if (!info.has_audio) is_audio_ready = true;
2796
2797		// Debug output
2798		ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckWorkingFrames",
2799			"frame_number", f->number,
2800			"is_video_ready", is_video_ready,
2801			"is_audio_ready", is_audio_ready,
2802			"video_eof", packet_status.video_eof,
2803			"audio_eof", packet_status.audio_eof,
2804			"end_of_file", packet_status.end_of_file);
2805
2806		// Check if working frame is final
2807		if (info.has_video && !f->has_image_data
2808			&& !packet_status.end_of_file && !is_seek_trash) {
2809			if (info.has_single_image) {
2810				// For still-image video (including attached cover art), reuse the most
2811				// recent image so playback does not stall waiting for video EOF.
2812				std::shared_ptr<Frame> previous_frame_instance = final_cache.GetFrame(f->number - 1);
2813				if (previous_frame_instance && previous_frame_instance->has_image_data) {
2814					f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2815				}
2816				if (!f->has_image_data
2817					&& last_final_video_frame
2818					&& last_final_video_frame->has_image_data
2819					&& last_final_video_frame->number <= f->number) {
2820					f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
2821				}
2822				if (!f->has_image_data
2823					&& last_video_frame
2824					&& last_video_frame->has_image_data
2825					&& last_video_frame->number <= f->number) {
2826					f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
2827				}
2828			}
2829
2830			// If both streams have advanced past this frame but the decoder never
2831			// produced image data for it, reuse the most recent non-future image.
2832			// This avoids stalling indefinitely on sparse/missing decoded frames.
2833			if (!f->has_image_data && is_video_ready && is_audio_ready) {
2834				std::shared_ptr<Frame> previous_frame_instance = final_cache.GetFrame(f->number - 1);
2835				if (previous_frame_instance && previous_frame_instance->has_image_data) {
2836					f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2837				}
2838				if (!f->has_image_data
2839					&& last_final_video_frame
2840					&& last_final_video_frame->has_image_data
2841					&& last_final_video_frame->number <= f->number) {
2842					f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
2843				}
2844				if (!f->has_image_data
2845					&& last_video_frame
2846					&& last_video_frame->has_image_data
2847					&& last_video_frame->number <= f->number) {
2848					f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
2849				}
2850			}
2851
2852			// Do not finalize non-EOF video frames without decoded image data.
2853			// This prevents repeated previous-frame fallbacks being cached as real frames.
2854			if (!f->has_image_data) {
2855				continue;
2856			}
2857		}
2858		if ((!packet_status.end_of_file && is_video_ready && is_audio_ready) || packet_status.end_of_file || is_seek_trash) {
2859			// Debug output
2860			ZmqLogger::Instance()->AppendDebugMethod("FFmpegReader::CheckWorkingFrames (mark frame as final)",
2861				"requested_frame", requested_frame,
2862				"f->number", f->number,
2863				"is_seek_trash", is_seek_trash,
2864				"Working Cache Count", working_cache.Count(),
2865				"Final Cache Count", final_cache.Count(),
2866				"end_of_file", packet_status.end_of_file);
2867
2868			if (!is_seek_trash) {
2869				// Move frame to final cache
2870				final_cache.Add(f);
2871				if (f->has_image_data) {
2872					last_final_video_frame = f;
2873				}
2874
2875				// Remove frame from working cache
2876				working_cache.Remove(f->number);
2877
2878				// Update last frame processed
2879				last_frame = f->number;
2880			} else {
2881				// Seek trash, so delete the frame from the working cache, and never add it to the final cache.
2882				working_cache.Remove(f->number);
2883			}
2884
2885		}
2886	}
2887
2888	// Clear vector of frames
2889	working_frames.clear();
2890	working_frames.shrink_to_fit();
2891}
2892
2893// Check for the correct frames per second (FPS) value by scanning the 1st few seconds of video packets.
// Runs at most once per reader (guarded by check_fps). Consumes packets from
// the demuxer, counts video packets falling in seconds 0..2, derives an
// average FPS, and updates info.fps / info.video_length / info.duration.
2894void FFmpegReader::CheckFPS() {
2895	if (check_fps) {
2896		// Do not check FPS more than 1 time
2897		return;
2898	} else {
2899		check_fps = true;
2900	}
2901
	// One counter per scanned second (3 seconds total)
2902	int frames_per_second[3] = {0,0,0};
2903	int max_fps_index = sizeof(frames_per_second) / sizeof(frames_per_second[0]);
2904	int fps_index = 0;
2905
2906	int all_frames_detected = 0;
2907	int starting_frames_detected = 0;
2908
2909	// Loop through the stream
2910	while (true) {
2911		// Get the next packet (if any)
2912		if (GetNextPacket() < 0)
2913			// Break loop when no more packets found
2914			break;
2915
2916		// Video packet
2917		if (packet->stream_index == videoStream) {
2918			// Get the video packet start time (in seconds)
2919			double video_seconds = (double(GetPacketPTS()) * info.video_timebase.ToDouble()) + pts_offset_seconds;
2920			fps_index = int(video_seconds); // truncate float timestamp to int (second 1, second 2, second 3)
2921
2922			// Is this video packet from the first few seconds?
2923			if (fps_index >= 0 && fps_index < max_fps_index) {
2924				// Yes, keep track of how many frames per second (over the first few seconds)
2925				starting_frames_detected++;
2926				frames_per_second[fps_index]++;
2927			}
2928
2929			// Track all video packets detected
2930			all_frames_detected++;
2931		}
2932	}
2933
2934	// Calculate FPS (based on the first few seconds of video packets)
	// NOTE(review): fps_index here is the second-index of the *last* packet
	// seen; dividing by min(fps_index, max_fps_index) can slightly
	// over-estimate FPS when the stream ends mid-second — confirm intended.
2935	float avg_fps = 30.0;
2936	if (starting_frames_detected > 0 && fps_index > 0) {
2937		avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);
2938	}
2939
2940	// Verify average FPS is a reasonable value
2941	if (avg_fps < 8.0) {
2942		// Invalid FPS assumed, so switching to a sane default FPS instead
2943		avg_fps = 30.0;
2944	}
2945
2946	// Update FPS (truncate average FPS to Integer)
2947	info.fps = Fraction(int(avg_fps), 1);
2948
2949	// Update Duration and Length
2950	if (all_frames_detected > 0) {
2951		// Use all video frames detected to calculate # of frames
2952		info.video_length = all_frames_detected;
2953		info.duration = all_frames_detected / avg_fps;
2954	} else {
2955		// Use previous duration to calculate # of frames
2956		info.video_length = info.duration * avg_fps;
2957	}
2958
2959	// Update video bit rate
2961}
2962
2963// Remove AVFrame from cache (and deallocate its memory)
// Frees the frame's pixel buffer, and (except on WIN32) the AVFrame struct itself.
2964void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {
2965	// Remove pFrame (if exists)
2966	if (remove_frame) {
2967		// Free the pixel data buffer (allocated separately from the AVFrame struct)
2968		av_freep(&remove_frame->data[0]);
	// NOTE(review): on WIN32 the AVFrame struct itself is deliberately not
	// freed here (only its data buffer above) — confirm it is released
	// elsewhere, otherwise this leaks one AVFrame per call on Windows.
2969#ifndef WIN32
2970		AV_FREE_FRAME(&remove_frame);
2971#endif
2972	}
2973}
2974
2975// Remove AVPacket from cache (and deallocate its memory)
// Frees the packet's internal buffers via AV_FREE_PACKET, then releases the
// AVPacket wrapper itself. The delete implies the packet was heap-allocated
// with new — presumably at the demux site; confirm at the allocation points.
2976void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {
2977	// deallocate memory for packet
2978	AV_FREE_PACKET(remove_packet);
2979
2980	// Delete the object
2981	delete remove_packet;
2982}
2983
2984// Generate JSON string of this object
2985std::string FFmpegReader::Json() const {
2986
2987 // Return formatted string
2988 return JsonValue().toStyledString();
2989}
2990
2991// Generate Json::Value for this object
// Serializes reader state on top of the ReaderBase properties: the reader
// type tag, the media path, and the duration_strategy as a string token
// (consumed by SetJsonValue below).
2992Json::Value FFmpegReader::JsonValue() const {
2993
2994	// Create root json object
2995	Json::Value root = ReaderBase::JsonValue(); // get parent properties
2996	root["type"] = "FFmpegReader";
2997	root["path"] = path;
2998	switch (duration_strategy) {
3000		root["duration_strategy"] = "VideoPreferred";
3001		break;
3003		root["duration_strategy"] = "AudioPreferred";
3004		break;
3006	default:
	// LongestStream is the catch-all default strategy
3007		root["duration_strategy"] = "LongestStream";
3008		break;
3009	}
3010
3011	// return JsonValue
3012	return root;
3013}
3014
3015// Load JSON string into this object
3016void FFmpegReader::SetJson(const std::string value) {
3017
3018 // Parse JSON string into JSON objects
3019 try {
3020 const Json::Value root = openshot::stringToJson(value);
3021 // Set all values that match
3022 SetJsonValue(root);
3023 }
3024 catch (const std::exception& e) {
3025 // Error parsing JSON (or missing keys)
3026 throw InvalidJSON("JSON is invalid (missing keys or invalid data types)");
3027 }
3028}
3029
3030// Load Json::Value into this object
// Applies parent (ReaderBase) properties first, then reader-specific keys:
// "path" and "duration_strategy". Unrecognized strategy strings fall back
// to LongestStream (mirrors the default branch in JsonValue above).
3031void FFmpegReader::SetJsonValue(const Json::Value root) {
3032
3033	// Set parent data
3035
3036	// Set data from Json (if key is found)
3037	if (!root["path"].isNull())
3038		path = root["path"].asString();
3039	if (!root["duration_strategy"].isNull()) {
3040		const std::string strategy = root["duration_strategy"].asString();
3041		if (strategy == "VideoPreferred") {
3042			duration_strategy = DurationStrategy::VideoPreferred;
3043		} else if (strategy == "AudioPreferred") {
3044			duration_strategy = DurationStrategy::AudioPreferred;
3045		} else {
	// Any other (or unknown) token maps to the catch-all strategy
3046			duration_strategy = DurationStrategy::LongestStream;
3047		}
3048	}
3049}
Shared helpers for Crop effect scaling logic.
Header file for all Exception classes.
AVPixelFormat hw_de_av_pix_fmt_global
AVHWDeviceType hw_de_av_device_type_global
int hw_de_on
Header file for FFmpegReader class.
Header file for FFmpegUtilities.
#define AV_FREE_CONTEXT(av_context)
#define SWR_INIT(ctx)
#define AV_FREE_FRAME(av_frame)
#define SWR_CONVERT(ctx, out, linesize, out_count, in, linesize2, in_count)
#define SWR_ALLOC()
#define SWR_CLOSE(ctx)
#define AV_GET_CODEC_TYPE(av_stream)
#define AV_GET_CODEC_PIXEL_FORMAT(av_stream, av_context)
#define AV_GET_CODEC_CONTEXT(av_stream, av_codec)
#define AV_FIND_DECODER_CODEC_ID(av_stream)
#define AV_ALLOCATE_FRAME()
#define AV_REGISTER_ALL
#define PIX_FMT_RGBA
#define SWR_FREE(ctx)
#define AV_COPY_PICTURE_DATA(av_frame, buffer, pix_fmt, width, height)
#define AV_FREE_PACKET(av_packet)
#define SWRCONTEXT
#define AVCODEC_REGISTER_ALL
#define AV_GET_CODEC_ATTRIBUTES(av_stream, av_context)
#define AV_ALLOCATE_IMAGE(av_frame, pix_fmt, width, height)
#define AV_GET_SAMPLE_FORMAT(av_stream, av_context)
#define AV_RESET_FRAME(av_frame)
AVDictionary * opts
Cross-platform helper to encourage returning freed memory to the OS.
#define FF_VIDEO_NUM_PROCESSORS
#define OPEN_MP_NUM_PROCESSORS
#define FF_AUDIO_NUM_PROCESSORS
Header file for Timeline class.
Header file for ZeroMQ-based Logger class.
void SetMaxBytesFromInfo(int64_t number_of_frames, int width, int height, int sample_rate, int channels)
Set maximum bytes to a different amount based on a ReaderInfo struct.
Definition CacheBase.cpp:28
int64_t Count()
Count the frames in the queue.
void Add(std::shared_ptr< openshot::Frame > frame)
Add a Frame to the cache.
std::shared_ptr< openshot::Frame > GetFrame(int64_t frame_number)
Get a frame from the cache.
std::vector< std::shared_ptr< openshot::Frame > > GetFrames()
Get an array of all Frames.
void Remove(int64_t frame_number)
Remove a specific frame.
void Clear()
Clear the cache of all frames.
This class represents a clip (used to arrange readers on the timeline)
Definition Clip.h:89
openshot::Keyframe scale_x
Curve representing the horizontal scaling in percent (0 to 1)
Definition Clip.h:318
openshot::TimelineBase * ParentTimeline() override
Get the associated Timeline pointer (if any)
Definition Clip.h:296
openshot::Keyframe scale_y
Curve representing the vertical scaling in percent (0 to 1)
Definition Clip.h:319
openshot::ScaleType scale
The scale determines how a clip should be resized to fit its parent.
Definition Clip.h:179
double Y
The Y value of the coordinate (usually representing the value of the property being animated)
Definition Coordinate.h:41
This class uses the FFmpeg libraries, to open video files and audio files, and return openshot::Frame...
void Open() override
Open File - which is called by the constructor automatically.
bool HardwareDecodeSuccessful() const override
Return true if hardware decode was requested and successfully produced at least one frame.
FFmpegReader(const std::string &path, bool inspect_reader=true)
Constructor for FFmpegReader.
Json::Value JsonValue() const override
Generate Json::Value for this object.
bool GetIsDurationKnown()
Return true if frame can be read with GetFrame()
void SetJsonValue(const Json::Value root) override
Load Json::Value into this object.
CacheMemory final_cache
Final cache object used to hold final frames.
virtual ~FFmpegReader()
Destructor.
std::string Json() const override
Generate JSON string of this object.
std::shared_ptr< openshot::Frame > GetFrame(int64_t requested_frame) override
void Close() override
Close File.
void SetJson(const std::string value) override
Load JSON string into this object.
This class represents a fraction.
Definition Fraction.h:30
int num
Numerator for the fraction.
Definition Fraction.h:32
float ToFloat()
Return this fraction as a float (i.e. 1/2 = 0.5)
Definition Fraction.cpp:35
double ToDouble() const
Return this fraction as a double (i.e. 1/2 = 0.5)
Definition Fraction.cpp:40
Fraction Reciprocal() const
Return the reciprocal as a Fraction.
Definition Fraction.cpp:78
int den
Denominator for the fraction.
Definition Fraction.h:33
int GetSamplesPerFrame(openshot::Fraction fps, int sample_rate, int channels)
Calculate the # of samples per video frame (for the current frame number)
Definition Frame.cpp:484
Exception when no valid codec is found for a file.
Definition Exceptions.h:179
Exception for files that can not be found or opened.
Definition Exceptions.h:194
Exception for invalid JSON.
Definition Exceptions.h:224
Point GetMaxPoint() const
Get max point (by Y coordinate)
Definition KeyFrame.cpp:245
Exception when no streams are found in the file.
Definition Exceptions.h:292
Exception when memory could not be allocated.
Definition Exceptions.h:355
Coordinate co
This is the primary coordinate.
Definition Point.h:66
openshot::ReaderInfo info
Information about the current media file.
Definition ReaderBase.h:88
virtual void SetJsonValue(const Json::Value root)=0
Load Json::Value into this object.
virtual Json::Value JsonValue() const =0
Generate Json::Value for this object.
std::recursive_mutex getFrameMutex
Mutex for multiple threads.
Definition ReaderBase.h:79
openshot::ClipBase * ParentClip()
Parent clip object of this reader (which can be unparented and NULL)
Exception when a reader is closed, and a frame is requested.
Definition Exceptions.h:370
int DE_LIMIT_WIDTH_MAX
Maximum columns that hardware decode can handle.
Definition Settings.h:80
int HW_DE_DEVICE_SET
Which GPU to use to decode (0 is the first)
Definition Settings.h:83
int DE_LIMIT_HEIGHT_MAX
Maximum rows that hardware decode can handle.
Definition Settings.h:77
static Settings * Instance()
Create or get an instance of this logger singleton (invoke the class with this method)
Definition Settings.cpp:23
int HARDWARE_DECODER
Use video codec for faster video decoding (if supported)
Definition Settings.h:62
int preview_height
Optional preview height of timeline image. If your preview window is smaller than the timeline,...
int preview_width
Optional preview width of timeline image. If your preview window is smaller than the timeline,...
This class represents a timeline.
Definition Timeline.h:153
void Log(std::string message)
Log message to all subscribers of this logger (if any)
void AppendDebugMethod(std::string method_name, std::string arg1_name="", float arg1_value=-1.0, std::string arg2_name="", float arg2_value=-1.0, std::string arg3_name="", float arg3_value=-1.0, std::string arg4_name="", float arg4_value=-1.0, std::string arg5_name="", float arg5_value=-1.0, std::string arg6_name="", float arg6_value=-1.0)
Append debug information.
static ZmqLogger * Instance()
Create or get an instance of this logger singleton (invoke the class with this method)
Definition ZmqLogger.cpp:35
This namespace is the default namespace for all code in the openshot library.
Definition Compressor.h:29
@ SCALE_FIT
Scale the clip until either height or width fills the canvas (with no cropping)
Definition Enums.h:38
@ SCALE_STRETCH
Scale the clip until both height and width fill the canvas (distort to fit)
Definition Enums.h:39
@ SCALE_CROP
Scale the clip until both height and width fill the canvas (cropping the overlap)
Definition Enums.h:37
ChannelLayout
This enumeration determines the audio channel layout (such as stereo, mono, 5 point surround,...
DurationStrategy
This enumeration determines which duration source to favor.
Definition Enums.h:60
@ VideoPreferred
Prefer the video stream's duration, fallback to audio then container.
@ LongestStream
Use the longest value from video, audio, or container.
@ AudioPreferred
Prefer the audio stream's duration, fallback to video then container.
void ApplyCropResizeScale(Clip *clip, int source_width, int source_height, int &max_width, int &max_height)
Scale the requested max_width / max_height based on the Crop resize amount, capped by source size.
const Json::Value stringToJson(const std::string value)
Definition Json.cpp:16
This struct holds the associated video frame and starting sample # for an audio packet.
bool is_near(AudioLocation location, int samples_per_frame, int64_t amount)
void reset(bool eof)
int audio_bit_rate
The bit rate of the audio stream (in bytes)
Definition ReaderBase.h:59
int video_bit_rate
The bit rate of the video stream (in bytes)
Definition ReaderBase.h:49
bool has_single_image
Determines if this file only contains a single image.
Definition ReaderBase.h:42
float duration
Length of time (in seconds)
Definition ReaderBase.h:43
openshot::Fraction audio_timebase
The audio timebase determines how long each audio packet should be played.
Definition ReaderBase.h:64
int width
The width of the video (in pixels)
Definition ReaderBase.h:46
int channels
The number of audio channels used in the audio stream.
Definition ReaderBase.h:61
openshot::Fraction fps
Frames per second, as a fraction (i.e. 24/1 = 24 fps)
Definition ReaderBase.h:48
openshot::Fraction display_ratio
The ratio of width to height of the video stream (i.e. 640x480 has a ratio of 4/3)
Definition ReaderBase.h:51
int height
The height of the video (in pixels)
Definition ReaderBase.h:45
int pixel_format
The pixel format (i.e. YUV420P, RGB24, etc...)
Definition ReaderBase.h:47
int64_t video_length
The number of frames in the video stream.
Definition ReaderBase.h:53
std::string acodec
The name of the audio codec used to encode / decode the video stream.
Definition ReaderBase.h:58
std::map< std::string, std::string > metadata
An optional map/dictionary of metadata for this reader.
Definition ReaderBase.h:65
std::string vcodec
The name of the video codec used to encode / decode the video stream.
Definition ReaderBase.h:52
openshot::Fraction pixel_ratio
The pixel ratio of the video stream as a fraction (i.e. some pixels are not square)
Definition ReaderBase.h:50
openshot::ChannelLayout channel_layout
The channel layout (mono, stereo, 5 point surround, etc...)
Definition ReaderBase.h:62
bool has_video
Determines if this file has a video stream.
Definition ReaderBase.h:40
bool has_audio
Determines if this file has an audio stream.
Definition ReaderBase.h:41
openshot::Fraction video_timebase
The video timebase determines how long each frame stays on the screen.
Definition ReaderBase.h:55
int video_stream_index
The index of the video stream.
Definition ReaderBase.h:54
int sample_rate
The number of audio samples per second (44100 is a common sample rate)
Definition ReaderBase.h:60
int audio_stream_index
The index of the audio stream.
Definition ReaderBase.h:63
int64_t file_size
Size of file (in bytes)
Definition ReaderBase.h:44