// Defined as 0 here.
// NOTE(review): presumably toggles the VAAPI-specific decode path - confirm where it is tested.
32 #define ENABLE_VAAPI 0
// Upper bounds on frame dimensions.
// NOTE(review): presumably the caps applied when hardware decoding is used, in pixels -
// confirm against the code that reads these macros.
35 #define MAX_SUPPORTED_WIDTH 1950
36 #define MAX_SUPPORTED_HEIGHT 1100
39 #include "libavutil/hwcontext_vaapi.h"
// VAAPI decode context: VAAPI handles (entrypoint, context id) plus pointers to
// the libavutil hardware device and frames contexts.
// NOTE(review): this looks like a local copy of a decoder-private layout - confirm it
// stays in sync with the FFmpeg version being built against.
41 typedef struct VAAPIDecodeContext {
43 VAEntrypoint va_entrypoint;
45 VAContextID va_context;
// Conditional on FF_API_STRUCT_VAAPI_CONTEXT.
47 #if FF_API_STRUCT_VAAPI_CONTEXT
50 struct vaapi_context *old_context;
51 AVBufferRef *device_ref;
// Generic hardware device context and its VAAPI-specific counterpart.
55 AVHWDeviceContext *device;
56 AVVAAPIDeviceContext *hwctx;
// Generic hardware frames context and its VAAPI-specific counterpart.
58 AVHWFramesContext *frames;
59 AVVAAPIFramesContext *hwfc;
61 enum AVPixelFormat surface_format;
// Map a deprecated YUVJ* pixel format to its YUV* counterpart.
// @param pix_fmt        pixel format to normalize
// @param is_full_range  taken by non-const reference, so the function can update the caller's flag
//                       (NOTE(review): presumably set to true for the J formats - confirm)
// @return the matching YUV format for each YUVJ case listed below
78 static AVPixelFormat NormalizeDeprecatedPixFmt(AVPixelFormat pix_fmt,
bool& is_full_range) {
80 case AV_PIX_FMT_YUVJ420P:
82 return AV_PIX_FMT_YUV420P;
83 case AV_PIX_FMT_YUVJ422P:
85 return AV_PIX_FMT_YUV422P;
86 case AV_PIX_FMT_YUVJ444P:
88 return AV_PIX_FMT_YUV444P;
89 case AV_PIX_FMT_YUVJ440P:
91 return AV_PIX_FMT_YUV440P;
// Only compiled when AV_PIX_FMT_YUVJ411P is visible to the preprocessor.
// NOTE(review): if AV_PIX_FMT_YUVJ411P is an enumerator rather than a macro, this #ifdef
// never fires - confirm.
92 #ifdef AV_PIX_FMT_YUVJ411P
93 case AV_PIX_FMT_YUVJ411P:
95 return AV_PIX_FMT_YUV411P;
// Member-initializer list followed by the start of the body.
// NOTE(review): presumably the FFmpegReader constructor - confirm against its signature.
// Stream indices start at -1 and the format/codec/stream/packet/frame pointers at NULL;
// the reader starts closed (is_open false) with seeking enabled (enable_seek true).
106 :
path(
path), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
107 pStream(NULL), aStream(NULL), packet(NULL), pFrame(NULL), is_open(false), is_duration_known(false),
108 check_interlace(false), check_fps(false), duration_strategy(duration_strategy), previous_packet_location{-1, 0},
109 is_seeking(false), seeking_pts(0), seeking_frame(0), is_video_seek(true), seek_count(0),
110 seek_audio_frame_found(0), seek_video_frame_found(0), last_seek_max_frame(-1), seek_stagnant_count(0),
111 last_frame(0), largest_frame_processed(0), current_video_frame(0), audio_pts(0), video_pts(0),
112 hold_packet(false), pts_offset_seconds(0.0), audio_pts_seconds(0.0), video_pts_seconds(0.0),
113 NO_PTS_OFFSET(-99999), enable_seek(true) {
// Replace the 0.0 values from the initializer list with the NO_PTS_OFFSET value (-99999).
120 pts_offset_seconds = NO_PTS_OFFSET;
121 video_pts_seconds = NO_PTS_OFFSET;
122 audio_pts_seconds = NO_PTS_OFFSET;
// NOTE(review): inspect_reader is presumably a constructor argument - confirm what it gates.
131 if (inspect_reader) {
153 if (abs(diff) <= amount)
// Pixel-format selection callback; it is assigned to pCodecCtx->get_format elsewhere in this file.
// @param ctx       codec context requesting a format
// @param pix_fmts  candidate formats, terminated by AV_PIX_FMT_NONE
// @return AV_PIX_FMT_NONE once the loop has finished; the per-format cases are expected
//         to return earlier (NOTE(review): confirm what each case returns)
164 static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx,
const enum AVPixelFormat *pix_fmts)
166 const enum AVPixelFormat *p;
// Visit every candidate until the AV_PIX_FMT_NONE terminator.
171 for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
// Hardware formats guarded by the Linux preprocessor check.
173 #if defined(__linux__)
175 case AV_PIX_FMT_VAAPI:
182 case AV_PIX_FMT_VDPAU:
// NOTE(review): DXVA2/D3D11 are presumably under a _WIN32 guard - confirm.
192 case AV_PIX_FMT_DXVA2_VLD:
199 case AV_PIX_FMT_D3D11:
207 #if defined(__APPLE__)
209 case AV_PIX_FMT_VIDEOTOOLBOX:
218 case AV_PIX_FMT_CUDA:
// No candidate was accepted.
238 return AV_PIX_FMT_NONE;
// Check a codec id against the codecs listed below (H.264, MPEG-2 video, VC-1, WMV1, WMV2, WMV3).
// @param codecid  codec identifier; the caller in this file passes pCodecCtx->codec_id
// @return int; the caller stores it in hw_de_supported
//         (NOTE(review): the exact values for match / no match should be confirmed)
241 int FFmpegReader::IsHardwareDecodeSupported(
int codecid)
245 case AV_CODEC_ID_H264:
246 case AV_CODEC_ID_MPEG2VIDEO:
247 case AV_CODEC_ID_VC1:
248 case AV_CODEC_ID_WMV1:
249 case AV_CODEC_ID_WMV2:
250 case AV_CODEC_ID_WMV3:
// Open routine (its log tags name it FFmpegReader::Open): opens the container at `path`,
// selects the first video and first audio stream, opens their decoders (optionally with a
// hardware device) and copies container / stream metadata into info.metadata.
// Holds getFrameMutex for the duration of the scope.
265 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
// Reset the hardware-decode status flags and error counter.
271 hw_decode_failed =
false;
272 hw_decode_error_count = 0;
273 hw_decode_succeeded =
false;
// Open the input and probe its streams; a non-zero / negative return is the failure case.
278 if (avformat_open_input(&pFormatCtx,
path.c_str(), NULL, NULL) != 0)
282 if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
289 packet_status.
reset(
true);
// Scan all streams; the `< 0` guards keep only the first video and the first audio stream.
292 for (
unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
294 if (
AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
301 if (
AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {
// Neither a video nor an audio stream was found.
308 if (videoStream == -1 && audioStream == -1)
// ---- Video stream setup ----
312 if (videoStream != -1) {
317 pStream = pFormatCtx->streams[videoStream];
323 const AVCodec *pCodec = avcodec_find_decoder(codecId);
324 AVDictionary *
opts = NULL;
// Controls the loop that ends at `while (retry_decode_open)`: starts at 2, is set to 1 when
// the coded size is rejected for hardware decoding, and to 0 when no further attempt is needed.
325 int retry_decode_open = 2;
// Hardware support is only probed on the first pass (value still 2) and when hw_de_on is set.
330 if (
hw_de_on && (retry_decode_open==2)) {
332 hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);
335 retry_decode_open = 0;
340 if (pCodec == NULL) {
341 throw InvalidCodec(
"A valid video codec could not be found for this file.",
path);
// Pass strict=experimental to the decoder options.
345 av_dict_set(&
opts,
"strict",
"experimental", 0);
349 int i_decoder_hw = 0;
351 char *adapter_ptr = NULL;
// Let the decoder ask get_hw_dec_format which pixel format to use.
357 pCodecCtx->get_format = get_hw_dec_format;
// Only adapter numbers 0..2 are accepted.
359 if (adapter_num < 3 && adapter_num >=0) {
360 #if defined(__linux__)
// Linux: adapter N maps to the DRM render node /dev/dri/renderD(128+N).
361 snprintf(adapter,
sizeof(adapter),
"/dev/dri/renderD%d", adapter_num+128);
362 adapter_ptr = adapter;
// Translate the configured decoder index into an FFmpeg hardware device type (Linux choices).
364 switch (i_decoder_hw) {
366 hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
369 hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
372 hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
375 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
378 hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
382 #elif defined(_WIN32)
// Same translation for Windows.
385 switch (i_decoder_hw) {
387 hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
390 hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
393 hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
396 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
399 hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
402 #elif defined(__APPLE__)
// Same translation for macOS.
405 switch (i_decoder_hw) {
407 hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
410 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
413 hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
// On Linux the adapter path must also be writable (access(..., W_OK)); the other platforms
// only require a non-null adapter pointer.
423 #if defined(__linux__)
424 if( adapter_ptr != NULL && access( adapter_ptr, W_OK ) == 0 ) {
425 #elif defined(_WIN32)
426 if( adapter_ptr != NULL ) {
427 #elif defined(__APPLE__)
428 if( adapter_ptr != NULL ) {
437 hw_device_ctx = NULL;
// Create the hardware device context; a return >= 0 means success.
439 if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
440 const char* hw_name = av_hwdevice_get_type_name(hw_de_av_device_type);
441 std::string hw_msg =
"HW decode active: ";
442 hw_msg += (hw_name ? hw_name :
"unknown");
// Give the codec context its own reference to the device; a null result is the failure case.
444 if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {
// Clear the FF_THREAD_FRAME bit from the codec's thread_type.
487 pCodecCtx->thread_type &= ~FF_THREAD_FRAME;
// Open the video decoder; a negative return is reported with the FFmpeg error text.
491 int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &
opts);
492 if (avcodec_return < 0) {
493 std::stringstream avcodec_error_msg;
494 avcodec_error_msg <<
"A video codec was found, but could not be opened. Error: " << av_err2string(avcodec_return);
// Ask the hardware device for its frame-size constraints.
// NOTE(review): confirm hwconfig and constraints are null-checked before the dereferences below.
500 AVHWFramesConstraints *constraints = NULL;
501 void *hwconfig = NULL;
502 hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);
506 ((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
507 constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx,hwconfig);
// Coded size outside the device limits: schedule another pass and drop the device context.
510 if (pCodecCtx->coded_width < constraints->min_width ||
511 pCodecCtx->coded_height < constraints->min_height ||
512 pCodecCtx->coded_width > constraints->max_width ||
513 pCodecCtx->coded_height > constraints->max_height) {
516 retry_decode_open = 1;
519 av_buffer_unref(&hw_device_ctx);
520 hw_device_ctx = NULL;
525 ZmqLogger::Instance()->
AppendDebugMethod(
"\nDecode hardware acceleration is used\n",
"Min width :", constraints->min_width,
"Min Height :", constraints->min_height,
"MaxWidth :", constraints->max_width,
"MaxHeight :", constraints->max_height,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
526 retry_decode_open = 0;
528 av_hwframe_constraints_free(&constraints);
// Size check against max_w / max_h.
// NOTE(review): the log text says "TOO LARGE" but the condition also catches negative dimensions.
541 if (pCodecCtx->coded_width < 0 ||
542 pCodecCtx->coded_height < 0 ||
543 pCodecCtx->coded_width > max_w ||
544 pCodecCtx->coded_height > max_h ) {
545 ZmqLogger::Instance()->
AppendDebugMethod(
"DIMENSIONS ARE TOO LARGE for hardware acceleration\n",
"Max Width :", max_w,
"Max Height :", max_h,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
547 retry_decode_open = 1;
550 av_buffer_unref(&hw_device_ctx);
551 hw_device_ctx = NULL;
555 ZmqLogger::Instance()->
AppendDebugMethod(
"\nDecode hardware acceleration is used\n",
"Max Width :", max_w,
"Max Height :", max_h,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
556 retry_decode_open = 0;
564 retry_decode_open = 0;
566 }
while (retry_decode_open);
// ---- Audio stream setup ----
575 if (audioStream != -1) {
580 aStream = pFormatCtx->streams[audioStream];
586 const AVCodec *aCodec = avcodec_find_decoder(codecId);
// A missing audio decoder is tolerated: audio_opened simply stays false.
592 bool audio_opened =
false;
593 if (aCodec != NULL) {
595 AVDictionary *
opts = NULL;
596 av_dict_set(&
opts,
"strict",
"experimental", 0);
599 audio_opened = (avcodec_open2(aCodecCtx, aCodec, &
opts) >= 0);
// Audio whose parameters are unusable (one test: sample_fmt is AV_SAMPLE_FMT_NONE) is disabled.
610 const bool invalid_audio_info =
615 (aCodecCtx->sample_fmt == AV_SAMPLE_FMT_NONE);
616 if (invalid_audio_info) {
618 "FFmpegReader::Open (Disable invalid audio stream)",
623 "sample_fmt",
static_cast<int>(aCodecCtx ? aCodecCtx->sample_fmt : AV_SAMPLE_FMT_NONE));
629 if (avcodec_is_open(aCodecCtx)) {
630 avcodec_flush_buffers(aCodecCtx);
640 "FFmpegReader::Open (Audio codec unavailable; disabling audio)",
641 "audioStream", audioStream);
657 "FFmpegReader::Open (Invalid FPS detected; applying fallback)",
665 "FFmpegReader::Open (Invalid video_timebase detected; applying fallback)",
// Copy every container-level metadata entry into info.metadata; values are whitespace-trimmed.
672 AVDictionaryEntry *tag = NULL;
673 while ((tag = av_dict_get(pFormatCtx->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
674 QString str_key = tag->key;
675 QString str_value = tag->value;
676 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
// Inspect side data of every video stream: display-matrix rotation and spherical mapping.
680 for (
unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
681 AVStream* st = pFormatCtx->streams[i];
682 if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
// The display matrix is only used when at least 9 int32_t values are present; the rotation
// reported by av_display_rotation_get is negated.
683 size_t side_data_size = 0;
684 const uint8_t *displaymatrix = ffmpeg_stream_get_side_data(
685 st, AV_PKT_DATA_DISPLAYMATRIX, &side_data_size);
687 side_data_size >= 9 *
sizeof(int32_t) &&
689 double rotation = -av_display_rotation_get(
690 reinterpret_cast<const int32_t *
>(displaymatrix));
691 if (std::isnan(rotation))
// Spherical side data must be at least sizeof(AVSphericalMapping) bytes before it is reinterpreted.
696 const uint8_t *spherical = ffmpeg_stream_get_side_data(
697 st, AV_PKT_DATA_SPHERICAL, &side_data_size);
698 if (spherical && side_data_size >=
sizeof(AVSphericalMapping)) {
701 const AVSphericalMapping *map =
702 reinterpret_cast<const AVSphericalMapping *
>(spherical);
703 const char *proj_name = av_spherical_projection_name(map->projection);
704 info.
metadata[
"spherical_projection"] = proj_name ? proj_name :
"unknown";
// Divide the stored integer by 65536.0 (NOTE(review): presumably 16.16 fixed-point degrees - confirm).
706 auto to_deg = [](int32_t v) {
707 return static_cast<double>(v) / 65536.0;
709 info.
metadata[
"spherical_yaw"] = std::to_string(to_deg(map->yaw));
710 info.
metadata[
"spherical_pitch"] = std::to_string(to_deg(map->pitch));
711 info.
metadata[
"spherical_roll"] = std::to_string(to_deg(map->roll));
// Reset the previous-packet frame marker to -1.
718 previous_packet_location.
frame = -1;
// Shutdown routine (NOTE(review): presumably FFmpegReader::Close - confirm against the
// signature): flushes the open decoders, releases hardware / scaler / format resources,
// clears the working cache and resets the frame counters.
// Holds getFrameMutex for the duration of the scope.
752 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
// Remember the current packet so it can be released below.
758 AVPacket *recent_packet = packet;
// NOTE(review): max_attempts presumably bounds a drain loop reported via "attempts" - confirm.
763 int max_attempts = 128;
768 "attempts", attempts);
780 RemoveAVPacket(recent_packet);
// Flush the video decoder only if it is actually open.
785 if(avcodec_is_open(pCodecCtx)) {
786 avcodec_flush_buffers(pCodecCtx);
// Drop this object's reference to the hardware device context.
792 av_buffer_unref(&hw_device_ctx);
793 hw_device_ctx = NULL;
// Free the cached libswscale context.
797 if (img_convert_ctx) {
798 sws_freeContext(img_convert_ctx);
799 img_convert_ctx =
nullptr;
801 if (pFrameRGB_cached) {
// Flush the audio decoder only if it is actually open.
808 if(avcodec_is_open(aCodecCtx)) {
809 avcodec_flush_buffers(aCodecCtx);
821 working_cache.
Clear();
// Close the container.
// NOTE(review): avformat_close_input is documented to free the context and null the pointer,
// which would make the following av_freep redundant - confirm.
824 avformat_close_input(&pFormatCtx);
825 av_freep(&pFormatCtx);
// Reset frame / seek counters and drop the remembered video frames.
832 largest_frame_processed = 0;
833 seek_audio_frame_found = 0;
834 seek_video_frame_found = 0;
835 current_video_frame = 0;
836 last_video_frame.reset();
837 last_final_video_frame.reset();
// Report whether the selected video stream is flagged as an attached picture.
// @return a true value only when pFormatCtx is set, videoStream is >= 0, the stream pointer
//         is non-null and its disposition has the AV_DISPOSITION_ATTACHED_PIC bit set
841 bool FFmpegReader::HasAlbumArt() {
845 return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
846 && (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);
// Choose the duration, in seconds, according to duration_strategy.
// A candidate counts as available only when it is strictly greater than 0.0.
// @return the chosen duration in seconds
//         (NOTE(review): confirm the value returned when no candidate is available)
849 double FFmpegReader::PickDurationSeconds()
const {
// Predicate shared by all strategies: strictly positive means "known".
850 auto has_value = [](
double value) {
return value > 0.0; };
852 switch (duration_strategy) {
// Preference order: video stream, then audio stream, then container.
854 if (has_value(video_stream_duration_seconds))
855 return video_stream_duration_seconds;
856 if (has_value(audio_stream_duration_seconds))
857 return audio_stream_duration_seconds;
858 if (has_value(format_duration_seconds))
859 return format_duration_seconds;
// Preference order: audio stream, then video stream, then container.
862 if (has_value(audio_stream_duration_seconds))
863 return audio_stream_duration_seconds;
864 if (has_value(video_stream_duration_seconds))
865 return video_stream_duration_seconds;
866 if (has_value(format_duration_seconds))
867 return format_duration_seconds;
// Take the largest of the available candidates.
872 double longest = 0.0;
873 if (has_value(video_stream_duration_seconds))
874 longest = std::max(longest, video_stream_duration_seconds);
875 if (has_value(audio_stream_duration_seconds))
876 longest = std::max(longest, audio_stream_duration_seconds);
877 if (has_value(format_duration_seconds))
878 longest = std::max(longest, format_duration_seconds);
879 if (has_value(longest))
// Fallback order: container duration, then the inferred duration.
885 if (has_value(format_duration_seconds))
886 return format_duration_seconds;
887 if (has_value(inferred_duration_seconds))
888 return inferred_duration_seconds;
// Apply the duration chosen by PickDurationSeconds(): convert it to a whole number of
// frames, store the frame-aligned duration in info.duration and update is_duration_known.
893 void FFmpegReader::ApplyDurationStrategy() {
895 const double chosen_seconds = PickDurationSeconds();
// Without a positive duration and a positive frame rate the duration is treated as unknown.
897 if (chosen_seconds <= 0.0 || fps_value <= 0.0) {
900 is_duration_known =
false;
// Round to the nearest whole frame.
904 const int64_t frames =
static_cast<int64_t
>(std::llround(chosen_seconds * fps_value));
908 is_duration_known =
false;
// Recompute the duration from the rounded frame count so it is a whole number of frames.
913 info.
duration =
static_cast<float>(
static_cast<double>(frames) / fps_value);
914 is_duration_known =
true;
// Refresh the audio-related reader state: channel layout / count, file size, duration
// candidates and the audio stream's metadata entries.
917 void FFmpegReader::UpdateAudioInfo() {
// Obtain a channel layout; it is released with av_channel_layout_uninit at the end.
927 AVChannelLayout audio_ch_layout = ffmpeg_get_valid_channel_layout(
929 if (audio_ch_layout.nb_channels > 0) {
932 codec_channels = audio_ch_layout.nb_channels;
// Helper that only ever raises the stored duration (keeps the maximum seen).
944 auto record_duration = [](
double &target,
double seconds) {
946 target = std::max(target, seconds);
// File size from the I/O context, or -1 when there is no I/O context.
951 info.
file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
// Only strictly positive durations are taken from the stream and the container.
982 if (aStream->duration > 0) {
985 if (pFormatCtx->duration > 0) {
// The container duration is divided by AV_TIME_BASE to get seconds.
987 record_duration(format_duration_seconds,
static_cast<double>(pFormatCtx->duration) / AV_TIME_BASE);
1017 ApplyDurationStrategy();
// Copy every audio-stream metadata entry into info.metadata; values are whitespace-trimmed.
1020 AVDictionaryEntry *tag = NULL;
1021 while ((tag = av_dict_get(aStream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
1022 QString str_key = tag->key;
1023 QString str_value = tag->value;
1024 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
1027 av_channel_layout_uninit(&audio_ch_layout);
// Refresh the video-related reader state: file size, frame rate, aspect ratio, interlacing,
// duration candidates (with placeholders for streams of unknown length and still images)
// and the video stream's metadata entries.
1031 void FFmpegReader::UpdateVideoInfo() {
// Helper that only ever raises the stored duration (keeps the maximum seen).
1037 auto record_duration = [](
double &target,
double seconds) {
1039 target = std::max(target, seconds);
// File size from the I/O context, or -1 when there is no I/O context.
1044 info.
file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
1051 AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);
// A zero numerator means the stream carries no sample aspect ratio.
1063 if (pStream->sample_aspect_ratio.num != 0) {
// Field-order detection is attempted once; AV_FIELD_UNKNOWN re-arms it by clearing the flag.
1086 if (!check_interlace) {
1087 check_interlace =
true;
1089 switch(field_order) {
1090 case AV_FIELD_PROGRESSIVE:
1103 case AV_FIELD_UNKNOWN:
1105 check_interlace =
false;
// NOTE(review): this accepts a container duration of 0 (>= 0) while UpdateAudioInfo requires
// > 0; record_duration keeps the maximum, so a 0 has no effect - confirm the difference is intended.
1120 if (pFormatCtx->duration >= 0) {
1122 record_duration(format_duration_seconds,
static_cast<double>(pFormatCtx->duration) / AV_TIME_BASE);
// No duration anywhere (stream and container both AV_NOPTS_VALUE): use a 3600-second placeholder.
1132 if (video_stream_duration_seconds <= 0.0 && format_duration_seconds <= 0.0 &&
1133 pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {
1135 record_duration(video_stream_duration_seconds, 60 * 60 * 1);
// Same 3600-second placeholder for inputs whose demuxer name is exactly "gif".
1139 if (video_stream_duration_seconds <= 0.0 && format_duration_seconds <= 0.0 &&
1140 pFormatCtx && pFormatCtx->iformat && strcmp(pFormatCtx->iformat->name,
"gif") == 0) {
1141 record_duration(video_stream_duration_seconds, 60 * 60 * 1);
1145 ApplyDurationStrategy();
// Still-image detection: a codec commonly used for single images ...
1151 const bool likely_still_codec =
1152 codec_id == AV_CODEC_ID_MJPEG ||
1153 codec_id == AV_CODEC_ID_PNG ||
1154 codec_id == AV_CODEC_ID_BMP ||
1155 codec_id == AV_CODEC_ID_TIFF ||
1156 codec_id == AV_CODEC_ID_WEBP ||
1157 codec_id == AV_CODEC_ID_JPEG2000;
// ... or a demuxer whose name contains "image2" ...
1158 const bool likely_image_demuxer =
1159 pFormatCtx && pFormatCtx->iformat && pFormatCtx->iformat->name &&
1160 strstr(pFormatCtx->iformat->name,
"image2");
1161 const bool has_attached_pic = HasAlbumArt();
// ... combined with a stream that reports exactly one frame (nb_frames > 0 && <= 1).
1162 const bool single_frame_stream =
1163 (pStream && pStream->nb_frames > 0 && pStream->nb_frames <= 1);
1166 const bool is_still_image_video =
1168 ((single_frame_stream || single_frame_clip) &&
1169 (likely_still_codec || likely_image_demuxer));
1171 if (is_still_image_video) {
// With no audio stream selected, a still image gets the 3600-second placeholder duration.
1176 if (audioStream < 0) {
1177 record_duration(video_stream_duration_seconds, 60 * 60 * 1);
1180 ApplyDurationStrategy();
// Copy every video-stream metadata entry into info.metadata; values are whitespace-trimmed.
1185 AVDictionaryEntry *tag = NULL;
1186 while ((tag = av_dict_get(pStream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
1187 QString str_key = tag->key;
1188 QString str_value = tag->value;
1189 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
1194 return this->is_duration_known;
// Frame-request routine (NOTE(review): presumably FFmpegReader::GetFrame - confirm against
// the signature). Validates reader state, clamps the requested frame number and either
// reads forward or seeks before reading.
// Start each request with fresh seek-stagnation tracking.
1198 last_seek_max_frame = -1;
1199 seek_stagnant_count = 0;
1202 throw ReaderClosed(
"The FFmpegReader is closed. Call Open() before calling this method.",
path);
// Frame numbers below 1 are clamped to 1.
1205 if (requested_frame < 1)
1206 requested_frame = 1;
1211 throw InvalidFile(
"Could not detect the duration of the video or audio stream.",
path);
// Holds getFrameMutex for the rest of the scope.
1226 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
// A request 1..20 frames ahead of last_frame is served by reading forward through the stream;
// the Seek() path below handles the other case.
1239 int64_t diff = requested_frame - last_frame;
1240 if (diff >= 1 && diff <= 20) {
1242 frame = ReadStream(requested_frame);
1247 Seek(requested_frame);
1256 frame = ReadStream(requested_frame);
// Read and process packets until the requested frame is available.
// @param requested_frame  frame number the caller is waiting for
// @return shared pointer to a Frame (NOTE(review): the fallback frames built at the end are
//         presumably returned when the stream ends early - confirm)
1264 std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
1266 bool check_seek =
false;
1267 int packet_error = -1;
// Stall detection state: consecutive iterations without progress, plus the previous
// packet count and video pts to compare against.
1268 int64_t no_progress_count = 0;
1269 int64_t prev_packets_read = packet_status.
packets_read();
1272 double prev_video_pts_seconds = video_pts_seconds;
1282 CheckWorkingFrames(requested_frame);
1287 if (is_cache_found) {
// Fetch a new packet unless a held packet is still pending.
1291 if (!hold_packet || !packet) {
1293 packet_error = GetNextPacket();
1294 if (packet_error < 0 && !packet) {
1305 check_seek = CheckSeek();
// Video packet on an enabled video stream.
1317 if ((
info.
has_video && packet && packet->stream_index == videoStream) ||
1321 ProcessVideoPacket(requested_frame);
// After processing, check whether the reader had to fall back to software decoding.
1322 if (ReopenWithoutHardwareDecode(requested_frame)) {
// Audio packet on an enabled audio stream.
1327 if ((
info.
has_audio && packet && packet->stream_index == audioStream) ||
1331 ProcessAudioPacket(requested_frame);
// Packet belongs to a stream type that is disabled in info: release it without decoding.
1336 if ((!
info.
has_video && packet && packet->stream_index == videoStream) ||
1337 (!
info.
has_audio && packet && packet->stream_index == audioStream)) {
1339 if (packet->stream_index == videoStream) {
1341 }
else if (packet->stream_index == audioStream) {
1347 RemoveAVPacket(packet);
1357 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::ReadStream (force EOF)",
"packets_read", packet_status.
packets_read(),
"packets_decoded", packet_status.
packets_decoded(),
"packets_eof", packet_status.
packets_eof,
"video_eof", packet_status.
video_eof,
"audio_eof", packet_status.
audio_eof,
"end_of_file", packet_status.
end_of_file);
// Progress check; one of the signals is a change in video_pts_seconds.
1370 const bool has_progress =
1374 (video_pts_seconds != prev_video_pts_seconds);
1377 no_progress_count = 0;
1379 no_progress_count++;
// 2000 iterations without progress is the stall threshold.
1380 if (no_progress_count >= 2000
1385 "requested_frame", requested_frame,
1386 "no_progress_count", no_progress_count,
1400 prev_video_pts_seconds = video_pts_seconds;
1408 "largest_frame_processed", largest_frame_processed,
1409 "Working Cache Count", working_cache.
Count());
1418 CheckWorkingFrames(requested_frame);
// Fallback frame for largest_frame_processed, padded with silent audio.
1434 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1437 if (!frame->has_image_data) {
1442 frame->AddAudioSilence(samples_in_frame);
// Second fallback: reuse a copy of the most recent image that is not later than the request,
// preferring last_final_video_frame over last_video_frame.
1448 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1449 if (last_final_video_frame && last_final_video_frame->has_image_data
1450 && last_final_video_frame->number <= requested_frame) {
1451 f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
1452 }
else if (last_video_frame && last_video_frame->has_image_data
1453 && last_video_frame->number <= requested_frame) {
1454 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
1458 f->AddAudioSilence(samples_in_frame);
// Read the next packet from the container into `packet`.
// @return the av_read_frame result (>= 0 when a packet was read, negative otherwise)
1466 int FFmpegReader::GetNextPacket() {
1467 int found_packet = 0;
// NOTE(review): next_packet is allocated with raw `new`; confirm it is deleted on the path
// where av_read_frame fails.
1468 AVPacket *next_packet;
1469 next_packet =
new AVPacket();
1470 found_packet = av_read_frame(pFormatCtx, next_packet);
// Release the previously held packet.
1474 RemoveAVPacket(packet);
1477 if (found_packet >= 0) {
// Adopt the freshly read packet as the current one.
1479 packet = next_packet;
// Branch on which selected stream the packet belongs to.
1482 if (packet->stream_index == videoStream) {
1484 }
else if (packet->stream_index == audioStream) {
1493 return found_packet;
// Send the current packet to the video decoder and try to receive a decoded frame into
// pFrame, transferring it from hardware memory first when hardware decoding produced it.
// @return frameFinished converted to bool
1497 bool FFmpegReader::GetAVFrame() {
1498 int frameFinished = 0;
// Hardware-failure bookkeeping; it only applies while hardware decoding is enabled,
// supported and not already overridden by force_sw_decode.
1499 auto note_hw_decode_failure = [&](
int err,
const char* stage) {
1501 if (!
hw_de_on || !hw_de_supported || force_sw_decode) {
// Only AVERROR_INVALIDDATA before any video frame has been decoded counts as a failure candidate.
1504 if (err == AVERROR_INVALIDDATA && packet_status.
video_decoded == 0) {
1505 hw_decode_error_count++;
1507 std::string(
"FFmpegReader::GetAVFrame (hardware decode failure candidate during ") + stage +
")",
1508 "error_count", hw_decode_error_count,
// Three candidates mark hardware decoding as failed.
1510 if (hw_decode_error_count >= 3) {
1511 hw_decode_failed =
true;
1524 int send_packet_err = 0;
1525 int64_t send_packet_pts = 0;
// Send either a video packet or a null packet.
1526 if ((packet && packet->stream_index == videoStream) || !packet) {
1527 send_packet_err = avcodec_send_packet(pCodecCtx, packet);
// The packet was accepted: remember its pts and stop holding it.
1529 if (packet && send_packet_err >= 0) {
1530 send_packet_pts = GetPacketPTS();
1531 hold_packet =
false;
// Log send failures other than EOF, with a dedicated message per known error code.
1541 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1542 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) +
"])",
"send_packet_err", send_packet_err,
"send_packet_pts", send_packet_pts);
1543 note_hw_decode_failure(send_packet_err,
"send_packet");
1544 if (send_packet_err == AVERROR(EAGAIN)) {
1546 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame()",
"send_packet_pts", send_packet_pts);
1548 if (send_packet_err == AVERROR(EINVAL)) {
1549 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush",
"send_packet_pts", send_packet_pts);
1551 if (send_packet_err == AVERROR(ENOMEM)) {
1552 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors",
"send_packet_pts", send_packet_pts);
// Receive loop. next_frame2 is where the decoder writes; it starts out as next_frame.
// NOTE(review): it is presumably pointed at a separate frame for hardware decoding - confirm.
1559 int receive_frame_err = 0;
1560 AVFrame *decoded_frame = next_frame;
1561 AVFrame *next_frame2;
1569 next_frame2 = next_frame;
1572 while (receive_frame_err >= 0) {
1573 receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);
// NOTE(review): the quotes around av_err2string(...) in the message below are escaped, so the
// concatenation is part of the literal text and the real error string is never logged.
// Compare with the "send packet: Not sent" message above - looks unintended, confirm.
1575 if (receive_frame_err != 0) {
1576 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [\" + av_err2string(receive_frame_err) + \"])",
"receive_frame_err", receive_frame_err,
"send_packet_pts", send_packet_pts);
1577 note_hw_decode_failure(receive_frame_err,
"receive_frame");
// EOF and EINVAL both flush the decoder buffers.
1579 if (receive_frame_err == AVERROR_EOF) {
1581 "FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)",
"send_packet_pts", send_packet_pts);
1582 avcodec_flush_buffers(pCodecCtx);
1585 if (receive_frame_err == AVERROR(EINVAL)) {
1587 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)",
"send_packet_pts", send_packet_pts);
1588 avcodec_flush_buffers(pCodecCtx);
1590 if (receive_frame_err == AVERROR(EAGAIN)) {
1592 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)",
"send_packet_pts", send_packet_pts);
1594 if (receive_frame_err == AVERROR_INPUT_CHANGED) {
1596 "FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)",
"send_packet_pts", send_packet_pts);
// The frame is in the hardware pixel format: transfer its data and copy its properties
// into next_frame.
1607 if (next_frame2->format == hw_de_av_pix_fmt) {
1608 if ((err = av_hwframe_transfer_data(next_frame, next_frame2, 0)) < 0) {
1610 "FFmpegReader::GetAVFrame (Failed to transfer data to output frame)",
1613 note_hw_decode_failure(AVERROR_INVALIDDATA,
"hwframe_transfer");
1616 if ((err = av_frame_copy_props(next_frame, next_frame2)) < 0) {
1618 "FFmpegReader::GetAVFrame (Failed to copy props to output frame)",
1621 note_hw_decode_failure(AVERROR_INVALIDDATA,
"hwframe_copy_props");
// Fill in format / width / height when they are still unset after the transfer.
1624 if (next_frame->format == AV_PIX_FMT_NONE) {
1625 next_frame->format = pCodecCtx->sw_pix_fmt;
1627 if (next_frame->width <= 0) {
1628 next_frame->width = next_frame2->width;
1630 if (next_frame->height <= 0) {
1631 next_frame->height = next_frame2->height;
1633 decoded_frame = next_frame;
1636 decoded_frame = next_frame2;
1642 decoded_frame = next_frame2;
// A frame without a first data plane is treated as a decode failure.
1645 if (!decoded_frame->data[0]) {
1647 "FFmpegReader::GetAVFrame (Decoded frame missing image data)",
1648 "format", decoded_frame->format,
1649 "width", decoded_frame->width,
1650 "height", decoded_frame->height);
1651 note_hw_decode_failure(AVERROR_INVALIDDATA,
"decoded_frame_empty");
// A usable frame resets the failure counter and, on the hardware path, records success.
1658 hw_decode_error_count = 0;
1660 if (
hw_de_on && hw_de_supported && !force_sw_decode) {
1661 hw_decode_succeeded =
true;
// Use the frame's own pixel format, falling back to the stream parameters when it is unset.
1667 AVPixelFormat decoded_pix_fmt = (AVPixelFormat)(decoded_frame->format);
1668 if (decoded_pix_fmt == AV_PIX_FMT_NONE)
1669 decoded_pix_fmt = (AVPixelFormat)(pStream->codecpar->format);
// Copy the pixel data into pFrame and carry over the color description.
1673 av_image_copy(pFrame->data, pFrame->linesize, (
const uint8_t**)decoded_frame->data, decoded_frame->linesize,
1675 pFrame->format = decoded_pix_fmt;
1678 pFrame->color_range = decoded_frame->color_range;
1679 pFrame->colorspace = decoded_frame->colorspace;
1680 pFrame->color_primaries = decoded_frame->color_primaries;
1681 pFrame->color_trc = decoded_frame->color_trc;
1682 pFrame->chroma_location = decoded_frame->chroma_location;
// Prefer the frame pts; fall back to the packet dts when the pts is missing.
1688 if (decoded_frame->pts != AV_NOPTS_VALUE) {
1691 video_pts = decoded_frame->pts;
1692 }
else if (decoded_frame->pkt_dts != AV_NOPTS_VALUE) {
1694 video_pts = decoded_frame->pkt_dts;
1698 "FFmpegReader::GetAVFrame (Successful frame received)",
"video_pts", video_pts,
"send_packet_pts", send_packet_pts);
// Applies only when a separate hardware frame was used.
1704 if (
hw_de_on && hw_de_supported && next_frame2 != next_frame) {
// Path using avcodec_decode_video2 / av_picture_copy.
// NOTE(review): presumably compiled only for older FFmpeg versions - confirm the guard.
1709 avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);
1715 if (frameFinished) {
1719 av_picture_copy((AVPicture *) pFrame, (AVPicture *) next_frame, pCodecCtx->pix_fmt,
info.
width,
1728 return frameFinished;
// Switch to software decoding after hardware decoding has been flagged as failed, then
// seek back to the requested frame.
// @param requested_frame  frame to seek to after the switch
// @return bool (NOTE(review): presumably true when the fallback was performed - confirm)
1731 bool FFmpegReader::ReopenWithoutHardwareDecode(int64_t requested_frame) {
// Skipped when no hardware failure is flagged or software decoding is already forced.
1733 if (!hw_decode_failed || force_sw_decode) {
1738 "FFmpegReader::ReopenWithoutHardwareDecode (falling back to software decode)",
1739 "requested_frame", requested_frame,
1740 "video_packets_read", packet_status.
video_read,
1742 "hw_decode_error_count", hw_decode_error_count);
// Force software decoding from now on and clear the failure state.
1744 force_sw_decode =
true;
1745 hw_decode_failed =
false;
1746 hw_decode_error_count = 0;
1750 Seek(requested_frame);
// Marks the parameter as intentionally unused on the path that does not otherwise read it.
1753 (void) requested_frame;
1760 return hw_decode_succeeded;
// Check whether the most recent seek landed before the target frame; if it overshot,
// seek again further back.
// @return bool (NOTE(review): confirm the meaning of true / false for the caller)
1767 bool FFmpegReader::CheckSeek() {
// Retry budget and the number of identical results tolerated before giving up on progress.
1770 const int64_t kSeekRetryMax = 5;
1771 const int kSeekStagnantMax = 2;
// The stream type being seeked has not yet produced a frame.
1775 if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))
1783 int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);
// Track whether repeated seeks keep landing on the same frame.
1785 if (max_seeked_frame == last_seek_max_frame) {
1786 seek_stagnant_count++;
1788 last_seek_max_frame = max_seeked_frame;
1789 seek_stagnant_count = 0;
// Landed at or past the target: the seek overshot.
1793 if (max_seeked_frame >= seeking_frame) {
1796 "is_video_seek", is_video_seek,
1797 "max_seeked_frame", max_seeked_frame,
1798 "seeking_frame", seeking_frame,
1799 "seeking_pts", seeking_pts,
1800 "seek_video_frame_found", seek_video_frame_found,
1801 "seek_audio_frame_found", seek_audio_frame_found);
// Back off by 10 * seek_count^2 frames; once the retry budget is used up and results are
// stagnant, use the fixed maximum back-off of 10 * kSeekRetryMax^2 frames.
1804 if (seek_count < kSeekRetryMax) {
1805 Seek(seeking_frame - (10 * seek_count * seek_count));
1806 }
else if (seek_stagnant_count >= kSeekStagnantMax) {
1808 Seek(seeking_frame - (10 * kSeekRetryMax * kSeekRetryMax));
1811 Seek(seeking_frame - (10 * seek_count * seek_count));
1816 "is_video_seek", is_video_seek,
1817 "packet->pts", GetPacketPTS(),
1818 "seeking_pts", seeking_pts,
1819 "seeking_frame", seeking_frame,
1820 "seek_video_frame_found", seek_video_frame_found,
1821 "seek_audio_frame_found", seek_audio_frame_found);
// Decode the current video packet, convert / scale the picture to RGBA with libswscale and
// add the resulting Frame to working_cache.
// @param requested_frame  frame number the caller is waiting for (used for logging and
//                         for a CreateFrame call below)
1835 void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
1838 int frame_finished = GetAVFrame();
// No picture was produced: release pFrame.
1841 if (!frame_finished) {
1844 RemoveAVFrame(pFrame);
// Translate the decoded pts into a frame number.
1850 int64_t current_frame = ConvertVideoPTStoFrame(video_pts);
// While seeking, remember the first video frame number that was found.
1853 if (!seek_video_frame_found && is_seeking)
1854 seek_video_frame_found = current_frame;
1860 working_cache.
Add(CreateFrame(requested_frame));
// Source description: pixel format (normalized away from deprecated J formats), range flag
// (AVCOL_RANGE_JPEG means full range) and size, falling back to info.width / info.height.
1866 AVPixelFormat decoded_pix_fmt = (pFrame && pFrame->format != AV_PIX_FMT_NONE)
1867 ?
static_cast<AVPixelFormat
>(pFrame->format)
1869 bool src_full_range = (pFrame && pFrame->color_range == AVCOL_RANGE_JPEG);
1870 AVPixelFormat src_pix_fmt = NormalizeDeprecatedPixFmt(decoded_pix_fmt, src_full_range);
1871 int src_width = (pFrame && pFrame->width > 0) ? pFrame->width :
info.
width;
1872 int src_height = (pFrame && pFrame->height > 0) ? pFrame->height :
info.
height;
1873 int height = src_height;
1874 int width = src_width;
// Reuse the cached RGB frame object when one exists.
1876 AVFrame *pFrameRGB = pFrameRGB_cached;
1879 if (pFrameRGB ==
nullptr)
1881 pFrameRGB_cached = pFrameRGB;
1884 uint8_t *buffer =
nullptr;
// Grow max_width / max_height by the scale factors; never shrink them.
1905 max_width = std::max(
float(max_width), max_width * max_scale_x);
1906 max_height = std::max(
float(max_height), max_height * max_scale_y);
1912 QSize width_size(max_width * max_scale_x,
1915 max_height * max_scale_y);
// Use whichever candidate size covers the current bounds.
1917 if (width_size.width() >= max_width && width_size.height() >= max_height) {
1918 max_width = std::max(max_width, width_size.width());
1919 max_height = std::max(max_height, width_size.height());
1921 max_width = std::max(max_width, height_size.width());
1922 max_height = std::max(max_height, height_size.height());
1929 float preview_ratio = 1.0;
1936 max_width =
info.
width * max_scale_x * preview_ratio;
1937 max_height =
info.
height * max_scale_y * preview_ratio;
// Downscale only when both bounds are smaller than the source; keep the aspect ratio.
1945 int original_height = src_height;
1946 if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
1948 float ratio = float(width) / float(height);
1949 int possible_width = round(max_height * ratio);
1950 int possible_height = round(max_width / ratio);
1952 if (possible_width <= max_width) {
1954 width = possible_width;
1955 height = max_height;
1959 height = possible_height;
// RGBA output: 4 bytes per pixel plus 128 spare bytes, rounded up to a multiple of 32 and
// allocated with 32-byte alignment.
// NOTE(review): width * height * bytes_per_pixel is computed in int - confirm it cannot
// overflow for the largest supported frames.
1964 const int bytes_per_pixel = 4;
1965 int raw_buffer_size = (width * height * bytes_per_pixel) + 128;
1968 constexpr
size_t ALIGNMENT = 32;
1969 int buffer_size = ((raw_buffer_size + ALIGNMENT - 1) / ALIGNMENT) * ALIGNMENT;
1970 buffer = (
unsigned char*) aligned_malloc(buffer_size, ALIGNMENT);
// Fast bilinear scaling by default; bicubic is selected under a separate condition.
1975 int scale_mode = SWS_FAST_BILINEAR;
1977 scale_mode = SWS_BICUBIC;
// Fetch the scaler context via sws_getCachedContext, passing in the previous one.
1979 img_convert_ctx = sws_getCachedContext(img_convert_ctx, src_width, src_height, src_pix_fmt, width, height,
PIX_FMT_RGBA, scale_mode, NULL, NULL, NULL);
1980 if (!img_convert_ctx)
// Default coefficients on both sides; source range as detected, destination always full range.
1982 const int *src_coeff = sws_getCoefficients(SWS_CS_DEFAULT);
1983 const int *dst_coeff = sws_getCoefficients(SWS_CS_DEFAULT);
1984 const int dst_full_range = 1;
1985 sws_setColorspaceDetails(img_convert_ctx, src_coeff, src_full_range ? 1 : 0,
1986 dst_coeff, dst_full_range, 0, 1 << 16, 1 << 16);
// Unusable source frame: on the hardware path, flag a failure so the reader can fall back.
1988 if (!pFrame || !pFrame->data[0] || pFrame->linesize[0] <= 0) {
1990 if (
hw_de_on && hw_de_supported && !force_sw_decode) {
1991 hw_decode_failed =
true;
1993 "FFmpegReader::ProcessVideoPacket (Invalid source frame; forcing software fallback)",
1994 "requested_frame", requested_frame,
1995 "current_frame", current_frame,
1996 "src_pix_fmt", src_pix_fmt,
1997 "src_width", src_width,
1998 "src_height", src_height);
2002 RemoveAVFrame(pFrame);
// Convert / scale all source rows; a result <= 0 is handled like an invalid source frame.
2009 const int scaled_lines = sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
2010 original_height, pFrameRGB->data, pFrameRGB->linesize);
2011 if (scaled_lines <= 0) {
2013 if (
hw_de_on && hw_de_supported && !force_sw_decode) {
2014 hw_decode_failed =
true;
2016 "FFmpegReader::ProcessVideoPacket (sws_scale failed; forcing software fallback)",
2017 "requested_frame", requested_frame,
2018 "current_frame", current_frame,
2019 "scaled_lines", scaled_lines,
2020 "src_pix_fmt", src_pix_fmt,
2021 "src_width", src_width,
2022 "src_height", src_height);
2027 RemoveAVFrame(pFrame);
// Wrap the RGBA buffer in a Frame: sources without alpha use the premultiplied QImage
// format, the other branch uses plain RGBA8888.
2033 std::shared_ptr<Frame> f = CreateFrame(current_frame);
2036 if (!ffmpeg_has_alpha(src_pix_fmt)) {
2038 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);
2041 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);
// Publish the frame and remember it as the latest decoded video frame.
2045 working_cache.
Add(f);
2048 last_video_frame = f;
2054 RemoveAVFrame(pFrame);
2060 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::ProcessVideoPacket (After)",
"requested_frame", requested_frame,
"current_frame", current_frame,
"f->number", f->number,
"video_pts_seconds", video_pts_seconds);
2064 void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
2067 if (packet && packet->pts != AV_NOPTS_VALUE) {
2069 location = GetAudioPTSLocation(packet->pts);
2072 if (!seek_audio_frame_found && is_seeking)
2073 seek_audio_frame_found = location.
frame;
2080 working_cache.
Add(CreateFrame(requested_frame));
2084 "requested_frame", requested_frame,
2085 "target_frame", location.
frame,
2089 int frame_finished = 0;
2093 int packet_samples = 0;
2097 int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
2098 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
2102 int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
2103 if (receive_frame_err >= 0) {
2106 if (receive_frame_err == AVERROR_EOF) {
2110 if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
2112 avcodec_flush_buffers(aCodecCtx);
2114 if (receive_frame_err != 0) {
2119 int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);
2122 if (frame_finished) {
2128 audio_pts = audio_frame->pts;
2131 location = GetAudioPTSLocation(audio_pts);
2134 int plane_size = -1;
2140 data_size = av_samples_get_buffer_size(&plane_size, nb_channels,
2144 packet_samples = audio_frame->nb_samples * nb_channels;
2153 int pts_remaining_samples = packet_samples /
info.
channels;
2156 if (pts_remaining_samples == 0) {
2158 "packet_samples", packet_samples,
2160 "pts_remaining_samples", pts_remaining_samples);
2164 while (pts_remaining_samples) {
2169 int samples = samples_per_frame - previous_packet_location.
sample_start;
2170 if (samples > pts_remaining_samples)
2171 samples = pts_remaining_samples;
2174 pts_remaining_samples -= samples;
2176 if (pts_remaining_samples > 0) {
2178 previous_packet_location.
frame++;
2187 "packet_samples", packet_samples,
2195 audio_converted->nb_samples = audio_frame->nb_samples;
2196 av_samples_alloc(audio_converted->data, audio_converted->linesize,
info.
channels, audio_frame->nb_samples, AV_SAMPLE_FMT_FLTP, 0);
2203 AVChannelLayout input_layout = ffmpeg_get_valid_channel_layout(
2205 AVChannelLayout output_layout = ffmpeg_get_valid_channel_layout(
2207 int in_layout_err = av_opt_set_chlayout(avr,
"in_chlayout", &input_layout, 0);
2208 int out_layout_err = av_opt_set_chlayout(avr,
"out_chlayout", &output_layout, 0);
2216 av_opt_set_int(avr,
"out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
2221 av_channel_layout_uninit(&input_layout);
2222 av_channel_layout_uninit(&output_layout);
2223 if (in_layout_err < 0 || out_layout_err < 0 || swr_init_err < 0) {
2225 throw InvalidChannels(
"Could not initialize FFmpeg audio channel layout or resampler.",
path);
2228 if (swr_init_err < 0) {
2238 audio_converted->data,
2239 audio_converted->linesize[0],
2240 audio_converted->nb_samples,
2242 audio_frame->linesize[0],
2243 audio_frame->nb_samples);
2246 int64_t starting_frame_number = -1;
2247 for (
int channel_filter = 0; channel_filter <
info.
channels; channel_filter++) {
2249 starting_frame_number = location.
frame;
2250 int channel_buffer_size = nb_samples;
2251 auto *channel_buffer = (
float *) (audio_converted->data[channel_filter]);
2255 int remaining_samples = channel_buffer_size;
2256 while (remaining_samples > 0) {
2261 int samples = std::fmin(samples_per_frame - start, remaining_samples);
2264 std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);
2267 f->AddAudio(
true, channel_filter, start, channel_buffer, samples, 1.0f);
2271 "frame", starting_frame_number,
2274 "channel", channel_filter,
2275 "samples_per_frame", samples_per_frame);
2278 working_cache.
Add(f);
2281 remaining_samples -= samples;
2284 if (remaining_samples > 0)
2285 channel_buffer += samples;
2288 starting_frame_number++;
2296 av_free(audio_converted->data[0]);
2305 "requested_frame", requested_frame,
2306 "starting_frame", location.
frame,
2307 "end_frame", starting_frame_number - 1,
2308 "audio_pts_seconds", audio_pts_seconds);
2314 void FFmpegReader::Seek(int64_t requested_frame) {
2316 if (requested_frame < 1)
2317 requested_frame = 1;
2320 if (requested_frame > largest_frame_processed && packet_status.
end_of_file) {
2327 "requested_frame", requested_frame,
2328 "seek_count", seek_count,
2329 "last_frame", last_frame);
2332 working_cache.
Clear();
2336 video_pts_seconds = NO_PTS_OFFSET;
2338 audio_pts_seconds = NO_PTS_OFFSET;
2339 hold_packet =
false;
2341 current_video_frame = 0;
2342 largest_frame_processed = 0;
2343 last_final_video_frame.reset();
2348 packet_status.
reset(
false);
2354 int buffer_amount = 12;
2355 if (requested_frame - buffer_amount < 20) {
2369 if (seek_count == 1) {
2372 seeking_pts = ConvertFrameToVideoPTS(1);
2374 seek_audio_frame_found = 0;
2375 seek_video_frame_found = 0;
2379 bool seek_worked =
false;
2380 int64_t seek_target = 0;
2384 seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);
2389 is_video_seek =
true;
2396 seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);
2401 is_video_seek =
false;
2410 avcodec_flush_buffers(aCodecCtx);
2414 avcodec_flush_buffers(pCodecCtx);
2417 previous_packet_location.
frame = -1;
2422 if (seek_count == 1) {
2424 seeking_pts = seek_target;
2425 seeking_frame = requested_frame;
2427 seek_audio_frame_found = 0;
2428 seek_video_frame_found = 0;
2456 int64_t FFmpegReader::GetPacketPTS() {
2458 int64_t current_pts = packet->pts;
2459 if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
2460 current_pts = packet->dts;
2466 return AV_NOPTS_VALUE;
2471 void FFmpegReader::UpdatePTSOffset() {
2472 if (pts_offset_seconds != NO_PTS_OFFSET) {
2476 pts_offset_seconds = 0.0;
2477 double video_pts_offset_seconds = 0.0;
2478 double audio_pts_offset_seconds = 0.0;
2480 bool has_video_pts =
false;
2483 has_video_pts =
true;
2485 bool has_audio_pts =
false;
2488 has_audio_pts =
true;
2492 while (!has_video_pts || !has_audio_pts) {
2494 if (GetNextPacket() < 0)
2499 int64_t pts = GetPacketPTS();
2502 if (!has_video_pts && packet->stream_index == videoStream) {
2508 if (std::abs(video_pts_offset_seconds) <= 10.0) {
2509 has_video_pts =
true;
2512 else if (!has_audio_pts && packet->stream_index == audioStream) {
2518 if (std::abs(audio_pts_offset_seconds) <= 10.0) {
2519 has_audio_pts =
true;
2529 pts_offset_seconds = video_pts_offset_seconds;
2531 pts_offset_seconds = audio_pts_offset_seconds;
2532 }
else if (has_video_pts && has_audio_pts) {
2534 pts_offset_seconds = video_pts_offset_seconds;
2539 int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {
2541 int64_t previous_video_frame = current_video_frame;
2543 const double video_timebase_value =
2549 double video_seconds = (double(pts) * video_timebase_value) + pts_offset_seconds;
2552 int64_t frame = round(video_seconds * fps_value) + 1;
2555 if (current_video_frame == 0)
2556 current_video_frame = frame;
2560 if (frame == previous_video_frame) {
2565 current_video_frame++;
2574 int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {
2576 const double video_timebase_value =
2582 double seconds = (double(frame_number - 1) / fps_value) + pts_offset_seconds;
2585 int64_t video_pts = round(seconds / video_timebase_value);
2592 int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {
2594 const double audio_timebase_value =
2600 double seconds = (double(frame_number - 1) / fps_value) + pts_offset_seconds;
2603 int64_t audio_pts = round(seconds / audio_timebase_value);
2610 AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {
2611 const double audio_timebase_value =
2618 double audio_seconds = (double(pts) * audio_timebase_value) + pts_offset_seconds;
2621 double frame = (audio_seconds * fps_value) + 1;
2624 int64_t whole_frame = int64_t(frame);
2627 double sample_start_percentage = frame - double(whole_frame);
2633 int sample_start = round(
double(samples_per_frame) * sample_start_percentage);
2636 if (whole_frame < 1)
2638 if (sample_start < 0)
2645 if (previous_packet_location.
frame != -1) {
2646 if (location.
is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
2647 int64_t orig_frame = location.
frame;
2652 location.
frame = previous_packet_location.
frame;
2655 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)",
"Source Frame", orig_frame,
"Source Audio Sample", orig_start,
"Target Frame", location.
frame,
"Target Audio Sample", location.
sample_start,
"pts", pts);
2664 previous_packet_location = location;
2671 std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {
2673 std::shared_ptr<Frame> output = working_cache.
GetFrame(requested_frame);
2677 output = working_cache.
GetFrame(requested_frame);
2678 if(output)
return output;
2686 working_cache.
Add(output);
2689 if (requested_frame > largest_frame_processed)
2690 largest_frame_processed = requested_frame;
2697 bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {
2700 bool seek_trash =
false;
2701 int64_t max_seeked_frame = seek_audio_frame_found;
2702 if (seek_video_frame_found > max_seeked_frame) {
2703 max_seeked_frame = seek_video_frame_found;
2705 if ((
info.
has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
2706 (
info.
has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {
2714 void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {
2717 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
2720 std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.
GetFrames();
2721 std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;
2724 for(working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr)
2727 std::shared_ptr<Frame> f = *working_itr;
2730 if (!f || f->number > requested_frame) {
2736 double frame_pts_seconds = (double(f->number - 1) /
info.
fps.
ToDouble()) + pts_offset_seconds;
2737 double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);
2740 bool is_video_ready =
false;
2741 bool is_audio_ready =
false;
2742 double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;
2743 if ((frame_pts_seconds <= video_pts_seconds)
2744 || (recent_pts_diff > 1.5)
2748 is_video_ready =
true;
2750 "frame_number", f->number,
2751 "frame_pts_seconds", frame_pts_seconds,
2752 "video_pts_seconds", video_pts_seconds,
2753 "recent_pts_diff", recent_pts_diff);
2759 if (previous_frame_instance && previous_frame_instance->has_image_data) {
2760 f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2764 if (!f->has_image_data
2765 && last_final_video_frame
2766 && last_final_video_frame->has_image_data
2767 && last_final_video_frame->number <= f->number) {
2768 f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
2772 if (!f->has_image_data
2774 && last_video_frame->has_image_data
2775 && last_video_frame->number <= f->number) {
2776 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
2780 if (!f->has_image_data) {
2782 "FFmpegReader::CheckWorkingFrames (no previous image found; using black frame)",
2783 "frame_number", f->number);
2784 f->AddColor(
"#000000");
2789 double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
2790 if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
2791 || (recent_pts_diff > 1.5)
2796 is_audio_ready =
true;
2798 "frame_number", f->number,
2799 "frame_pts_seconds", frame_pts_seconds,
2800 "audio_pts_seconds", audio_pts_seconds,
2801 "audio_pts_diff", audio_pts_diff,
2802 "recent_pts_diff", recent_pts_diff);
2804 bool is_seek_trash = IsPartialFrame(f->number);
2812 "frame_number", f->number,
2813 "is_video_ready", is_video_ready,
2814 "is_audio_ready", is_audio_ready,
2821 && !packet_status.
end_of_file && !is_seek_trash) {
2826 if (previous_frame_instance && previous_frame_instance->has_image_data) {
2827 f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2829 if (!f->has_image_data
2830 && last_final_video_frame
2831 && last_final_video_frame->has_image_data
2832 && last_final_video_frame->number <= f->number) {
2833 f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
2835 if (!f->has_image_data
2837 && last_video_frame->has_image_data
2838 && last_video_frame->number <= f->number) {
2839 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
2846 if (!f->has_image_data && is_video_ready && is_audio_ready) {
2848 if (previous_frame_instance && previous_frame_instance->has_image_data) {
2849 f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2851 if (!f->has_image_data
2852 && last_final_video_frame
2853 && last_final_video_frame->has_image_data
2854 && last_final_video_frame->number <= f->number) {
2855 f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
2857 if (!f->has_image_data
2859 && last_video_frame->has_image_data
2860 && last_video_frame->number <= f->number) {
2861 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
2867 if (!f->has_image_data) {
2871 if ((!packet_status.
end_of_file && is_video_ready && is_audio_ready) || packet_status.
end_of_file || is_seek_trash) {
2874 "requested_frame", requested_frame,
2875 "f->number", f->number,
2876 "is_seek_trash", is_seek_trash,
2877 "Working Cache Count", working_cache.
Count(),
2881 if (!is_seek_trash) {
2884 if (f->has_image_data) {
2885 last_final_video_frame = f;
2889 working_cache.
Remove(f->number);
2892 last_frame = f->number;
2895 working_cache.
Remove(f->number);
2902 working_frames.clear();
2903 working_frames.shrink_to_fit();
2907 void FFmpegReader::CheckFPS() {
2915 int frames_per_second[3] = {0,0,0};
2916 int max_fps_index =
sizeof(frames_per_second) /
sizeof(frames_per_second[0]);
2919 int all_frames_detected = 0;
2920 int starting_frames_detected = 0;
2925 if (GetNextPacket() < 0)
2930 if (packet->stream_index == videoStream) {
2933 fps_index = int(video_seconds);
2936 if (fps_index >= 0 && fps_index < max_fps_index) {
2938 starting_frames_detected++;
2939 frames_per_second[fps_index]++;
2943 all_frames_detected++;
2948 float avg_fps = 30.0;
2949 if (starting_frames_detected > 0 && fps_index > 0) {
2950 avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);
2954 if (avg_fps < 8.0) {
2963 if (all_frames_detected > 0) {
2977 void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {
2981 av_freep(&remove_frame->data[0]);
2989 void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {
2994 delete remove_packet;
3009 root[
"type"] =
"FFmpegReader";
3010 root[
"path"] =
path;
3011 switch (duration_strategy) {
3013 root[
"duration_strategy"] =
"VideoPreferred";
3016 root[
"duration_strategy"] =
"AudioPreferred";
3020 root[
"duration_strategy"] =
"LongestStream";
3037 catch (
const std::exception& e) {
3039 throw InvalidJSON(
"JSON is invalid (missing keys or invalid data types)");
3050 if (!root[
"path"].isNull())
3051 path = root[
"path"].asString();
3052 if (!root[
"duration_strategy"].isNull()) {
3053 const std::string strategy = root[
"duration_strategy"].asString();
3054 if (strategy ==
"VideoPreferred") {
3056 }
else if (strategy ==
"AudioPreferred") {
Shared helpers for Crop effect scaling logic.
Header file for all Exception classes.
AVPixelFormat hw_de_av_pix_fmt_global
AVHWDeviceType hw_de_av_device_type_global
Header file for FFmpegReader class.
Header file for FFmpegUtilities.
#define AV_FREE_CONTEXT(av_context)
#define AV_FREE_FRAME(av_frame)
#define SWR_CONVERT(ctx, out, linesize, out_count, in, linesize2, in_count)
#define AV_GET_CODEC_TYPE(av_stream)
#define AV_GET_CODEC_PIXEL_FORMAT(av_stream, av_context)
#define AV_GET_CODEC_CONTEXT(av_stream, av_codec)
#define AV_FIND_DECODER_CODEC_ID(av_stream)
#define AV_ALLOCATE_FRAME()
#define AV_COPY_PICTURE_DATA(av_frame, buffer, pix_fmt, width, height)
#define AV_FREE_PACKET(av_packet)
#define AVCODEC_REGISTER_ALL
#define AV_GET_CODEC_ATTRIBUTES(av_stream, av_context)
#define AV_ALLOCATE_IMAGE(av_frame, pix_fmt, width, height)
#define AV_GET_SAMPLE_FORMAT(av_stream, av_context)
#define AV_RESET_FRAME(av_frame)
Cross-platform helper to encourage returning freed memory to the OS.
#define FF_VIDEO_NUM_PROCESSORS
#define OPEN_MP_NUM_PROCESSORS
#define FF_AUDIO_NUM_PROCESSORS
Header file for Timeline class.
Header file for ZeroMQ-based Logger class.
void SetMaxBytesFromInfo(int64_t number_of_frames, int width, int height, int sample_rate, int channels)
Set maximum bytes to a different amount based on a ReaderInfo struct.
int64_t Count()
Count the frames in the queue.
void Add(std::shared_ptr< openshot::Frame > frame)
Add a Frame to the cache.
std::shared_ptr< openshot::Frame > GetFrame(int64_t frame_number)
Get a frame from the cache.
std::vector< std::shared_ptr< openshot::Frame > > GetFrames()
Get an array of all Frames.
void Remove(int64_t frame_number)
Remove a specific frame.
void Clear()
Clear the cache of all frames.
This class represents a clip (used to arrange readers on the timeline)
openshot::Keyframe scale_x
Curve representing the horizontal scaling in percent (0 to 1)
openshot::TimelineBase * ParentTimeline() override
Get the associated Timeline pointer (if any)
openshot::Keyframe scale_y
Curve representing the vertical scaling in percent (0 to 1)
openshot::ScaleType scale
The scale determines how a clip should be resized to fit its parent.
double Y
The Y value of the coordinate (usually representing the value of the property being animated)
This class uses the FFmpeg libraries, to open video files and audio files, and return openshot::Frame...
void Open() override
Open File - which is called by the constructor automatically.
bool HardwareDecodeSuccessful() const override
Return true if hardware decode was requested and successfully produced at least one frame.
FFmpegReader(const std::string &path, bool inspect_reader=true)
Constructor for FFmpegReader.
Json::Value JsonValue() const override
Generate Json::Value for this object.
bool GetIsDurationKnown()
Return true if the duration of the file is known.
void SetJsonValue(const Json::Value root) override
Load Json::Value into this object.
CacheMemory final_cache
Final cache object used to hold final frames.
virtual ~FFmpegReader()
Destructor.
std::string Json() const override
Generate JSON string of this object.
std::shared_ptr< openshot::Frame > GetFrame(int64_t requested_frame) override
void Close() override
Close File.
void SetJson(const std::string value) override
Load JSON string into this object.
This class represents a fraction.
int num
Numerator for the fraction.
float ToFloat()
Return this fraction as a float (i.e. 1/2 = 0.5)
double ToDouble() const
Return this fraction as a double (i.e. 1/2 = 0.5)
Fraction Reciprocal() const
Return the reciprocal as a Fraction.
int den
Denominator for the fraction.
int GetSamplesPerFrame(openshot::Fraction fps, int sample_rate, int channels)
Calculate the # of samples per video frame (for the current frame number)
Exception when an invalid # of audio channels are detected.
Exception when no valid codec is found for a file.
Exception for files that can not be found or opened.
Exception for invalid JSON.
Point GetMaxPoint() const
Get max point (by Y coordinate)
Exception when no streams are found in the file.
Exception when memory could not be allocated.
Coordinate co
This is the primary coordinate.
openshot::ReaderInfo info
Information about the current media file.
virtual void SetJsonValue(const Json::Value root)=0
Load Json::Value into this object.
virtual Json::Value JsonValue() const =0
Generate Json::Value for this object.
std::recursive_mutex getFrameMutex
Mutex for multiple threads.
openshot::ClipBase * ParentClip()
Parent clip object of this reader (which can be unparented and NULL)
Exception when a reader is closed, and a frame is requested.
int DE_LIMIT_WIDTH_MAX
Maximum columns that hardware decode can handle.
int HW_DE_DEVICE_SET
Which GPU to use to decode (0 is the first)
int DE_LIMIT_HEIGHT_MAX
Maximum rows that hardware decode can handle.
static Settings * Instance()
Create or get an instance of this settings singleton (invoke the class with this method)
int HARDWARE_DECODER
Use video codec for faster video decoding (if supported)
int preview_height
Optional preview height of timeline image. If your preview window is smaller than the timeline,...
int preview_width
Optional preview width of timeline image. If your preview window is smaller than the timeline,...
This class represents a timeline.
void Log(std::string message)
Log message to all subscribers of this logger (if any)
void AppendDebugMethod(std::string method_name, std::string arg1_name="", float arg1_value=-1.0, std::string arg2_name="", float arg2_value=-1.0, std::string arg3_name="", float arg3_value=-1.0, std::string arg4_name="", float arg4_value=-1.0, std::string arg5_name="", float arg5_value=-1.0, std::string arg6_name="", float arg6_value=-1.0)
Append debug information.
static ZmqLogger * Instance()
Create or get an instance of this logger singleton (invoke the class with this method)
This namespace is the default namespace for all code in the openshot library.
@ SCALE_FIT
Scale the clip until either height or width fills the canvas (with no cropping)
@ SCALE_STRETCH
Scale the clip until both height and width fill the canvas (distort to fit)
@ SCALE_CROP
Scale the clip until both height and width fill the canvas (cropping the overlap)
ChannelLayout
This enumeration determines the audio channel layout (such as stereo, mono, 5 point surround,...
DurationStrategy
This enumeration determines which duration source to favor.
@ VideoPreferred
Prefer the video stream's duration, fallback to audio then container.
@ LongestStream
Use the longest value from video, audio, or container.
@ AudioPreferred
Prefer the audio stream's duration, fallback to video then container.
void ApplyCropResizeScale(Clip *clip, int source_width, int source_height, int &max_width, int &max_height)
Scale the requested max_width / max_height based on the Crop resize amount, capped by source size.
const Json::Value stringToJson(const std::string value)
This struct holds the associated video frame and starting sample # for an audio packet.
bool is_near(AudioLocation location, int samples_per_frame, int64_t amount)
int64_t packets_decoded()
int audio_bit_rate
The bit rate of the audio stream (in bytes)
int video_bit_rate
The bit rate of the video stream (in bytes)
bool has_single_image
Determines if this file only contains a single image.
float duration
Length of time (in seconds)
openshot::Fraction audio_timebase
The audio timebase determines how long each audio packet should be played.
int width
The width of the video (in pixels)
int channels
The number of audio channels used in the audio stream.
openshot::Fraction fps
Frames per second, as a fraction (i.e. 24/1 = 24 fps)
openshot::Fraction display_ratio
The ratio of width to height of the video stream (i.e. 640x480 has a ratio of 4/3)
int height
The height of the video (in pixels)
int pixel_format
The pixel format (i.e. YUV420P, RGB24, etc...)
int64_t video_length
The number of frames in the video stream.
std::string acodec
The name of the audio codec used to encode / decode the audio stream.
std::map< std::string, std::string > metadata
An optional map/dictionary of metadata for this reader.
std::string vcodec
The name of the video codec used to encode / decode the video stream.
openshot::Fraction pixel_ratio
The pixel ratio of the video stream as a fraction (i.e. some pixels are not square)
openshot::ChannelLayout channel_layout
The channel layout (mono, stereo, 5 point surround, etc...)
bool has_video
Determines if this file has a video stream.
bool has_audio
Determines if this file has an audio stream.
openshot::Fraction video_timebase
The video timebase determines how long each frame stays on the screen.
int video_stream_index
The index of the video stream.
int sample_rate
The number of audio samples per second (44100 is a common sample rate)
int audio_stream_index
The index of the audio stream.
int64_t file_size
Size of file (in bytes)