32 #define ENABLE_VAAPI 0
35 #define MAX_SUPPORTED_WIDTH 1950
36 #define MAX_SUPPORTED_HEIGHT 1100
39 #include "libavutil/hwcontext_vaapi.h"
41 typedef struct VAAPIDecodeContext {
43 VAEntrypoint va_entrypoint;
45 VAContextID va_context;
47 #if FF_API_STRUCT_VAAPI_CONTEXT
50 struct vaapi_context *old_context;
51 AVBufferRef *device_ref;
55 AVHWDeviceContext *device;
56 AVVAAPIDeviceContext *hwctx;
58 AVHWFramesContext *frames;
59 AVVAAPIFramesContext *hwfc;
61 enum AVPixelFormat surface_format;
// Map a YUVJ* pixel format onto the YUV* format with the same chroma layout.
// Only the case labels and return statements survive in this excerpt: every
// visible YUVJxxxP label is followed by a return of the matching YUVxxxP value.
// `is_full_range` is taken by non-const reference; the statements that write
// to it are not visible here.
// NOTE(review): presumably it is set on the YUVJ branches - confirm against
// the full file.
78 static AVPixelFormat NormalizeDeprecatedPixFmt(AVPixelFormat pix_fmt,
bool& is_full_range) {
80 case AV_PIX_FMT_YUVJ420P:
82 return AV_PIX_FMT_YUV420P;
83 case AV_PIX_FMT_YUVJ422P:
85 return AV_PIX_FMT_YUV422P;
86 case AV_PIX_FMT_YUVJ444P:
88 return AV_PIX_FMT_YUV444P;
89 case AV_PIX_FMT_YUVJ440P:
91 return AV_PIX_FMT_YUV440P;
// The 4:1:1 mapping is guarded by a preprocessor check on AV_PIX_FMT_YUVJ411P.
92 #ifdef AV_PIX_FMT_YUVJ411P
93 case AV_PIX_FMT_YUVJ411P:
95 return AV_PIX_FMT_YUV411P;
106 :
path(
path), pFormatCtx(NULL), videoStream(-1), audioStream(-1), pCodecCtx(NULL), aCodecCtx(NULL),
107 pStream(NULL), aStream(NULL), packet(NULL), pFrame(NULL), is_open(false), is_duration_known(false),
108 check_interlace(false), check_fps(false), duration_strategy(duration_strategy), previous_packet_location{-1, 0},
109 is_seeking(false), seeking_pts(0), seeking_frame(0), is_video_seek(true), seek_count(0),
110 seek_audio_frame_found(0), seek_video_frame_found(0), last_seek_max_frame(-1), seek_stagnant_count(0),
111 last_frame(0), largest_frame_processed(0), current_video_frame(0), audio_pts(0), video_pts(0),
112 hold_packet(false), pts_offset_seconds(0.0), audio_pts_seconds(0.0), video_pts_seconds(0.0),
113 NO_PTS_OFFSET(-99999), enable_seek(true) {
120 pts_offset_seconds = NO_PTS_OFFSET;
121 video_pts_seconds = NO_PTS_OFFSET;
122 audio_pts_seconds = NO_PTS_OFFSET;
131 if (inspect_reader) {
153 if (abs(diff) <= amount)
// Pixel-format selection callback; Open() assigns it to pCodecCtx->get_format.
// @param ctx      codec context handed in by the caller of the callback
// @param pix_fmts list of candidate formats terminated by AV_PIX_FMT_NONE
// @return AV_PIX_FMT_NONE on the visible fall-through path; the per-format
//         case bodies are not visible in this excerpt.
164 static enum AVPixelFormat get_hw_dec_format(AVCodecContext *ctx,
const enum AVPixelFormat *pix_fmts)
166 const enum AVPixelFormat *p;
// Walk the candidate list until the AV_PIX_FMT_NONE terminator.
171 for (p = pix_fmts; *p != AV_PIX_FMT_NONE; p++) {
// The cases below are grouped by platform preprocessor checks.
173 #if defined(__linux__)
175 case AV_PIX_FMT_VAAPI:
182 case AV_PIX_FMT_VDPAU:
192 case AV_PIX_FMT_DXVA2_VLD:
199 case AV_PIX_FMT_D3D11:
207 #if defined(__APPLE__)
209 case AV_PIX_FMT_VIDEOTOOLBOX:
218 case AV_PIX_FMT_CUDA:
// Reached when the loop finishes without returning.
238 return AV_PIX_FMT_NONE;
// Classify a codec id for hardware decoding. Open() stores the result in
// hw_de_supported.
// Only the case labels survive in this excerpt; the values returned for the
// listed codecs and for all other codecs are not visible.
// @param codecid codec identifier compared against the AV_CODEC_ID_* labels
241 int FFmpegReader::IsHardwareDecodeSupported(
int codecid)
245 case AV_CODEC_ID_H264:
246 case AV_CODEC_ID_MPEG2VIDEO:
247 case AV_CODEC_ID_VC1:
248 case AV_CODEC_ID_WMV1:
249 case AV_CODEC_ID_WMV2:
250 case AV_CODEC_ID_WMV3:
265 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
271 hw_decode_failed =
false;
272 hw_decode_error_count = 0;
273 hw_decode_succeeded =
false;
278 if (avformat_open_input(&pFormatCtx,
path.c_str(), NULL, NULL) != 0)
282 if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
289 packet_status.
reset(
true);
292 for (
unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
294 if (
AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_VIDEO && videoStream < 0) {
301 if (
AV_GET_CODEC_TYPE(pFormatCtx->streams[i]) == AVMEDIA_TYPE_AUDIO && audioStream < 0) {
308 if (videoStream == -1 && audioStream == -1)
312 if (videoStream != -1) {
317 pStream = pFormatCtx->streams[videoStream];
323 const AVCodec *pCodec = avcodec_find_decoder(codecId);
324 AVDictionary *
opts = NULL;
325 int retry_decode_open = 2;
330 if (
hw_de_on && (retry_decode_open==2)) {
332 hw_de_supported = IsHardwareDecodeSupported(pCodecCtx->codec_id);
335 retry_decode_open = 0;
340 if (pCodec == NULL) {
341 throw InvalidCodec(
"A valid video codec could not be found for this file.",
path);
345 av_dict_set(&
opts,
"strict",
"experimental", 0);
349 int i_decoder_hw = 0;
351 char *adapter_ptr = NULL;
357 pCodecCtx->get_format = get_hw_dec_format;
359 if (adapter_num < 3 && adapter_num >=0) {
360 #if defined(__linux__)
361 snprintf(adapter,
sizeof(adapter),
"/dev/dri/renderD%d", adapter_num+128);
362 adapter_ptr = adapter;
364 switch (i_decoder_hw) {
366 hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
369 hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
372 hw_de_av_device_type = AV_HWDEVICE_TYPE_VDPAU;
375 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
378 hw_de_av_device_type = AV_HWDEVICE_TYPE_VAAPI;
382 #elif defined(_WIN32)
385 switch (i_decoder_hw) {
387 hw_de_av_device_type = AV_HWDEVICE_TYPE_CUDA;
390 hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
393 hw_de_av_device_type = AV_HWDEVICE_TYPE_D3D11VA;
396 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
399 hw_de_av_device_type = AV_HWDEVICE_TYPE_DXVA2;
402 #elif defined(__APPLE__)
405 switch (i_decoder_hw) {
407 hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
410 hw_de_av_device_type = AV_HWDEVICE_TYPE_QSV;
413 hw_de_av_device_type = AV_HWDEVICE_TYPE_VIDEOTOOLBOX;
423 #if defined(__linux__)
424 if( adapter_ptr != NULL && access( adapter_ptr, W_OK ) == 0 ) {
425 #elif defined(_WIN32)
426 if( adapter_ptr != NULL ) {
427 #elif defined(__APPLE__)
428 if( adapter_ptr != NULL ) {
437 hw_device_ctx = NULL;
439 if (av_hwdevice_ctx_create(&hw_device_ctx, hw_de_av_device_type, adapter_ptr, NULL, 0) >= 0) {
440 const char* hw_name = av_hwdevice_get_type_name(hw_de_av_device_type);
441 std::string hw_msg =
"HW decode active: ";
442 hw_msg += (hw_name ? hw_name :
"unknown");
444 if (!(pCodecCtx->hw_device_ctx = av_buffer_ref(hw_device_ctx))) {
487 pCodecCtx->thread_type &= ~FF_THREAD_FRAME;
491 int avcodec_return = avcodec_open2(pCodecCtx, pCodec, &
opts);
492 if (avcodec_return < 0) {
493 std::stringstream avcodec_error_msg;
494 avcodec_error_msg <<
"A video codec was found, but could not be opened. Error: " << av_err2string(avcodec_return);
500 AVHWFramesConstraints *constraints = NULL;
501 void *hwconfig = NULL;
502 hwconfig = av_hwdevice_hwconfig_alloc(hw_device_ctx);
506 ((AVVAAPIHWConfig *)hwconfig)->config_id = ((VAAPIDecodeContext *)(pCodecCtx->priv_data))->va_config;
507 constraints = av_hwdevice_get_hwframe_constraints(hw_device_ctx,hwconfig);
510 if (pCodecCtx->coded_width < constraints->min_width ||
511 pCodecCtx->coded_height < constraints->min_height ||
512 pCodecCtx->coded_width > constraints->max_width ||
513 pCodecCtx->coded_height > constraints->max_height) {
516 retry_decode_open = 1;
519 av_buffer_unref(&hw_device_ctx);
520 hw_device_ctx = NULL;
525 ZmqLogger::Instance()->
AppendDebugMethod(
"\nDecode hardware acceleration is used\n",
"Min width :", constraints->min_width,
"Min Height :", constraints->min_height,
"MaxWidth :", constraints->max_width,
"MaxHeight :", constraints->max_height,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
526 retry_decode_open = 0;
528 av_hwframe_constraints_free(&constraints);
541 if (pCodecCtx->coded_width < 0 ||
542 pCodecCtx->coded_height < 0 ||
543 pCodecCtx->coded_width > max_w ||
544 pCodecCtx->coded_height > max_h ) {
545 ZmqLogger::Instance()->
AppendDebugMethod(
"DIMENSIONS ARE TOO LARGE for hardware acceleration\n",
"Max Width :", max_w,
"Max Height :", max_h,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
547 retry_decode_open = 1;
550 av_buffer_unref(&hw_device_ctx);
551 hw_device_ctx = NULL;
555 ZmqLogger::Instance()->
AppendDebugMethod(
"\nDecode hardware acceleration is used\n",
"Max Width :", max_w,
"Max Height :", max_h,
"Frame width :", pCodecCtx->coded_width,
"Frame height :", pCodecCtx->coded_height);
556 retry_decode_open = 0;
564 retry_decode_open = 0;
566 }
while (retry_decode_open);
575 if (audioStream != -1) {
580 aStream = pFormatCtx->streams[audioStream];
586 const AVCodec *aCodec = avcodec_find_decoder(codecId);
592 bool audio_opened =
false;
593 if (aCodec != NULL) {
595 AVDictionary *
opts = NULL;
596 av_dict_set(&
opts,
"strict",
"experimental", 0);
599 audio_opened = (avcodec_open2(aCodecCtx, aCodec, &
opts) >= 0);
610 const bool invalid_audio_info =
615 (aCodecCtx->sample_fmt == AV_SAMPLE_FMT_NONE);
616 if (invalid_audio_info) {
618 "FFmpegReader::Open (Disable invalid audio stream)",
623 "sample_fmt",
static_cast<int>(aCodecCtx ? aCodecCtx->sample_fmt : AV_SAMPLE_FMT_NONE));
629 if (avcodec_is_open(aCodecCtx)) {
630 avcodec_flush_buffers(aCodecCtx);
640 "FFmpegReader::Open (Audio codec unavailable; disabling audio)",
641 "audioStream", audioStream);
657 "FFmpegReader::Open (Invalid FPS detected; applying fallback)",
665 "FFmpegReader::Open (Invalid video_timebase detected; applying fallback)",
672 AVDictionaryEntry *tag = NULL;
673 while ((tag = av_dict_get(pFormatCtx->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
674 QString str_key = tag->key;
675 QString str_value = tag->value;
676 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
680 for (
unsigned int i = 0; i < pFormatCtx->nb_streams; i++) {
681 AVStream* st = pFormatCtx->streams[i];
682 if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
683 size_t side_data_size = 0;
684 const uint8_t *displaymatrix = ffmpeg_stream_get_side_data(
685 st, AV_PKT_DATA_DISPLAYMATRIX, &side_data_size);
687 side_data_size >= 9 *
sizeof(int32_t) &&
689 double rotation = -av_display_rotation_get(
690 reinterpret_cast<const int32_t *
>(displaymatrix));
691 if (std::isnan(rotation))
696 const uint8_t *spherical = ffmpeg_stream_get_side_data(
697 st, AV_PKT_DATA_SPHERICAL, &side_data_size);
698 if (spherical && side_data_size >=
sizeof(AVSphericalMapping)) {
701 const AVSphericalMapping *map =
702 reinterpret_cast<const AVSphericalMapping *
>(spherical);
703 const char *proj_name = av_spherical_projection_name(map->projection);
704 info.
metadata[
"spherical_projection"] = proj_name ? proj_name :
"unknown";
706 auto to_deg = [](int32_t v) {
707 return static_cast<double>(v) / 65536.0;
709 info.
metadata[
"spherical_yaw"] = std::to_string(to_deg(map->yaw));
710 info.
metadata[
"spherical_pitch"] = std::to_string(to_deg(map->pitch));
711 info.
metadata[
"spherical_roll"] = std::to_string(to_deg(map->roll));
718 previous_packet_location.
frame = -1;
752 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
758 AVPacket *recent_packet = packet;
763 int max_attempts = 128;
768 "attempts", attempts);
780 RemoveAVPacket(recent_packet);
785 if(avcodec_is_open(pCodecCtx)) {
786 avcodec_flush_buffers(pCodecCtx);
792 av_buffer_unref(&hw_device_ctx);
793 hw_device_ctx = NULL;
797 if (img_convert_ctx) {
798 sws_freeContext(img_convert_ctx);
799 img_convert_ctx =
nullptr;
801 if (pFrameRGB_cached) {
808 if(avcodec_is_open(aCodecCtx)) {
809 avcodec_flush_buffers(aCodecCtx);
821 working_cache.
Clear();
824 avformat_close_input(&pFormatCtx);
825 av_freep(&pFormatCtx);
832 largest_frame_processed = 0;
833 seek_audio_frame_found = 0;
834 seek_video_frame_found = 0;
835 current_video_frame = 0;
836 last_video_frame.reset();
837 last_final_video_frame.reset();
// Report whether the selected video stream is flagged as an attached picture.
// The visible return is true only when all of these hold: a format context
// exists, videoStream is a non-negative index, the stream pointer at that
// index is non-null, and its disposition has the AV_DISPOSITION_ATTACHED_PIC
// bit set.
841 bool FFmpegReader::HasAlbumArt() {
845 return pFormatCtx && videoStream >= 0 && pFormatCtx->streams[videoStream]
846 && (pFormatCtx->streams[videoStream]->disposition & AV_DISPOSITION_ATTACHED_PIC);
// Choose a duration (in seconds) from the recorded candidates according to
// duration_strategy. The case labels of the switch are not visible in this
// excerpt, so the mapping from strategy value to group below is not shown.
849 double FFmpegReader::PickDurationSeconds()
const {
// A candidate counts as known only when it is strictly positive.
850 auto has_value = [](
double value) {
return value > 0.0; };
852 switch (duration_strategy) {
// First visible group: video stream, then audio stream, then container.
854 if (has_value(video_stream_duration_seconds))
855 return video_stream_duration_seconds;
856 if (has_value(audio_stream_duration_seconds))
857 return audio_stream_duration_seconds;
858 if (has_value(format_duration_seconds))
859 return format_duration_seconds;
// Second visible group: audio stream, then video stream, then container.
862 if (has_value(audio_stream_duration_seconds))
863 return audio_stream_duration_seconds;
864 if (has_value(video_stream_duration_seconds))
865 return video_stream_duration_seconds;
866 if (has_value(format_duration_seconds))
867 return format_duration_seconds;
// Third visible group: the largest of the known candidates.
872 double longest = 0.0;
873 if (has_value(video_stream_duration_seconds))
874 longest = std::max(longest, video_stream_duration_seconds);
875 if (has_value(audio_stream_duration_seconds))
876 longest = std::max(longest, audio_stream_duration_seconds);
877 if (has_value(format_duration_seconds))
878 longest = std::max(longest, format_duration_seconds);
879 if (has_value(longest))
// Final visible checks: container duration, then the inferred duration.
885 if (has_value(format_duration_seconds))
886 return format_duration_seconds;
887 if (has_value(inferred_duration_seconds))
888 return inferred_duration_seconds;
// Turn the duration chosen by PickDurationSeconds() into info.duration and
// update is_duration_known accordingly.
893 void FFmpegReader::ApplyDurationStrategy() {
895 const double chosen_seconds = PickDurationSeconds();
// Without a positive duration and a positive frame rate nothing can be
// derived, so the duration is marked unknown.
897 if (chosen_seconds <= 0.0 || fps_value <= 0.0) {
900 is_duration_known =
false;
// Convert seconds to a whole number of frames (rounded to nearest).
904 const int64_t frames =
static_cast<int64_t
>(std::llround(chosen_seconds * fps_value));
// A second "unknown" exit; its guarding condition is not visible in this
// excerpt.
908 is_duration_known =
false;
// The stored duration is recomputed from the rounded frame count, so it
// corresponds to a whole number of frames at fps_value.
913 info.
duration =
static_cast<float>(
static_cast<double>(frames) / fps_value);
914 is_duration_known =
true;
// Populate audio-related reader info from the opened audio stream: channel
// layout, file size, duration candidates and stream metadata. Many statements
// of this function are not visible in this excerpt.
917 void FFmpegReader::UpdateAudioInfo() {
// The arguments of this call are not visible in this excerpt.
927 AVChannelLayout audio_ch_layout = ffmpeg_get_valid_channel_layout(
929 if (audio_ch_layout.nb_channels > 0) {
932 codec_channels = audio_ch_layout.nb_channels;
// Helper that keeps the larger of the stored and the new duration.
944 auto record_duration = [](
double &target,
double seconds) {
946 target = std::max(target, seconds);
// File size comes from the I/O context; -1 when there is none.
951 info.
file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
982 if (aStream->duration > 0) {
// Container duration is divided by AV_TIME_BASE to obtain seconds.
// NOTE(review): UpdateVideoInfo() tests `>= 0` for the same field while this
// function tests `> 0` - confirm whether the difference is intended.
985 if (pFormatCtx->duration > 0) {
987 record_duration(format_duration_seconds,
static_cast<double>(pFormatCtx->duration) / AV_TIME_BASE);
1017 ApplyDurationStrategy();
// Copy every audio-stream metadata entry into info.metadata; the empty key
// with AV_DICT_IGNORE_SUFFIX iterates over all entries. Values are trimmed of
// surrounding whitespace.
1020 AVDictionaryEntry *tag = NULL;
1021 while ((tag = av_dict_get(aStream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
1022 QString str_key = tag->key;
1023 QString str_value = tag->value;
1024 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
// Release the channel layout obtained at the top of the function.
1027 av_channel_layout_uninit(&audio_ch_layout);
// Populate video-related reader info from the opened video stream: file size,
// frame rate, aspect ratio, interlacing, duration candidates, still-image
// detection and stream metadata. Many statements of this function are not
// visible in this excerpt.
1031 void FFmpegReader::UpdateVideoInfo() {
// Helper that keeps the larger of the stored and the new duration.
1037 auto record_duration = [](
double &target,
double seconds) {
1039 target = std::max(target, seconds);
// File size comes from the I/O context; -1 when there is none.
1044 info.
file_size = pFormatCtx->pb ? avio_size(pFormatCtx->pb) : -1;
1051 AVRational framerate = av_guess_frame_rate(pFormatCtx, pStream, NULL);
1063 if (pStream->sample_aspect_ratio.num != 0) {
// Interlace detection runs once: check_interlace is set here and cleared
// again on the AV_FIELD_UNKNOWN branch below.
// NOTE(review): presumably so a later call can retry - confirm.
1086 if (!check_interlace) {
1087 check_interlace =
true;
1089 switch(field_order) {
1090 case AV_FIELD_PROGRESSIVE:
1103 case AV_FIELD_UNKNOWN:
1105 check_interlace =
false;
// Container duration is divided by AV_TIME_BASE to obtain seconds.
1120 if (pFormatCtx->duration >= 0) {
1122 record_duration(format_duration_seconds,
static_cast<double>(pFormatCtx->duration) / AV_TIME_BASE);
// No duration from either the stream or the container: record a placeholder
// of 60 * 60 * 1 seconds (one hour).
1132 if (video_stream_duration_seconds <= 0.0 && format_duration_seconds <= 0.0 &&
1133 pStream->duration == AV_NOPTS_VALUE && pFormatCtx->duration == AV_NOPTS_VALUE) {
1135 record_duration(video_stream_duration_seconds, 60 * 60 * 1);
// Same one-hour placeholder when the demuxer name is exactly "gif" and no
// duration has been recorded.
1139 if (video_stream_duration_seconds <= 0.0 && format_duration_seconds <= 0.0 &&
1140 pFormatCtx && pFormatCtx->iformat && strcmp(pFormatCtx->iformat->name,
"gif") == 0) {
1141 record_duration(video_stream_duration_seconds, 60 * 60 * 1);
1145 ApplyDurationStrategy();
// Still-image detection inputs: codec id, demuxer name containing "image2",
// attached-picture flag, and a stream that reports exactly one frame.
1151 const bool likely_still_codec =
1152 codec_id == AV_CODEC_ID_MJPEG ||
1153 codec_id == AV_CODEC_ID_PNG ||
1154 codec_id == AV_CODEC_ID_BMP ||
1155 codec_id == AV_CODEC_ID_TIFF ||
1156 codec_id == AV_CODEC_ID_WEBP ||
1157 codec_id == AV_CODEC_ID_JPEG2000;
1158 const bool likely_image_demuxer =
1159 pFormatCtx && pFormatCtx->iformat && pFormatCtx->iformat->name &&
1160 strstr(pFormatCtx->iformat->name,
"image2");
1161 const bool has_attached_pic = HasAlbumArt();
1162 const bool single_frame_stream =
1163 (pStream && pStream->nb_frames > 0 && pStream->nb_frames <= 1);
// Part of this expression (and the definition of single_frame_clip) is not
// visible in this excerpt.
1166 const bool is_still_image_video =
1168 ((single_frame_stream || single_frame_clip) &&
1169 (likely_still_codec || likely_image_demuxer));
1171 if (is_still_image_video) {
// With no audio stream selected, the one-hour placeholder is recorded for the
// still image and the duration strategy is applied again.
1176 if (audioStream < 0) {
1177 record_duration(video_stream_duration_seconds, 60 * 60 * 1);
1180 ApplyDurationStrategy();
// Copy every video-stream metadata entry into info.metadata; values are
// trimmed of surrounding whitespace.
1185 AVDictionaryEntry *tag = NULL;
1186 while ((tag = av_dict_get(pStream->metadata,
"", tag, AV_DICT_IGNORE_SUFFIX))) {
1187 QString str_key = tag->key;
1188 QString str_value = tag->value;
1189 info.
metadata[str_key.toStdString()] = str_value.trimmed().toStdString();
1194 return this->is_duration_known;
1198 last_seek_max_frame = -1;
1199 seek_stagnant_count = 0;
1202 throw ReaderClosed(
"The FFmpegReader is closed. Call Open() before calling this method.",
path);
1205 if (requested_frame < 1)
1206 requested_frame = 1;
1211 throw InvalidFile(
"Could not detect the duration of the video or audio stream.",
path);
1226 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
1239 int64_t diff = requested_frame - last_frame;
1240 if (diff >= 1 && diff <= 20) {
1242 frame = ReadStream(requested_frame);
1247 Seek(requested_frame);
1256 frame = ReadStream(requested_frame);
// Read and process packets on behalf of `requested_frame`, returning a Frame.
// Only part of the loop is visible in this excerpt; the comments below
// describe the surviving statements.
// @param requested_frame frame number passed on to the packet processors and
//                        to CheckWorkingFrames()
1264 std::shared_ptr<Frame> FFmpegReader::ReadStream(int64_t requested_frame) {
1266 bool check_seek =
false;
1267 int packet_error = -1;
// Snapshots used by the progress check further down.
1268 int64_t no_progress_count = 0;
1269 int64_t prev_packets_read = packet_status.
packets_read();
1272 double prev_video_pts_seconds = video_pts_seconds;
1282 CheckWorkingFrames(requested_frame);
1287 if (is_cache_found) {
// A new packet is fetched unless a held packet is still pending.
1291 if (!hold_packet || !packet) {
1293 packet_error = GetNextPacket();
1294 if (packet_error < 0 && !packet) {
1305 check_seek = CheckSeek();
// Video packet (the rest of this condition is not visible).
1317 if ((
info.
has_video && packet && packet->stream_index == videoStream) ||
1321 ProcessVideoPacket(requested_frame);
// Checked right after video processing so a flagged hardware-decode failure
// can trigger the software fallback.
1322 if (ReopenWithoutHardwareDecode(requested_frame)) {
// Audio packet (the rest of this condition is not visible).
1327 if ((
info.
has_audio && packet && packet->stream_index == audioStream) ||
1331 ProcessAudioPacket(requested_frame);
// Packets belonging to a stream type that is disabled in `info` are removed
// without being processed.
1336 if ((!
info.
has_video && packet && packet->stream_index == videoStream) ||
1337 (!
info.
has_audio && packet && packet->stream_index == audioStream)) {
1339 if (packet->stream_index == videoStream) {
1341 }
else if (packet->stream_index == audioStream) {
1347 RemoveAVPacket(packet);
1357 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::ReadStream (force EOF)",
"packets_read", packet_status.
packets_read(),
"packets_decoded", packet_status.
packets_decoded(),
"packets_eof", packet_status.
packets_eof,
"video_eof", packet_status.
video_eof,
"audio_eof", packet_status.
audio_eof,
"end_of_file", packet_status.
end_of_file);
// Progress detection: the counter is reset on progress and incremented
// otherwise; 2000 is the visible threshold (the remainder of that condition
// is not visible).
1370 const bool has_progress =
1374 (video_pts_seconds != prev_video_pts_seconds);
1377 no_progress_count = 0;
1379 no_progress_count++;
1380 if (no_progress_count >= 2000
1385 "requested_frame", requested_frame,
1386 "no_progress_count", no_progress_count,
1400 prev_video_pts_seconds = video_pts_seconds;
1408 "largest_frame_processed", largest_frame_processed,
1409 "Working Cache Count", working_cache.
Count());
1418 CheckWorkingFrames(requested_frame);
// Build a frame numbered largest_frame_processed: copy the image from `frame`
// when it has one, and add silent audio.
1434 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1435 if (frame->has_image_data) {
1436 f->AddImage(std::make_shared<QImage>(frame->GetImage()->copy()));
1440 if (!frame->has_image_data) {
1445 f->AddAudioSilence(samples_in_frame);
// Second construction path: reuse the image of last_final_video_frame, or
// failing that last_video_frame, but only when that frame has image data and
// its number does not exceed requested_frame.
1451 std::shared_ptr<Frame> f = CreateFrame(largest_frame_processed);
1452 if (last_final_video_frame && last_final_video_frame->has_image_data
1453 && last_final_video_frame->number <= requested_frame) {
1454 f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
1455 }
else if (last_video_frame && last_video_frame->has_image_data
1456 && last_video_frame->number <= requested_frame) {
1457 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
1461 f->AddAudioSilence(samples_in_frame);
// Read the next packet from the format context into `packet`.
// @return the value returned by av_read_frame(); the success branch below is
//         taken when it is >= 0.
1469 int FFmpegReader::GetNextPacket() {
1470 int found_packet = 0;
1471 AVPacket *next_packet;
// The packet object is heap-allocated before the read.
// NOTE(review): the failure branch that would release next_packet is not
// visible in this excerpt - confirm it is deleted when the read fails.
1472 next_packet =
new AVPacket();
1473 found_packet = av_read_frame(pFormatCtx, next_packet);
// The previously held packet is released (its guarding condition is not
// visible in this excerpt).
1477 RemoveAVPacket(packet);
1480 if (found_packet >= 0) {
1482 packet = next_packet;
// Branch on the stream the packet belongs to; the branch bodies are not
// visible in this excerpt.
1485 if (packet->stream_index == videoStream) {
1487 }
else if (packet->stream_index == audioStream) {
1496 return found_packet;
// Send the current packet to the video decoder and try to receive a decoded
// frame into pFrame.
// @return frameFinished; it is initialised to 0 and the statements that set it
//         on success are not visible in this excerpt.
// Hardware-decode problems are reported through note_hw_decode_failure(),
// which can raise hw_decode_failed so the caller can fall back to software.
1500 bool FFmpegReader::GetAVFrame() {
1501 int frameFinished = 0;
// Records a candidate hardware-decode failure for error `err` seen at `stage`.
// The first condition tests whether hardware decoding is in effect (its body
// is not visible). Only AVERROR_INVALIDDATA before any video frame has been
// decoded is counted; the third such error sets hw_decode_failed.
1502 auto note_hw_decode_failure = [&](
int err,
const char* stage) {
1504 if (!
hw_de_on || !hw_de_supported || force_sw_decode) {
1507 if (err == AVERROR_INVALIDDATA && packet_status.
video_decoded == 0) {
1508 hw_decode_error_count++;
1510 std::string(
"FFmpegReader::GetAVFrame (hardware decode failure candidate during ") + stage +
")",
1511 "error_count", hw_decode_error_count,
1513 if (hw_decode_error_count >= 3) {
1514 hw_decode_failed =
true;
1527 int send_packet_err = 0;
1528 int64_t send_packet_pts = 0;
// The packet is sent when it belongs to the video stream, or when there is no
// packet at all (a null packet is then passed to avcodec_send_packet).
1529 if ((packet && packet->stream_index == videoStream) || !packet) {
1530 send_packet_err = avcodec_send_packet(pCodecCtx, packet);
// Once the decoder accepted the packet it no longer needs to be held.
1532 if (packet && send_packet_err >= 0) {
1533 send_packet_pts = GetPacketPTS();
1534 hold_packet =
false;
// Any send error other than EOF is logged and fed to the hardware-failure
// counter; the specific codes below add a more descriptive log line.
1544 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
1545 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: Not sent [" + av_err2string(send_packet_err) +
"])",
"send_packet_err", send_packet_err,
"send_packet_pts", send_packet_pts);
1546 note_hw_decode_failure(send_packet_err,
"send_packet");
1547 if (send_packet_err == AVERROR(EAGAIN)) {
1549 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(EAGAIN): user must read output with avcodec_receive_frame()",
"send_packet_pts", send_packet_pts);
1551 if (send_packet_err == AVERROR(EINVAL)) {
1552 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(EINVAL): codec not opened, it is an encoder, or requires flush",
"send_packet_pts", send_packet_pts);
1554 if (send_packet_err == AVERROR(ENOMEM)) {
1555 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (send packet: AVERROR(ENOMEM): failed to add packet to internal queue, or legitimate decoding errors",
"send_packet_pts", send_packet_pts);
1562 int receive_frame_err = 0;
1563 AVFrame *decoded_frame = next_frame;
1564 AVFrame *next_frame2;
// next_frame2 is the frame handed to the decoder; on this visible path it
// aliases next_frame (other assignments are not visible in this excerpt).
1572 next_frame2 = next_frame;
// Keep receiving for as long as the decoder reports success.
1575 while (receive_frame_err >= 0) {
1576 receive_frame_err = avcodec_receive_frame(pCodecCtx, next_frame2);
1578 if (receive_frame_err != 0) {
// The error text is concatenated into the message in the same way as the
// send-packet log above, so the log shows the actual decoder error.
1579 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAVFrame (receive frame: frame not ready yet from decoder [" + av_err2string(receive_frame_err) + "])",
"receive_frame_err", receive_frame_err,
"send_packet_pts", send_packet_pts);
1580 note_hw_decode_failure(receive_frame_err,
"receive_frame");
// EOF and EINVAL both flush the decoder buffers.
1582 if (receive_frame_err == AVERROR_EOF) {
1584 "FFmpegReader::GetAVFrame (receive frame: AVERROR_EOF: EOF detected from decoder, flushing buffers)",
"send_packet_pts", send_packet_pts);
1585 avcodec_flush_buffers(pCodecCtx);
1588 if (receive_frame_err == AVERROR(EINVAL)) {
1590 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EINVAL): invalid frame received, flushing buffers)",
"send_packet_pts", send_packet_pts);
1591 avcodec_flush_buffers(pCodecCtx);
1593 if (receive_frame_err == AVERROR(EAGAIN)) {
1595 "FFmpegReader::GetAVFrame (receive frame: AVERROR(EAGAIN): output is not available in this state - user must try to send new input)",
"send_packet_pts", send_packet_pts);
1597 if (receive_frame_err == AVERROR_INPUT_CHANGED) {
1599 "FFmpegReader::GetAVFrame (receive frame: AVERROR_INPUT_CHANGED: current decoded frame has changed parameters with respect to first decoded frame)",
"send_packet_pts", send_packet_pts);
// Frame is in the hardware pixel format: transfer its data into next_frame
// and copy the frame properties across. A failure of either step is reported
// as a hardware-decode failure candidate.
1610 if (next_frame2->format == hw_de_av_pix_fmt) {
1611 if ((err = av_hwframe_transfer_data(next_frame, next_frame2, 0)) < 0) {
1613 "FFmpegReader::GetAVFrame (Failed to transfer data to output frame)",
1616 note_hw_decode_failure(AVERROR_INVALIDDATA,
"hwframe_transfer");
1619 if ((err = av_frame_copy_props(next_frame, next_frame2)) < 0) {
1621 "FFmpegReader::GetAVFrame (Failed to copy props to output frame)",
1624 note_hw_decode_failure(AVERROR_INVALIDDATA,
"hwframe_copy_props");
// Fill in format and dimensions when the transfer left them unset.
1627 if (next_frame->format == AV_PIX_FMT_NONE) {
1628 next_frame->format = pCodecCtx->sw_pix_fmt;
1630 if (next_frame->width <= 0) {
1631 next_frame->width = next_frame2->width;
1633 if (next_frame->height <= 0) {
1634 next_frame->height = next_frame2->height;
1636 decoded_frame = next_frame;
1639 decoded_frame = next_frame2;
1645 decoded_frame = next_frame2;
// A frame without a first data plane is unusable and is reported as a
// hardware-decode failure candidate.
1648 if (!decoded_frame->data[0]) {
1650 "FFmpegReader::GetAVFrame (Decoded frame missing image data)",
1651 "format", decoded_frame->format,
1652 "width", decoded_frame->width,
1653 "height", decoded_frame->height);
1654 note_hw_decode_failure(AVERROR_INVALIDDATA,
"decoded_frame_empty");
// The failure counter is reset here; hw_decode_succeeded is set when hardware
// decoding is in effect.
1661 hw_decode_error_count = 0;
1663 if (
hw_de_on && hw_de_supported && !force_sw_decode) {
1664 hw_decode_succeeded =
true;
// Fall back to the stream's codec parameters when the frame carries no
// pixel format.
1670 AVPixelFormat decoded_pix_fmt = (AVPixelFormat)(decoded_frame->format);
1671 if (decoded_pix_fmt == AV_PIX_FMT_NONE)
1672 decoded_pix_fmt = (AVPixelFormat)(pStream->codecpar->format);
// Copy image planes and colour metadata into pFrame.
1676 av_image_copy(pFrame->data, pFrame->linesize, (
const uint8_t**)decoded_frame->data, decoded_frame->linesize,
1678 pFrame->format = decoded_pix_fmt;
1681 pFrame->color_range = decoded_frame->color_range;
1682 pFrame->colorspace = decoded_frame->colorspace;
1683 pFrame->color_primaries = decoded_frame->color_primaries;
1684 pFrame->color_trc = decoded_frame->color_trc;
1685 pFrame->chroma_location = decoded_frame->chroma_location;
// video_pts prefers the frame pts and falls back to pkt_dts.
1691 if (decoded_frame->pts != AV_NOPTS_VALUE) {
1694 video_pts = decoded_frame->pts;
1695 }
else if (decoded_frame->pkt_dts != AV_NOPTS_VALUE) {
1697 video_pts = decoded_frame->pkt_dts;
1701 "FFmpegReader::GetAVFrame (Successful frame received)",
"video_pts", video_pts,
"send_packet_pts", send_packet_pts);
1707 if (
hw_de_on && hw_de_supported && next_frame2 != next_frame) {
// Alternative decode path using avcodec_decode_video2 / av_picture_copy.
// NOTE(review): presumably selected by a preprocessor branch for older FFmpeg
// versions; the directive is not visible in this excerpt - confirm.
1712 avcodec_decode_video2(pCodecCtx, next_frame, &frameFinished, packet);
1718 if (frameFinished) {
1722 av_picture_copy((AVPicture *) pFrame, (AVPicture *) next_frame, pCodecCtx->pix_fmt,
info.
width,
1731 return frameFinished;
// Switch the reader to software decoding after a hardware-decode failure was
// flagged.
// @param requested_frame frame to seek back to after the switch
// NOTE(review): the return statements of this function are not visible in
// this excerpt; ReadStream() tests the result as a boolean.
1734 bool FFmpegReader::ReopenWithoutHardwareDecode(int64_t requested_frame) {
// Proceeds only when a failure is flagged and software decoding has not
// already been forced (the body of this guard is not visible).
1736 if (!hw_decode_failed || force_sw_decode) {
1741 "FFmpegReader::ReopenWithoutHardwareDecode (falling back to software decode)",
1742 "requested_frame", requested_frame,
1743 "video_packets_read", packet_status.
video_read,
1745 "hw_decode_error_count", hw_decode_error_count);
// Force software decoding from now on and clear the failure bookkeeping.
1747 force_sw_decode =
true;
1748 hw_decode_failed =
false;
1749 hw_decode_error_count = 0;
1753 Seek(requested_frame);
// Marks the parameter as used on a path that does not otherwise reference it.
1756 (void) requested_frame;
// NOTE(review): this return likely belongs to a separate accessor whose
// signature line was dropped from this excerpt - confirm.
1763 return hw_decode_succeeded;
// Evaluate the result of a pending seek and re-seek further back when the
// seek landed at or beyond the target frame. The return statements are not
// visible in this excerpt.
1770 bool FFmpegReader::CheckSeek() {
// Limits for the retry logic below.
1773 const int64_t kSeekRetryMax = 5;
1774 const int kSeekStagnantMax = 2;
// True while no frame of the kind being sought (video or audio) has been
// found yet; the body of this check is not visible.
1778 if ((is_video_seek && !seek_video_frame_found) || (!is_video_seek && !seek_audio_frame_found))
1786 int64_t max_seeked_frame = std::max(seek_audio_frame_found, seek_video_frame_found);
// Track whether consecutive checks keep reporting the same frame.
1788 if (max_seeked_frame == last_seek_max_frame) {
1789 seek_stagnant_count++;
1791 last_seek_max_frame = max_seeked_frame;
1792 seek_stagnant_count = 0;
// Landed at or past the target.
1796 if (max_seeked_frame >= seeking_frame) {
1799 "is_video_seek", is_video_seek,
1800 "max_seeked_frame", max_seeked_frame,
1801 "seeking_frame", seeking_frame,
1802 "seeking_pts", seeking_pts,
1803 "seek_video_frame_found", seek_video_frame_found,
1804 "seek_audio_frame_found", seek_audio_frame_found);
// Re-seek earlier by 10 * seek_count^2 frames. Once seek_count reaches
// kSeekRetryMax and the result is stagnant, the offset is capped at
// 10 * kSeekRetryMax^2.
1807 if (seek_count < kSeekRetryMax) {
1808 Seek(seeking_frame - (10 * seek_count * seek_count));
1809 }
else if (seek_stagnant_count >= kSeekStagnantMax) {
1811 Seek(seeking_frame - (10 * kSeekRetryMax * kSeekRetryMax));
1814 Seek(seeking_frame - (10 * seek_count * seek_count));
1819 "is_video_seek", is_video_seek,
1820 "packet->pts", GetPacketPTS(),
1821 "seeking_pts", seeking_pts,
1822 "seeking_frame", seeking_frame,
1823 "seek_video_frame_found", seek_video_frame_found,
1824 "seek_audio_frame_found", seek_audio_frame_found);
1838 void FFmpegReader::ProcessVideoPacket(int64_t requested_frame) {
1841 int frame_finished = GetAVFrame();
1844 if (!frame_finished) {
1847 RemoveAVFrame(pFrame);
1853 int64_t current_frame = ConvertVideoPTStoFrame(video_pts);
1856 if (!seek_video_frame_found && is_seeking)
1857 seek_video_frame_found = current_frame;
1863 working_cache.
Add(CreateFrame(requested_frame));
1869 AVPixelFormat decoded_pix_fmt = (pFrame && pFrame->format != AV_PIX_FMT_NONE)
1870 ?
static_cast<AVPixelFormat
>(pFrame->format)
1872 bool src_full_range = (pFrame && pFrame->color_range == AVCOL_RANGE_JPEG);
1873 AVPixelFormat src_pix_fmt = NormalizeDeprecatedPixFmt(decoded_pix_fmt, src_full_range);
1874 int src_width = (pFrame && pFrame->width > 0) ? pFrame->width :
info.
width;
1875 int src_height = (pFrame && pFrame->height > 0) ? pFrame->height :
info.
height;
1876 int height = src_height;
1877 int width = src_width;
1879 AVFrame *pFrameRGB = pFrameRGB_cached;
1882 if (pFrameRGB ==
nullptr)
1884 pFrameRGB_cached = pFrameRGB;
1887 uint8_t *buffer =
nullptr;
1908 max_width = std::max(
float(max_width), max_width * max_scale_x);
1909 max_height = std::max(
float(max_height), max_height * max_scale_y);
1915 QSize width_size(max_width * max_scale_x,
1918 max_height * max_scale_y);
1920 if (width_size.width() >= max_width && width_size.height() >= max_height) {
1921 max_width = std::max(max_width, width_size.width());
1922 max_height = std::max(max_height, width_size.height());
1924 max_width = std::max(max_width, height_size.width());
1925 max_height = std::max(max_height, height_size.height());
1932 float preview_ratio = 1.0;
1939 max_width =
info.
width * max_scale_x * preview_ratio;
1940 max_height =
info.
height * max_scale_y * preview_ratio;
1948 QSize bounded_size(max_width, max_height);
1950 if (bounded_size.width() > max_decode_size.width() ||
1951 bounded_size.height() > max_decode_size.height()) {
1952 bounded_size.scale(max_decode_size, Qt::KeepAspectRatio);
1953 max_width = bounded_size.width();
1954 max_height = bounded_size.height();
1959 int original_height = src_height;
1960 if (max_width != 0 && max_height != 0 && max_width < width && max_height < height) {
1962 float ratio = float(width) / float(height);
1963 int possible_width = round(max_height * ratio);
1964 int possible_height = round(max_width / ratio);
1966 if (possible_width <= max_width) {
1968 width = possible_width;
1969 height = max_height;
1973 height = possible_height;
1978 const int bytes_per_pixel = 4;
1979 int raw_buffer_size = (width * height * bytes_per_pixel) + 128;
1982 constexpr
size_t ALIGNMENT = 32;
1983 int buffer_size = ((raw_buffer_size + ALIGNMENT - 1) / ALIGNMENT) * ALIGNMENT;
1984 buffer = (
unsigned char*) aligned_malloc(buffer_size, ALIGNMENT);
1989 int scale_mode = SWS_FAST_BILINEAR;
1991 scale_mode = SWS_BICUBIC;
1993 img_convert_ctx = sws_getCachedContext(img_convert_ctx, src_width, src_height, src_pix_fmt, width, height,
PIX_FMT_RGBA, scale_mode, NULL, NULL, NULL);
1994 if (!img_convert_ctx)
1996 const int *src_coeff = sws_getCoefficients(SWS_CS_DEFAULT);
1997 const int *dst_coeff = sws_getCoefficients(SWS_CS_DEFAULT);
1998 const int dst_full_range = 1;
1999 sws_setColorspaceDetails(img_convert_ctx, src_coeff, src_full_range ? 1 : 0,
2000 dst_coeff, dst_full_range, 0, 1 << 16, 1 << 16);
2002 if (!pFrame || !pFrame->data[0] || pFrame->linesize[0] <= 0) {
2004 if (
hw_de_on && hw_de_supported && !force_sw_decode) {
2005 hw_decode_failed =
true;
2007 "FFmpegReader::ProcessVideoPacket (Invalid source frame; forcing software fallback)",
2008 "requested_frame", requested_frame,
2009 "current_frame", current_frame,
2010 "src_pix_fmt", src_pix_fmt,
2011 "src_width", src_width,
2012 "src_height", src_height);
2016 RemoveAVFrame(pFrame);
2023 const int scaled_lines = sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
2024 original_height, pFrameRGB->data, pFrameRGB->linesize);
2025 if (scaled_lines <= 0) {
2027 if (
hw_de_on && hw_de_supported && !force_sw_decode) {
2028 hw_decode_failed =
true;
2030 "FFmpegReader::ProcessVideoPacket (sws_scale failed; forcing software fallback)",
2031 "requested_frame", requested_frame,
2032 "current_frame", current_frame,
2033 "scaled_lines", scaled_lines,
2034 "src_pix_fmt", src_pix_fmt,
2035 "src_width", src_width,
2036 "src_height", src_height);
2041 RemoveAVFrame(pFrame);
2047 std::shared_ptr<Frame> f = CreateFrame(current_frame);
2050 if (!ffmpeg_has_alpha(src_pix_fmt)) {
2052 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888_Premultiplied, buffer);
2055 f->AddImage(width, height, bytes_per_pixel, QImage::Format_RGBA8888, buffer);
2059 working_cache.
Add(f);
2062 last_video_frame = f;
2068 RemoveAVFrame(pFrame);
2074 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::ProcessVideoPacket (After)",
"requested_frame", requested_frame,
"current_frame", current_frame,
"f->number", f->number,
"video_pts_seconds", video_pts_seconds);
// Decode the current audio packet and distribute its samples into Frame objects
// held in working_cache.
// NOTE(review): this listing omits some source lines (the embedded line numbers
// have gaps), so the comments below describe only the statements that are visible.
2078 void FFmpegReader::ProcessAudioPacket(int64_t requested_frame) {
// When the packet carries a valid PTS, map it to a frame / sample location.
2081 if (packet && packet->pts != AV_NOPTS_VALUE) {
2083 location = GetAudioPTSLocation(packet->pts);
// While seeking, remember the first audio frame number that was encountered.
2086 if (!seek_audio_frame_found && is_seeking)
2087 seek_audio_frame_found = location.
frame;
// Make sure a frame for requested_frame exists in the working cache.
2094 working_cache.
Add(CreateFrame(requested_frame));
2098 "requested_frame", requested_frame,
2099 "target_frame", location.
frame,
2103 int frame_finished = 0;
2107 int packet_samples = 0;
// Decode path using avcodec_send_packet() / avcodec_receive_frame().
2111 int send_packet_err = avcodec_send_packet(aCodecCtx, packet);
2112 if (send_packet_err < 0 && send_packet_err != AVERROR_EOF) {
2116 int receive_frame_err = avcodec_receive_frame(aCodecCtx, audio_frame);
2117 if (receive_frame_err >= 0) {
2120 if (receive_frame_err == AVERROR_EOF) {
// EINVAL or EOF from the decoder triggers a flush of the audio codec buffers.
2124 if (receive_frame_err == AVERROR(EINVAL) || receive_frame_err == AVERROR_EOF) {
2126 avcodec_flush_buffers(aCodecCtx);
2128 if (receive_frame_err != 0) {
// Alternate decode path using avcodec_decode_audio4().
// NOTE(review): presumably selected by a preprocessor branch that is not visible here.
2133 int used = avcodec_decode_audio4(aCodecCtx, audio_frame, &frame_finished, packet);
2136 if (frame_finished) {
// Use the decoded frame's PTS to (re)compute the audio location.
2142 audio_pts = audio_frame->pts;
2145 location = GetAudioPTSLocation(audio_pts);
2148 int plane_size = -1;
2154 data_size = av_samples_get_buffer_size(&plane_size, nb_channels,
// Total sample count across all channels in the decoded frame.
2158 packet_samples = audio_frame->nb_samples * nb_channels;
// Per-channel sample count carried by this packet.
2167 int pts_remaining_samples = packet_samples /
info.
channels;
2170 if (pts_remaining_samples == 0) {
2172 "packet_samples", packet_samples,
2174 "pts_remaining_samples", pts_remaining_samples);
// Consume the packet's samples frame by frame: take at most what is left in the
// current frame and advance previous_packet_location.frame while samples remain.
2178 while (pts_remaining_samples) {
2183 int samples = samples_per_frame - previous_packet_location.
sample_start;
2184 if (samples > pts_remaining_samples)
2185 samples = pts_remaining_samples;
2188 pts_remaining_samples -= samples;
2190 if (pts_remaining_samples > 0) {
2192 previous_packet_location.
frame++;
2201 "packet_samples", packet_samples,
// Allocate a planar-float (AV_SAMPLE_FMT_FLTP) destination buffer sized for this
// frame's sample count and info.channels channels.
2209 audio_converted->nb_samples = audio_frame->nb_samples;
2210 av_samples_alloc(audio_converted->data, audio_converted->linesize,
info.
channels, audio_frame->nb_samples, AV_SAMPLE_FMT_FLTP, 0);
// Configure the resampler (avr): input/output channel layouts and output sample format.
2217 AVChannelLayout input_layout = ffmpeg_get_valid_channel_layout(
2219 AVChannelLayout output_layout = ffmpeg_get_valid_channel_layout(
2221 int in_layout_err = av_opt_set_chlayout(avr,
"in_chlayout", &input_layout, 0);
2222 int out_layout_err = av_opt_set_chlayout(avr,
"out_chlayout", &output_layout, 0);
2230 av_opt_set_int(avr,
"out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
// The local layout structs are released before the error check below.
2235 av_channel_layout_uninit(&input_layout);
2236 av_channel_layout_uninit(&output_layout);
// Any layout or resampler-init failure is reported as InvalidChannels.
2237 if (in_layout_err < 0 || out_layout_err < 0 || swr_init_err < 0) {
2239 throw InvalidChannels(
"Could not initialize FFmpeg audio channel layout or resampler.",
path);
2242 if (swr_init_err < 0) {
2252 audio_converted->data,
2253 audio_converted->linesize[0],
2254 audio_converted->nb_samples,
2256 audio_frame->linesize[0],
2257 audio_frame->nb_samples);
// Copy each channel's converted samples into one or more frames, beginning at
// location.frame and moving to the next frame number as each one is filled.
2260 int64_t starting_frame_number = -1;
2261 for (
int channel_filter = 0; channel_filter <
info.
channels; channel_filter++) {
2263 starting_frame_number = location.
frame;
2264 int channel_buffer_size = nb_samples;
2265 auto *channel_buffer = (
float *) (audio_converted->data[channel_filter]);
2269 int remaining_samples = channel_buffer_size;
2270 while (remaining_samples > 0) {
// Never write more samples than fit in the rest of the current frame.
2275 int samples = std::fmin(samples_per_frame - start, remaining_samples);
2278 std::shared_ptr<Frame> f = CreateFrame(starting_frame_number);
2281 f->AddAudio(
true, channel_filter, start, channel_buffer, samples, 1.0f);
2285 "frame", starting_frame_number,
2288 "channel", channel_filter,
2289 "samples_per_frame", samples_per_frame);
2292 working_cache.
Add(f);
2295 remaining_samples -= samples;
// Advance the read pointer past the samples that were just copied.
2298 if (remaining_samples > 0)
2299 channel_buffer += samples;
2302 starting_frame_number++;
// Free the first data pointer of the buffer obtained from av_samples_alloc() above.
2310 av_free(audio_converted->data[0]);
2319 "requested_frame", requested_frame,
2320 "starting_frame", location.
frame,
2321 "end_frame", starting_frame_number - 1,
2322 "audio_pts_seconds", audio_pts_seconds);
// Seek toward requested_frame: clamps the request, clears the working cache and
// PTS tracking state, computes a video or audio PTS seek target, and flushes the
// codec buffers.
// NOTE(review): several lines of this function are missing from this listing
// (the embedded line numbers have gaps); only visible statements are described.
2328 void FFmpegReader::Seek(int64_t requested_frame) {
// Frame numbers below 1 are clamped to 1.
2330 if (requested_frame < 1)
2331 requested_frame = 1;
2334 if (requested_frame > largest_frame_processed && packet_status.
end_of_file) {
2341 "requested_frame", requested_frame,
2342 "seek_count", seek_count,
2343 "last_frame", last_frame);
// Discard in-progress frames and reset the PTS / frame tracking state.
2346 working_cache.
Clear();
2350 video_pts_seconds = NO_PTS_OFFSET;
2352 audio_pts_seconds = NO_PTS_OFFSET;
2353 hold_packet =
false;
2355 current_video_frame = 0;
2356 largest_frame_processed = 0;
2357 last_final_video_frame.reset();
2362 packet_status.
reset(
false);
// Number of frames subtracted from the request when computing the seek target.
2368 int buffer_amount = 12;
// Requests close to the start of the file take this branch.
// NOTE(review): most of the branch body is not visible here.
2369 if (requested_frame - buffer_amount < 20) {
2383 if (seek_count == 1) {
2386 seeking_pts = ConvertFrameToVideoPTS(1);
2388 seek_audio_frame_found = 0;
2389 seek_video_frame_found = 0;
2393 bool seek_worked =
false;
2394 int64_t seek_target = 0;
// Seek target expressed as a video PTS.
2398 seek_target = ConvertFrameToVideoPTS(requested_frame - buffer_amount);
2403 is_video_seek =
true;
// Seek target expressed as an audio PTS.
2410 seek_target = ConvertFrameToAudioPTS(requested_frame - buffer_amount);
2415 is_video_seek =
false;
// Flush the audio and video decoder buffers.
2424 avcodec_flush_buffers(aCodecCtx);
2428 avcodec_flush_buffers(pCodecCtx);
// Reset the previous audio packet location's frame to -1.
2431 previous_packet_location.
frame = -1;
// Record the seek target and requested frame, and clear the "found" markers.
2436 if (seek_count == 1) {
2438 seeking_pts = seek_target;
2439 seeking_frame = requested_frame;
2441 seek_audio_frame_found = 0;
2442 seek_video_frame_found = 0;
// Return a timestamp for the current packet: its PTS, or its DTS when the PTS is
// AV_NOPTS_VALUE and a DTS is available.
// NOTE(review): the condition guarding the AV_NOPTS_VALUE return is not visible
// in this listing.
2470 int64_t FFmpegReader::GetPacketPTS() {
2472 int64_t current_pts = packet->pts;
// Fall back to the decode timestamp when no presentation timestamp is set.
2473 if (current_pts == AV_NOPTS_VALUE && packet->dts != AV_NOPTS_VALUE)
2474 current_pts = packet->dts;
2480 return AV_NOPTS_VALUE;
// Determine pts_offset_seconds by reading packets until a usable timestamp has
// been seen for the video and audio streams.
// NOTE(review): parts of this function are missing from this listing; only the
// visible statements are described.
2485 void FFmpegReader::UpdatePTSOffset() {
// NOTE(review): presumably an early-out when the offset was already computed;
// the branch body is not visible here.
2486 if (pts_offset_seconds != NO_PTS_OFFSET) {
2490 pts_offset_seconds = 0.0;
2491 double video_pts_offset_seconds = 0.0;
2492 double audio_pts_offset_seconds = 0.0;
2494 bool has_video_pts =
false;
2497 has_video_pts =
true;
2499 bool has_audio_pts =
false;
2502 has_audio_pts =
true;
// Keep reading packets until both streams have an accepted timestamp.
2506 while (!has_video_pts || !has_audio_pts) {
2508 if (GetNextPacket() < 0)
2513 int64_t pts = GetPacketPTS();
2516 if (!has_video_pts && packet->stream_index == videoStream) {
// A video offset is accepted only when its magnitude is at most 10 seconds.
2522 if (std::abs(video_pts_offset_seconds) <= 10.0) {
2523 has_video_pts =
true;
2526 else if (!has_audio_pts && packet->stream_index == audioStream) {
// The same 10-second limit applies to the audio offset.
2532 if (std::abs(audio_pts_offset_seconds) <= 10.0) {
2533 has_audio_pts =
true;
// Pick the final offset from the per-stream offsets; when both streams produced
// a timestamp, the video offset is used.
2543 pts_offset_seconds = video_pts_offset_seconds;
2545 pts_offset_seconds = audio_pts_offset_seconds;
2546 }
else if (has_video_pts && has_audio_pts) {
2548 pts_offset_seconds = video_pts_offset_seconds;
// Convert a video PTS into a frame number and keep current_video_frame up to date.
// NOTE(review): the timebase / fps setup and the return statement are not visible
// in this listing.
2553 int64_t FFmpegReader::ConvertVideoPTStoFrame(int64_t pts) {
2555 int64_t previous_video_frame = current_video_frame;
2557 const double video_timebase_value =
// PTS -> seconds, shifted by pts_offset_seconds.
2563 double video_seconds = (double(pts) * video_timebase_value) + pts_offset_seconds;
// Seconds -> frame number; the +1 makes a timestamp of zero map to frame 1.
2566 int64_t frame = round(video_seconds * fps_value) + 1;
// A current_video_frame of 0 is replaced by the computed frame.
2569 if (current_video_frame == 0)
2570 current_video_frame = frame;
2574 if (frame == previous_video_frame) {
2579 current_video_frame++;
// Convert a frame number into a video PTS:
// seconds = (frame_number - 1) / fps + pts_offset_seconds, then seconds divided
// by the video timebase and rounded.
// NOTE(review): the definitions of video_timebase_value and fps_value are not
// fully visible in this listing.
2588 int64_t FFmpegReader::ConvertFrameToVideoPTS(int64_t frame_number) {
2590 const double video_timebase_value =
2596 double seconds = (double(frame_number - 1) / fps_value) + pts_offset_seconds;
2599 int64_t video_pts = round(seconds / video_timebase_value);
// Convert a frame number into an audio PTS:
// seconds = (frame_number - 1) / fps + pts_offset_seconds, then seconds divided
// by the audio timebase and rounded.
// NOTE(review): the definitions of audio_timebase_value and fps_value are not
// fully visible in this listing.
2606 int64_t FFmpegReader::ConvertFrameToAudioPTS(int64_t frame_number) {
2608 const double audio_timebase_value =
2614 double seconds = (double(frame_number - 1) / fps_value) + pts_offset_seconds;
2617 int64_t audio_pts = round(seconds / audio_timebase_value);
// Map an audio PTS to an AudioLocation (a frame number plus the starting sample
// within that frame), and remember it in previous_packet_location.
// NOTE(review): some lines (including the construction of `location` and the
// return statement) are not visible in this listing.
2624 AudioLocation FFmpegReader::GetAudioPTSLocation(int64_t pts) {
2625 const double audio_timebase_value =
// PTS -> seconds, shifted by pts_offset_seconds.
2632 double audio_seconds = (double(pts) * audio_timebase_value) + pts_offset_seconds;
// Fractional frame position; the +1 makes a timestamp of zero map to frame 1.
2635 double frame = (audio_seconds * fps_value) + 1;
// The integer part is the frame number; the fractional part, scaled by
// samples_per_frame, gives the starting sample inside that frame.
2638 int64_t whole_frame = int64_t(frame);
2641 double sample_start_percentage = frame - double(whole_frame);
2647 int sample_start = round(
double(samples_per_frame) * sample_start_percentage);
// Guards for out-of-range values (their bodies are not visible here).
2650 if (whole_frame < 1)
2652 if (sample_start < 0)
// When a previous packet location is known (frame != -1) and the new location is
// near it, the previous location's frame is reused and the adjustment is logged.
2659 if (previous_packet_location.
frame != -1) {
2660 if (location.
is_near(previous_packet_location, samples_per_frame, samples_per_frame)) {
2661 int64_t orig_frame = location.
frame;
2666 location.
frame = previous_packet_location.
frame;
2669 ZmqLogger::Instance()->
AppendDebugMethod(
"FFmpegReader::GetAudioPTSLocation (Audio Gap Detected)",
"Source Frame", orig_frame,
"Source Audio Sample", orig_start,
"Target Frame", location.
frame,
"Target Audio Sample", location.
sample_start,
"pts", pts);
// Remember this location for the next call.
2678 previous_packet_location = location;
// Look up requested_frame in working_cache and return it when present; otherwise
// a frame is added to working_cache and largest_frame_processed is updated.
// NOTE(review): the lines that construct the new Frame are not visible in this
// listing.
2685 std::shared_ptr<Frame> FFmpegReader::CreateFrame(int64_t requested_frame) {
2687 std::shared_ptr<Frame> output = working_cache.
GetFrame(requested_frame);
// Second lookup of the same frame.
// NOTE(review): presumably a re-check after acquiring a lock; the lock itself is
// not visible here.
2691 output = working_cache.
GetFrame(requested_frame);
2692 if(output)
return output;
2700 working_cache.
Add(output);
// Track the highest frame number seen so far.
2703 if (requested_frame > largest_frame_processed)
2704 largest_frame_processed = requested_frame;
// Decide whether requested_frame lies at or before the first frames found after
// a seek (tracked by seek_audio_frame_found / seek_video_frame_found).
// NOTE(review): the body of the final condition and the return statement are not
// visible in this listing; presumably seek_trash is set and returned.
2711 bool FFmpegReader::IsPartialFrame(int64_t requested_frame) {
2714 bool seek_trash =
false;
// max_seeked_frame is the larger of the two "first frame found after seek" markers.
2715 int64_t max_seeked_frame = seek_audio_frame_found;
2716 if (seek_video_frame_found > max_seeked_frame) {
2717 max_seeked_frame = seek_video_frame_found;
// The check applies per stream: the stream must exist, its marker must be
// non-zero, and max_seeked_frame must be at or beyond requested_frame.
2719 if ((
info.
has_audio && seek_audio_frame_found && max_seeked_frame >= requested_frame) ||
2720 (
info.
has_video && seek_video_frame_found && max_seeked_frame >= requested_frame)) {
// Walk the frames currently held in working_cache, decide for each one whether
// its video and audio are ready, fill in missing images from earlier frames, and
// remove processed frames from working_cache.
// NOTE(review): many lines of this function are missing from this listing (the
// embedded line numbers have gaps); comments describe only visible statements.
2728 void FFmpegReader::CheckWorkingFrames(int64_t requested_frame) {
// Hold getFrameMutex for the lifetime of this lock object.
2731 const std::lock_guard<std::recursive_mutex> lock(
getFrameMutex);
// Iterate over the vector of frames returned by working_cache.GetFrames().
2734 std::vector<std::shared_ptr<openshot::Frame>> working_frames = working_cache.
GetFrames();
2735 std::vector<std::shared_ptr<openshot::Frame>>::iterator working_itr;
2738 for(working_itr = working_frames.begin(); working_itr != working_frames.end(); ++working_itr)
2741 std::shared_ptr<Frame> f = *working_itr;
// Null entries and frames past requested_frame get special handling
// (the branch body is not visible here).
2744 if (!f || f->number > requested_frame) {
// Expected presentation time of this frame, and the most recent decoded PTS.
2750 double frame_pts_seconds = (double(f->number - 1) /
info.
fps.
ToDouble()) + pts_offset_seconds;
2751 double recent_pts_seconds = std::max(video_pts_seconds, audio_pts_seconds);
2754 bool is_video_ready =
false;
2755 bool is_audio_ready =
false;
2756 double recent_pts_diff = recent_pts_seconds - frame_pts_seconds;
// Video readiness: the video PTS has reached this frame's time, or the most
// recent PTS is more than 1.5 seconds ahead (further conditions are elided).
2757 if ((frame_pts_seconds <= video_pts_seconds)
2758 || (recent_pts_diff > 1.5)
2762 is_video_ready =
true;
2764 "frame_number", f->number,
2765 "frame_pts_seconds", frame_pts_seconds,
2766 "video_pts_seconds", video_pts_seconds,
2767 "recent_pts_diff", recent_pts_diff);
// Image fallback chain for a frame without image data: previous frame instance,
// then last_final_video_frame, then last_video_frame (each only when it has an
// image; the latter two only when not newer than this frame), else solid black.
2773 if (previous_frame_instance && previous_frame_instance->has_image_data) {
2774 f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2778 if (!f->has_image_data
2779 && last_final_video_frame
2780 && last_final_video_frame->has_image_data
2781 && last_final_video_frame->number <= f->number) {
2782 f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
2786 if (!f->has_image_data
2788 && last_video_frame->has_image_data
2789 && last_video_frame->number <= f->number) {
2790 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
2794 if (!f->has_image_data) {
2796 "FFmpegReader::CheckWorkingFrames (no previous image found; using black frame)",
2797 "frame_number", f->number);
2798 f->AddColor(
"#000000");
// Audio readiness: the audio PTS is more than 1.0 second past this frame's time,
// or the most recent PTS is more than 1.5 seconds ahead (further conditions are
// elided).
2803 double audio_pts_diff = audio_pts_seconds - frame_pts_seconds;
2804 if ((frame_pts_seconds < audio_pts_seconds && audio_pts_diff > 1.0)
2805 || (recent_pts_diff > 1.5)
2810 is_audio_ready =
true;
2812 "frame_number", f->number,
2813 "frame_pts_seconds", frame_pts_seconds,
2814 "audio_pts_seconds", audio_pts_seconds,
2815 "audio_pts_diff", audio_pts_diff,
2816 "recent_pts_diff", recent_pts_diff);
// Frames at or before the first frames found after a seek are flagged here.
2818 bool is_seek_trash = IsPartialFrame(f->number);
2826 "frame_number", f->number,
2827 "is_video_ready", is_video_ready,
2828 "is_audio_ready", is_audio_ready,
2835 && !packet_status.
end_of_file && !is_seek_trash) {
// Same image fallback chain as above (without the black-frame step).
2840 if (previous_frame_instance && previous_frame_instance->has_image_data) {
2841 f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2843 if (!f->has_image_data
2844 && last_final_video_frame
2845 && last_final_video_frame->has_image_data
2846 && last_final_video_frame->number <= f->number) {
2847 f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
2849 if (!f->has_image_data
2851 && last_video_frame->has_image_data
2852 && last_video_frame->number <= f->number) {
2853 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
// Both streams are ready but the frame still has no image: try the fallback
// chain once more.
2860 if (!f->has_image_data && is_video_ready && is_audio_ready) {
2862 if (previous_frame_instance && previous_frame_instance->has_image_data) {
2863 f->AddImage(std::make_shared<QImage>(previous_frame_instance->GetImage()->copy()));
2865 if (!f->has_image_data
2866 && last_final_video_frame
2867 && last_final_video_frame->has_image_data
2868 && last_final_video_frame->number <= f->number) {
2869 f->AddImage(std::make_shared<QImage>(last_final_video_frame->GetImage()->copy()));
2871 if (!f->has_image_data
2873 && last_video_frame->has_image_data
2874 && last_video_frame->number <= f->number) {
2875 f->AddImage(std::make_shared<QImage>(last_video_frame->GetImage()->copy()));
2881 if (!f->has_image_data) {
// A frame is processed when both streams are ready, when the end of the file has
// been reached, or when the frame is seek trash.
2885 if ((!packet_status.
end_of_file && is_video_ready && is_audio_ready) || packet_status.
end_of_file || is_seek_trash) {
2888 "requested_frame", requested_frame,
2889 "f->number", f->number,
2890 "is_seek_trash", is_seek_trash,
2891 "Working Cache Count", working_cache.
Count(),
// Non-trash frames update last_final_video_frame (when they have an image) and
// last_frame; working_cache.Remove() is called on both visible paths.
2895 if (!is_seek_trash) {
2898 if (f->has_image_data) {
2899 last_final_video_frame = f;
2903 working_cache.
Remove(f->number);
2906 last_frame = f->number;
2909 working_cache.
Remove(f->number);
// Release the local vector's elements and capacity.
2916 working_frames.clear();
2917 working_frames.shrink_to_fit();
// Estimate an average frame rate by counting video packets, bucketed by the
// second in which they occur.
// NOTE(review): the packet loop header and the code that applies the result are
// not visible in this listing.
2921 void FFmpegReader::CheckFPS() {
// One counter per second for the first three seconds; max_fps_index is the
// number of buckets (3).
2929 int frames_per_second[3] = {0,0,0};
2930 int max_fps_index =
sizeof(frames_per_second) /
sizeof(frames_per_second[0]);
2933 int all_frames_detected = 0;
2934 int starting_frames_detected = 0;
2939 if (GetNextPacket() < 0)
2944 if (packet->stream_index == videoStream) {
// The bucket index is the whole-second part of the packet's time.
2947 fps_index = int(video_seconds);
// Only packets that fall inside the bucket range count as "starting" frames.
2950 if (fps_index >= 0 && fps_index < max_fps_index) {
2952 starting_frames_detected++;
2953 frames_per_second[fps_index]++;
2957 all_frames_detected++;
// Default of 30 fps; replaced by counted frames divided by the number of seconds
// covered (capped at the bucket count) when data is available.
2962 float avg_fps = 30.0;
2963 if (starting_frames_detected > 0 && fps_index > 0) {
2964 avg_fps = float(starting_frames_detected) / std::min(fps_index, max_fps_index);
// Averages below 8 fps get special handling (branch body not visible here).
2968 if (avg_fps < 8.0) {
2977 if (all_frames_detected > 0) {
// Release resources held by an AVFrame. The visible statement frees the frame's
// first data pointer with av_freep(), which also sets that pointer to NULL.
// NOTE(review): any guard conditions and further cleanup are not visible in this
// listing.
2991 void FFmpegReader::RemoveAVFrame(AVFrame *remove_frame) {
2995 av_freep(&remove_frame->data[0]);
// Release an AVPacket; the visible statement deletes the packet object itself.
// NOTE(review): any cleanup performed before the delete is not visible in this
// listing.
3003 void FFmpegReader::RemoveAVPacket(AVPacket *remove_packet) {
3008 delete remove_packet;
3023 root[
"type"] =
"FFmpegReader";
3024 root[
"path"] =
path;
3025 switch (duration_strategy) {
3027 root[
"duration_strategy"] =
"VideoPreferred";
3030 root[
"duration_strategy"] =
"AudioPreferred";
3034 root[
"duration_strategy"] =
"LongestStream";
3051 catch (
const std::exception& e) {
3053 throw InvalidJSON(
"JSON is invalid (missing keys or invalid data types)");
3064 if (!root[
"path"].isNull())
3065 path = root[
"path"].asString();
3066 if (!root[
"duration_strategy"].isNull()) {
3067 const std::string strategy = root[
"duration_strategy"].asString();
3068 if (strategy ==
"VideoPreferred") {
3070 }
else if (strategy ==
"AudioPreferred") {
Shared helpers for Crop effect scaling logic.
Header file for all Exception classes.
AVPixelFormat hw_de_av_pix_fmt_global
AVHWDeviceType hw_de_av_device_type_global
Header file for FFmpegReader class.
Header file for FFmpegUtilities.
#define AV_FREE_CONTEXT(av_context)
#define AV_FREE_FRAME(av_frame)
#define SWR_CONVERT(ctx, out, linesize, out_count, in, linesize2, in_count)
#define AV_GET_CODEC_TYPE(av_stream)
#define AV_GET_CODEC_PIXEL_FORMAT(av_stream, av_context)
#define AV_GET_CODEC_CONTEXT(av_stream, av_codec)
#define AV_FIND_DECODER_CODEC_ID(av_stream)
#define AV_ALLOCATE_FRAME()
#define AV_COPY_PICTURE_DATA(av_frame, buffer, pix_fmt, width, height)
#define AV_FREE_PACKET(av_packet)
#define AVCODEC_REGISTER_ALL
#define AV_GET_CODEC_ATTRIBUTES(av_stream, av_context)
#define AV_ALLOCATE_IMAGE(av_frame, pix_fmt, width, height)
#define AV_GET_SAMPLE_FORMAT(av_stream, av_context)
#define AV_RESET_FRAME(av_frame)
Cross-platform helper to encourage returning freed memory to the OS.
#define FF_VIDEO_NUM_PROCESSORS
#define OPEN_MP_NUM_PROCESSORS
#define FF_AUDIO_NUM_PROCESSORS
Header file for Timeline class.
Header file for ZeroMQ-based Logger class.
void SetMaxBytesFromInfo(int64_t number_of_frames, int width, int height, int sample_rate, int channels)
Set maximum bytes to a different amount based on a ReaderInfo struct.
int64_t Count()
Count the frames in the queue.
void Add(std::shared_ptr< openshot::Frame > frame)
Add a Frame to the cache.
std::shared_ptr< openshot::Frame > GetFrame(int64_t frame_number)
Get a frame from the cache.
std::vector< std::shared_ptr< openshot::Frame > > GetFrames()
Get an array of all Frames.
void Remove(int64_t frame_number)
Remove a specific frame.
void Clear()
Clear the cache of all frames.
This class represents a clip (used to arrange readers on the timeline)
openshot::Keyframe scale_x
Curve representing the horizontal scaling in percent (0 to 1)
openshot::TimelineBase * ParentTimeline() override
Get the associated Timeline pointer (if any)
openshot::Keyframe scale_y
Curve representing the vertical scaling in percent (0 to 1)
openshot::ScaleType scale
The scale determines how a clip should be resized to fit its parent.
double Y
The Y value of the coordinate (usually representing the value of the property being animated)
This class uses the FFmpeg libraries, to open video files and audio files, and return openshot::Frame...
void Open() override
Open File - which is called by the constructor automatically.
bool HardwareDecodeSuccessful() const override
Return true if hardware decode was requested and successfully produced at least one frame.
FFmpegReader(const std::string &path, bool inspect_reader=true)
Constructor for FFmpegReader.
Json::Value JsonValue() const override
Generate Json::Value for this object.
bool GetIsDurationKnown()
Return true if frame can be read with GetFrame()
void SetJsonValue(const Json::Value root) override
Load Json::Value into this object.
CacheMemory final_cache
Final cache object used to hold final frames.
virtual ~FFmpegReader()
Destructor.
std::string Json() const override
Generate JSON string of this object.
std::shared_ptr< openshot::Frame > GetFrame(int64_t requested_frame) override
void Close() override
Close File.
void SetJson(const std::string value) override
Load JSON string into this object.
This class represents a fraction.
int num
Numerator for the fraction.
float ToFloat()
Return this fraction as a float (i.e. 1/2 = 0.5)
double ToDouble() const
Return this fraction as a double (i.e. 1/2 = 0.5)
Fraction Reciprocal() const
Return the reciprocal as a Fraction.
int den
Denominator for the fraction.
int GetSamplesPerFrame(openshot::Fraction fps, int sample_rate, int channels)
Calculate the # of samples per video frame (for the current frame number)
Exception when an invalid # of audio channels are detected.
Exception when no valid codec is found for a file.
Exception for files that can not be found or opened.
Exception for invalid JSON.
Point GetMaxPoint() const
Get max point (by Y coordinate)
Exception when no streams are found in the file.
Exception when memory could not be allocated.
Coordinate co
This is the primary coordinate.
openshot::ReaderInfo info
Information about the current media file.
int MaxDecodeWidth() const
Return the current maximum decoded frame width (0 when unlimited).
virtual void SetJsonValue(const Json::Value root)=0
Load Json::Value into this object.
virtual Json::Value JsonValue() const =0
Generate Json::Value for this object.
int MaxDecodeHeight() const
Return the current maximum decoded frame height (0 when unlimited).
std::recursive_mutex getFrameMutex
Mutex for multiple threads.
bool HasMaxDecodeSize() const
Return true when a maximum decoded frame size is active.
openshot::ClipBase * ParentClip()
Parent clip object of this reader (which can be unparented and NULL)
Exception when a reader is closed, and a frame is requested.
int DE_LIMIT_WIDTH_MAX
Maximum columns that hardware decode can handle.
int HW_DE_DEVICE_SET
Which GPU to use to decode (0 is the first)
int DE_LIMIT_HEIGHT_MAX
Maximum rows that hardware decode can handle.
static Settings * Instance()
Create or get an instance of this settings singleton (invoke the class with this method)
int HARDWARE_DECODER
Use video codec for faster video decoding (if supported)
int preview_height
Optional preview height of timeline image. If your preview window is smaller than the timeline,...
int preview_width
Optional preview width of timeline image. If your preview window is smaller than the timeline,...
This class represents a timeline.
void Log(std::string message)
Log message to all subscribers of this logger (if any)
void AppendDebugMethod(std::string method_name, std::string arg1_name="", float arg1_value=-1.0, std::string arg2_name="", float arg2_value=-1.0, std::string arg3_name="", float arg3_value=-1.0, std::string arg4_name="", float arg4_value=-1.0, std::string arg5_name="", float arg5_value=-1.0, std::string arg6_name="", float arg6_value=-1.0)
Append debug information.
static ZmqLogger * Instance()
Create or get an instance of this logger singleton (invoke the class with this method)
This namespace is the default namespace for all code in the openshot library.
@ SCALE_FIT
Scale the clip until either height or width fills the canvas (with no cropping)
@ SCALE_STRETCH
Scale the clip until both height and width fill the canvas (distort to fit)
@ SCALE_CROP
Scale the clip until both height and width fill the canvas (cropping the overlap)
ChannelLayout
This enumeration determines the audio channel layout (such as stereo, mono, 5 point surround,...
DurationStrategy
This enumeration determines which duration source to favor.
@ VideoPreferred
Prefer the video stream's duration, fallback to audio then container.
@ LongestStream
Use the longest value from video, audio, or container.
@ AudioPreferred
Prefer the audio stream's duration, fallback to video then container.
void ApplyCropResizeScale(Clip *clip, int source_width, int source_height, int &max_width, int &max_height)
Scale the requested max_width / max_height based on the Crop resize amount, capped by source size.
const Json::Value stringToJson(const std::string value)
This struct holds the associated video frame and starting sample # for an audio packet.
bool is_near(AudioLocation location, int samples_per_frame, int64_t amount)
int64_t packets_decoded()
int audio_bit_rate
The bit rate of the audio stream (in bytes)
int video_bit_rate
The bit rate of the video stream (in bytes)
bool has_single_image
Determines if this file only contains a single image.
float duration
Length of time (in seconds)
openshot::Fraction audio_timebase
The audio timebase determines how long each audio packet should be played.
int width
The width of the video (in pixels)
int channels
The number of audio channels used in the audio stream.
openshot::Fraction fps
Frames per second, as a fraction (i.e. 24/1 = 24 fps)
openshot::Fraction display_ratio
The ratio of width to height of the video stream (i.e. 640x480 has a ratio of 4/3)
int height
The height of the video (in pixels)
int pixel_format
The pixel format (i.e. YUV420P, RGB24, etc...)
int64_t video_length
The number of frames in the video stream.
std::string acodec
The name of the audio codec used to encode / decode the video stream.
std::map< std::string, std::string > metadata
An optional map/dictionary of metadata for this reader.
std::string vcodec
The name of the video codec used to encode / decode the video stream.
openshot::Fraction pixel_ratio
The pixel ratio of the video stream as a fraction (i.e. some pixels are not square)
openshot::ChannelLayout channel_layout
The channel layout (mono, stereo, 5 point surround, etc...)
bool has_video
Determines if this file has a video stream.
bool has_audio
Determines if this file has an audio stream.
openshot::Fraction video_timebase
The video timebase determines how long each frame stays on the screen.
int video_stream_index
The index of the video stream.
int sample_rate
The number of audio samples per second (44100 is a common sample rate)
int audio_stream_index
The index of the audio stream.
int64_t file_size
Size of file (in bytes)