@@ -107,7 +107,7 @@ static std::shared_ptr<dnnl::convolution_forward::primitive_desc> get_convolutio
}
Contributor:
(random spot)
Some thoughts about this PR:

  • Separation of layout_to_memory_desc looks good, especially for the use_strides case.
  • Does it clean up the lambda function that was introduced in https://github.com/openvinotoolkit/openvino/pull/32391/files?
  • layout_to_memory_desc is separated into multiple functions, but they are still implemented on top of calculate_memory_dims. Does that bring value?
  • I think some call paths are too deep. Is that unavoidable? For example:
    layout_to_memory_desc_grouped -> calculate_memory_dims -> calculate_default_dims -> calculate_3d_tensor_dims

Contributor (Author):
Updated
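For reference, a minimal sketch of the helper family this PR settles on, with signatures inferred from the call sites in the diff below; the exact declarations, parameter types, and namespace are assumptions, not copied from the sources:

// Hypothetical declarations inferred from the call sites in this diff.
namespace onednn {
// Plain conversion; callers pass format_tag::undef to let the helper pick a tag.
dnnl::memory::desc layout_to_memory_desc(const cldnn::layout& l, dnnl::memory::format_tag tag);
// Collapses the layout to a flat shape first (bias, scales, zero points).
dnnl::memory::desc layout_to_memory_desc_flatten(const cldnn::layout& l, dnnl::memory::format_tag tag);
// Preserves the blocked physical format (reduction and reorder paths).
dnnl::memory::desc layout_to_memory_desc_blocked(const cldnn::layout& l, dnnl::memory::format_tag tag);
// Builds the desc from explicit strides (padded i4/u4 weights in fully_connected).
dnnl::memory::desc layout_to_memory_desc_strides(const cldnn::layout& l, dnnl::memory::format_tag tag);
// Grouped-weights variant named in the review thread above (assumed).
dnnl::memory::desc layout_to_memory_desc_grouped(const cldnn::layout& l, dnnl::memory::format_tag tag);
}  // namespace onednn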


if (prim->bias.is_valid()) {
-auto bias_md = onednn::layout_to_memory_desc(impl_params.get_input_layout(2), dnnl::memory::format_tag::any, onednn::mem_flags::flatten);
+auto bias_md = onednn::layout_to_memory_desc_flatten(impl_params.get_input_layout(2), dnnl::memory::format_tag::any);
return std::make_shared<dnnl::convolution_forward::primitive_desc>(
engine.get_onednn_engine(),
dnnl::prop_kind::forward_inference,
@@ -178,7 +178,7 @@ struct convolution_onednn : typed_primitive_onednn_impl<convolution> {
a_zp = a_zp_node.get_attached_memory_ptr();
}

-dnnl::memory::desc desc = onednn::layout_to_memory_desc(a_zp->get_layout(), dnnl::memory::format_tag::a, onednn::mem_flags::flatten);
+dnnl::memory::desc desc = onednn::layout_to_memory_desc_flatten(a_zp->get_layout(), dnnl::memory::format_tag::a);
args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC, a_zp->get_onednn_memory(desc)});

GPU_DEBUG_TRACE_DETAIL << instance.id() << " activations_zero_points: "
@@ -187,7 +187,7 @@ struct convolution_onednn : typed_primitive_onednn_impl<convolution> {

if (instance.weights_zero_points_term()) {
auto w_zp = instance.weights_zero_points_memory();
-dnnl::memory::desc desc = onednn::layout_to_memory_desc(w_zp->get_layout(), dnnl::memory::format_tag::a, onednn::mem_flags::flatten);
+dnnl::memory::desc desc = onednn::layout_to_memory_desc_flatten(w_zp->get_layout(), dnnl::memory::format_tag::a);
args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS, w_zp->get_onednn_memory(desc)});

GPU_DEBUG_TRACE_DETAIL << instance.id() << " weights_zero_points: "
@@ -263,7 +263,7 @@ struct convolution_onednn : typed_primitive_onednn_impl<convolution> {
auto shape_consistent = onednn::keep_weights_reorder_shape_consistent(source_weights_layout, target_weights_desc);
OPENVINO_ASSERT(shape_consistent, "[GPU] Input shape and output shape of weight reorder should be same.");

-auto source_weights_desc = onednn::layout_to_memory_desc(source_weights_layout);
+auto source_weights_desc = onednn::layout_to_memory_desc(source_weights_layout, dnnl::memory::format_tag::undef);

const bool weights_format = true;
auto traits = convert_memory_desc_to_traits(target_weights_desc, weights_format, grouped_weights);
@@ -352,7 +352,7 @@ struct convolution_onednn : typed_primitive_onednn_impl<convolution> {
*_attrs.get());
_pd = *prim_desc;
} else {
-auto bias_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(2), dnnl::memory::format_tag::any, onednn::mem_flags::flatten);
+auto bias_md = onednn::layout_to_memory_desc_flatten(impl_params->get_input_layout(2), dnnl::memory::format_tag::any);
auto prim_desc = std::make_shared<dnnl::convolution_forward::primitive_desc>(
ib.get_engine().get_onednn_engine(),
dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct,
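To make the new flatten call concrete, a conceptual sketch with made-up sizes (plain oneDNN API usage, not the plugin's implementation): a bias held as a 4D layout of shape {1, 256, 1, 1} collapses to a one-dimensional descriptor.

// Illustrative only: a 1x256x1x1 bias flattened to a 1D desc with tag 'a'.
dnnl::memory::dims flat = {256};
dnnl::memory::desc bias_md(flat, dnnl::memory::data_type::f32, dnnl::memory::format_tag::a);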
@@ -53,7 +53,7 @@ static std::shared_ptr<dnnl::deconvolution_forward::primitive_desc> get_deconvol
}

if (prim->bias.is_valid()) {
-auto bias_md = onednn::layout_to_memory_desc(impl_params.get_input_layout(2), dnnl::memory::format_tag::any, onednn::mem_flags::flatten);
+auto bias_md = onednn::layout_to_memory_desc_flatten(impl_params.get_input_layout(2), dnnl::memory::format_tag::any);
return std::make_shared<dnnl::deconvolution_forward::primitive_desc>(
engine.get_onednn_engine(),
dnnl::prop_kind::forward_inference,
@@ -192,7 +192,7 @@ struct deconvolution_onednn : typed_primitive_onednn_impl<deconvolution> {
*_attrs.get());
_pd = *prim_desc;
} else {
-auto bias_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(2), dnnl::memory::format_tag::any, onednn::mem_flags::flatten);
+auto bias_md = onednn::layout_to_memory_desc_flatten(impl_params->get_input_layout(2), dnnl::memory::format_tag::any);
auto prim_desc = std::make_shared<dnnl::deconvolution_forward::primitive_desc>(
ib.get_engine().get_onednn_engine(),
dnnl::prop_kind::forward_inference, dnnl::algorithm::deconvolution_direct,
@@ -69,37 +69,36 @@ struct fully_connected_onednn : typed_primitive_onednn_impl<fully_connected> {
if (prim->decompression_scale.is_valid()) {
auto decompression_scale_idx = idx++;
auto scale_mem = instance.dep_memory_ptr(decompression_scale_idx);
-dnnl::memory::desc desc = onednn::layout_to_memory_desc(scale_mem->get_layout(), dnnl::memory::format_tag::a, onednn::mem_flags::flatten);
+dnnl::memory::desc desc = onednn::layout_to_memory_desc_flatten(scale_mem->get_layout(), dnnl::memory::format_tag::a);
args.insert({DNNL_ARG_ATTR_SCALES | DNNL_ARG_WEIGHTS, scale_mem->get_onednn_memory(desc)});
}

if (prim->decompression_zero_point.is_valid()) {
auto decompression_zp_idx = idx++;
auto zp_mem = instance.dep_memory_ptr(decompression_zp_idx);
-dnnl::memory::desc desc = onednn::layout_to_memory_desc(zp_mem->get_layout(), dnnl::memory::format_tag::a, onednn::mem_flags::flatten);
+dnnl::memory::desc desc = onednn::layout_to_memory_desc_flatten(zp_mem->get_layout(), dnnl::memory::format_tag::a);
args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_WEIGHTS, zp_mem->get_onednn_memory(desc)});
}
bool is_dyn_quan_input = instance.get_input_layout(0).data_type == data_types::i8 || instance.get_input_layout(0).data_type == data_types::u8;

if (is_dyn_quan_input && prim->activation_scale.is_valid()) {
auto activation_scale_idx = idx++;
auto act_scale_mem = instance.dep_memory_ptr(activation_scale_idx);
-dnnl::memory::desc desc = onednn::layout_to_memory_desc(act_scale_mem->get_layout(), dnnl::memory::format_tag::ab, onednn::mem_flags::flatten);
+dnnl::memory::desc desc = onednn::layout_to_memory_desc_flatten(act_scale_mem->get_layout(), dnnl::memory::format_tag::ab);
args.insert({DNNL_ARG_ATTR_SCALES | DNNL_ARG_SRC_0, act_scale_mem->get_onednn_memory(desc)});
}

if (is_dyn_quan_input && prim->activation_zero_point.is_valid()) {
auto activation_zp_idx = idx++;
auto act_zp_mem = instance.dep_memory_ptr(activation_zp_idx);
-dnnl::memory::desc desc = onednn::layout_to_memory_desc(act_zp_mem->get_layout(), dnnl::memory::format_tag::ab, onednn::mem_flags::flatten);
+dnnl::memory::desc desc = onednn::layout_to_memory_desc_flatten(act_zp_mem->get_layout(), dnnl::memory::format_tag::ab);
args.insert({DNNL_ARG_ATTR_ZERO_POINTS | DNNL_ARG_SRC_0, act_zp_mem->get_onednn_memory(desc)});
}

if (is_dyn_quan_input && prim->activation_precomputed_reduction.is_valid()) {
auto activation_precomputed_reduction_idx = idx++;
auto act_precomputed_reduction_mem = instance.dep_memory_ptr(activation_precomputed_reduction_idx);
-dnnl::memory::desc desc = onednn::layout_to_memory_desc(act_precomputed_reduction_mem->get_layout(),
-                                                        dnnl::memory::format_tag::ab, onednn::mem_flags::flatten);
+dnnl::memory::desc desc = onednn::layout_to_memory_desc_flatten(act_precomputed_reduction_mem->get_layout(), dnnl::memory::format_tag::ab);
args.insert({DNNL_ARG_ATTR_PRECOMPUTED_REDUCTIONS | DNNL_ARG_SRC_0, act_precomputed_reduction_mem->get_onednn_memory(desc)});
}
}
@@ -197,13 +196,16 @@ struct fully_connected_onednn : typed_primitive_onednn_impl<fully_connected> {
weights_layout.format = input_layout.format;
}

-auto use_strides_for_weight_md = (weights_layout.data_padding
-    && format::is_default_format(weights_layout.format)
-    && (weights_layout.data_type == data_types::i4 || weights_layout.data_type == data_types::u4)) ?
-    onednn::mem_flags::use_strides : onednn::mem_flags::None;
+dnnl::memory::desc weights_md;
+if (weights_layout.data_padding
+    && format::is_default_format(weights_layout.format)
+    && (weights_layout.data_type == data_types::i4 || weights_layout.data_type == data_types::u4)) {
+    weights_md = onednn::layout_to_memory_desc_strides(weights_layout, weights_fmt);
+} else {
+    weights_md = onednn::layout_to_memory_desc(weights_layout, weights_fmt);
+}

dnnl::memory::desc input_md = onednn::layout_to_memory_desc(input_layout, target_fmt);
-dnnl::memory::desc weights_md = onednn::layout_to_memory_desc(weights_layout, weights_fmt, use_strides_for_weight_md);
dnnl::memory::desc output_md = onednn::layout_to_memory_desc(output_layout, target_fmt);

if (has_bias) {
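A conceptual sketch of why fully_connected routes padded int4 weights through the strides variant (illustrative numbers; assumes a oneDNN build that provides data_type::u4): explicit strides can encode the padded row pitch, which no plain format tag expresses.

// Illustrative only: logical O x I dims with a row pitch widened by padding.
dnnl::memory::dims dims    = {4096, 4096};
dnnl::memory::dims strides = {4352, 1};  // 4096 payload + 256 padding per row
dnnl::memory::desc weights_md(dims, dnnl::memory::data_type::u4, strides);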
@@ -268,10 +268,9 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl<PType> {
_post_ops.append_binary(aalgorithm,
dnnl::memory::desc(fused_desc.at(idx).dims, fused_desc.at(idx).dt, fused_desc.at(idx).tag));
} else {
-dnnl::memory::desc md = onednn::layout_to_memory_desc(
-    impl_params->get_input_layout(fused_desc.at(idx).mem_dep),
-    fused_desc.at(idx).tag,
-    (fused_desc.at(idx).flatten ? onednn::mem_flags::flatten : onednn::mem_flags::None));
+dnnl::memory::desc md = fused_desc.at(idx).flatten
+    ? onednn::layout_to_memory_desc_flatten(impl_params->get_input_layout(fused_desc.at(idx).mem_dep), fused_desc.at(idx).tag)
+    : onednn::layout_to_memory_desc(impl_params->get_input_layout(fused_desc.at(idx).mem_dep), fused_desc.at(idx).tag);

_post_ops.append_binary(aalgorithm, md);
}
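For context on the append_binary path above, a minimal sketch of how a binary post-op consumes such a memory descriptor (the shape and algorithm are made up, not taken from the PR):

// Illustrative only: fuse an elementwise multiply as a binary post-op.
dnnl::memory::desc scale_md({1, 256, 1, 1}, dnnl::memory::data_type::f32, dnnl::memory::format_tag::abcd);
dnnl::post_ops ops;
ops.append_binary(dnnl::algorithm::binary_mul, scale_md);
dnnl::primitive_attr attrs;
attrs.set_post_ops(ops);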
@@ -57,8 +57,8 @@ struct reduction_onednn : typed_primitive_onednn_impl<reduce> {
// oneDNN reduction does not allow this. So this function reverts it.
reorder_unreduced_axis_no_fusion(input_layout, output_layout, prim->axes);

-auto input_md = onednn::layout_to_memory_desc(input_layout, dnnl::memory::format_tag::undef, mem_flags::need_blocked);
-auto output_md = onednn::layout_to_memory_desc(output_layout, dnnl::memory::format_tag::undef, mem_flags::need_blocked);
+auto input_md = onednn::layout_to_memory_desc_blocked(input_layout, dnnl::memory::format_tag::undef);
+auto output_md = onednn::layout_to_memory_desc_blocked(output_layout, dnnl::memory::format_tag::undef);

float p = 0.f;
float eps = 0.f;
@@ -122,8 +122,8 @@ struct reduction_onednn : typed_primitive_onednn_impl<reduce> {
dnnl::algorithm alg;
ib >> make_data(&alg, sizeof(dnnl::algorithm));

-auto input_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(0), dnnl::memory::format_tag::undef, mem_flags::need_blocked);
-auto output_md = onednn::layout_to_memory_desc(impl_params->get_output_layout(), dnnl::memory::format_tag::undef, mem_flags::need_blocked);
+auto input_md = onednn::layout_to_memory_desc_blocked(impl_params->get_input_layout(0), dnnl::memory::format_tag::undef);
+auto output_md = onednn::layout_to_memory_desc_blocked(impl_params->get_output_layout(), dnnl::memory::format_tag::undef);

float p, eps;
ib >> p >> eps;
@@ -54,8 +54,8 @@ struct reorder_onednn : typed_primitive_onednn_impl<reorder, dnnl::reorder::prim
auto input_layout = impl_params.get_input_layout(0);
auto output_layout = impl_params.get_output_layout();

-auto input_md = onednn::layout_to_memory_desc(input_layout, dnnl::memory::format_tag::undef, onednn::mem_flags::need_blocked);
-auto output_md = onednn::layout_to_memory_desc(output_layout, dnnl::memory::format_tag::undef, onednn::mem_flags::need_blocked);
+auto input_md = onednn::layout_to_memory_desc_blocked(input_layout, dnnl::memory::format_tag::undef);
+auto output_md = onednn::layout_to_memory_desc_blocked(output_layout, dnnl::memory::format_tag::undef);

OPENVINO_ASSERT(input_md.get_format_kind() != dnnl::memory::format_kind::any,
"[GPU] The format kind of the input memory descriptor of onednn reorder cannot be 'any'.");
@@ -87,8 +87,8 @@ struct reorder_onednn : typed_primitive_onednn_impl<reorder, dnnl::reorder::prim

const kernel_impl_params* impl_params = reinterpret_cast<kernel_impl_params*>(ib.getKernelImplParams());

-auto input_md = onednn::layout_to_memory_desc(impl_params->get_input_layout(0), dnnl::memory::format_tag::undef, onednn::mem_flags::need_blocked);
-auto output_md = onednn::layout_to_memory_desc(impl_params->get_output_layout(), dnnl::memory::format_tag::undef, onednn::mem_flags::need_blocked);
+auto input_md = onednn::layout_to_memory_desc_blocked(impl_params->get_input_layout(0), dnnl::memory::format_tag::undef);
+auto output_md = onednn::layout_to_memory_desc_blocked(impl_params->get_output_layout(), dnnl::memory::format_tag::undef);

auto prim_desc = std::make_shared<dnnl::reorder::primitive_desc>(
ib.get_engine().get_onednn_engine(),
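To illustrate the blocked requirement (sizes are made up; the tag mapping below is the conventional cldnn-to-oneDNN one, not copied from the helper): a b_fs_yx_fsv16 layout corresponds to the aBcd16b blocked tag, so the resulting descriptor reports format_kind::blocked rather than format_kind::any, which is what the assertion above enforces.

// Illustrative only: feature dimension blocked by 16.
dnnl::memory::dims dims = {1, 32, 7, 7};  // N, C, H, W
dnnl::memory::desc md(dims, dnnl::memory::data_type::f16, dnnl::memory::format_tag::aBcd16b);
// md.get_format_kind() == dnnl::memory::format_kind::blocked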