Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions js/web/lib/wasm/jsep/webgpu/ops/attention.ts
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ const createInPlaceSoftmaxProgramInfo = (
let local_offset = local_idx * uniforms.elements_per_thread;
let offset = (global_idx / ${WG}) * uniforms.total_sequence_length + local_offset;
let seq_causal_length = ${seqLens ? 'u32(past_sequence_length + workgroup_id.y + 1)' : 'total_sequence_length'};
var thread_max_vector = ${f32Type}(-3.402823e+38f);
var thread_max_vector = ${f32Type}(-3.4028234663852886e+38f);
for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < seq_causal_length; i++) {
thread_max_vector = max(${f32Type}(x[offset + i]), thread_max_vector);
}
Expand All @@ -378,7 +378,7 @@ const createInPlaceSoftmaxProgramInfo = (
})()};
workgroupBarrier();

var max_value = f32(-3.402823e+38f);
var max_value = f32(-3.4028234663852886e+38f);
for (var i = 0u; i < ${WG}; i++) {
max_value = max(thread_max[i], max_value);
}
Expand Down
2 changes: 1 addition & 1 deletion js/web/lib/wasm/jsep/webgpu/ops/softmax.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ const createSoftmaxProgramInfo = (context: ComputeContext, attributes: SoftmaxAt
// 6.2.4 in wgsl spec
const threadMaxDecl =
tensorTypeToWsglStorageType(transposedInput.dataType) === 'f32'
? `var threadMax = ${valueType}(-3.402823e+38f);`
? `var threadMax = ${valueType}(-3.4028234663852886e+38f);`
: `var threadMax = ${valueType}(-65504.0h);`;
const getShaderSource = (shaderHelper: ShaderHelper) => `
var<workgroup> rowMaxShared : ${valueType};
Expand Down
6 changes: 3 additions & 3 deletions onnxruntime/contrib_ops/webgpu/bert/attention.cc
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ Status AttentionProbsProgram::GenerateShaderCode(ShaderHelper& shader) const {
<< " let query_pos = m + local_id.y + past_sequence_length;\n"
<< " let key_pos = n + local_id.x;\n"
<< " if (key_pos > query_pos) {\n"
<< " sum = -3.40282e+38; // Set to very negative value for masking\n"
<< " sum = -3.4028234663852886e+38; // Set to very negative value for masking\n"
<< " }\n";
}

Expand Down Expand Up @@ -272,7 +272,7 @@ Status InPlaceSoftmaxProgram::GenerateShaderCode(ShaderHelper& shader) const {
<< "let effective_seq_length = seq_causal_length;\n";
}
shader.MainFunctionBody()
<< "var thread_max_vector = f32_val_t(-3.402823e+38f);\n"
<< "var thread_max_vector = f32_val_t(-3.4028234663852886e+38f);\n"
<< "for (var i: u32 = 0; i < uniforms.elements_per_thread && i + local_offset < effective_seq_length; i++) {\n"
<< " let actual_pos = local_offset + i + start_offset;\n"
<< " if (!should_apply_local_window || actual_pos < seq_causal_length) {\n"
Expand All @@ -289,7 +289,7 @@ Status InPlaceSoftmaxProgram::GenerateShaderCode(ShaderHelper& shader) const {
} else if (use_smooth_softmax_) {
shader.MainFunctionBody() << "var max_value: f32 = 0.0;\n";
} else {
shader.MainFunctionBody() << "var max_value = f32(-3.402823e+38f);\n";
shader.MainFunctionBody() << "var max_value = f32(-3.4028234663852886e+38f);\n";
}

shader.MainFunctionBody() << "for (var i = 0u; i < " << work_group_size_ << "; i++) {\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ fn get_total_sequence_length() -> u32 {
#if is_fp16
const min_value = q_element_t(-65504.0);
#else
const min_value = q_element_t(-3.402823e+38f);
const min_value = q_element_t(-3.4028234663852886e+38f);
#endif

// For max performance max_k_step should be the same as sg_size, however we might run out of registers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ $MAIN {

if (local_idx == 0u) {
// Calculate the max and sum in current split.
var l_max = f32(-3.402823e+38f);
var l_max = f32(-3.4028234663852886e+38f);
var l_sum = f32(0);
for (var i = 0u; i < tile_size && (total_seq_offset + i) < total_sequence_length; i++) {
l_max = max(l_max, f32(tile_qk[i]));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ $MAIN {
// Calculate the global max and sum in qk.
if (head_idx < uniforms.num_heads)
{
var g_max = f32(-3.402823e+38f);
var g_max = f32(-3.4028234663852886e+38f);
var g_sum = f32(0);
for (var i = 0u; i < num_total_seq_length_tile; i++)
{
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/contrib_ops/webgpu/moe/gate.wgsl.template
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const K: u32 = k;
#if is_fp16
const MAX_FLOAT: f16 = 65504.0;
#else
const MAX_FLOAT: f32 = 3.402823466e+38;
const MAX_FLOAT: f32 = 3.4028234663852886e+38;
#endif

var<workgroup> shared_vals: array<hidden_state_element_t, workgroup_size_x>;
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/js/operators/unary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ JSEP_ELEMENTWISE_TYPED_KERNEL(Not, 1, bool, Not)

// activation

JSEP_CLASS_IMPL_ATTRIBUTE_FLOAT_2_DEFAULT(ClipV10, Clip, min, 3.402823e+38f, max, -3.402823e+38f)
JSEP_CLASS_IMPL_ATTRIBUTE_FLOAT_2_DEFAULT(ClipV10, Clip, min, 3.4028234663852886e+38f, max, -3.4028234663852886e+38f)
JSEP_ELEMENTWISE_VERSIONED_KERNEL(Clip, 6, 10, ClipV10)
JSEP_KERNEL_IMPL(Clip, Clip)
ONNX_OPERATOR_VERSIONED_KERNEL_EX(Clip, kOnnxDomain, 11, 11, kJsExecutionProvider,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ bool ClipOpBuilder::HandleBuildOp(vsi::npu::GraphEP* graph_ep,
LOGS_DEFAULT(INFO) << "Creating Clip Op.";
if (node_unit.SinceVersion() <= 6) {
NodeAttrHelper helper(node_unit.GetNode());
auto min = helper.Get("min", -3.402e+38f);
auto max = helper.Get("max", 3.402e+38f);
auto min = helper.Get("min", -3.4028234663852886e+38f);
auto max = helper.Get("max", 3.4028234663852886e+38f);
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Clip>(min, max);
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/webgpu/math/softmax.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ Status SoftmaxProgram::GenerateShaderCode(ShaderHelper& shader) const {
int components = input.NumComponents();

const std::string thread_max_decl = is_fp32_
? "var thread_max = x_value_t(-3.402823e+38f);\n"
? "var thread_max = x_value_t(-3.4028234663852886e+38f);\n"
: "var thread_max = x_value_t(-65504.0h);\n";

// Define shared memory for row max and row sum
Expand Down
Loading