OpenTelemetry blocks on tracer shutdown

I'm probably doing something weird or wrong here, but calling opentelemetry::global::shutdown_tracer_provider() hangs. I'm using the OTLP exporter with Tonic.

I'm setting up OTEL like so:

// Imports for this snippet (assuming the split opentelemetry /
// opentelemetry_sdk crate layout; exact paths differ between versions):
use std::time::Duration;

use opentelemetry::KeyValue;
use opentelemetry_sdk::{
    propagation::TraceContextPropagator,
    runtime::TokioCurrentThread,
    trace::{Config, Sampler},
    Resource,
};
use tracing::{error, Subscriber};
use tracing_subscriber::{registry::LookupSpan, Layer};

pub fn init<S>(
    otlp_endpoint: String,
    sample_ratio: f64,
) -> Result<impl Layer<S>, Box<dyn std::error::Error>>
where
    for<'span> S: Subscriber + LookupSpan<'span>,
{
    opentelemetry::global::set_text_map_propagator(TraceContextPropagator::new());

    opentelemetry::global::set_error_handler(|error| {
        error!(target: "opentelemetry", ?error);
    })?;

    let exporter = opentelemetry_otlp::new_exporter()
        .tonic()
        .with_endpoint(otlp_endpoint)
        .with_timeout(Duration::from_secs(5));

    let tracer = opentelemetry_otlp::new_pipeline()
        .tracing()
        .with_exporter(exporter)
        .with_trace_config(
            Config::default()
                .with_resource(Resource::new(vec![KeyValue::new(
                    opentelemetry_semantic_conventions::resource::SERVICE_NAME,
                    env!("CARGO_PKG_NAME"),
                )]))
                .with_sampler(Sampler::ParentBased(Box::new(Sampler::TraceIdRatioBased(
                    sample_ratio,
                )))),
        )
        .install_batch(TokioCurrentThread)?;
    Ok(tracing_opentelemetry::layer().with_tracer(tracer))
}

My main function looks something like:

#[tokio::main(flavor = "current_thread")]
async fn main() {

    // ...

    let otel = otel::init(args.otel_endpoint, args.otel_sample_ratio).unwrap();

    let env_filter = EnvFilter::builder()
        .with_default_directive(LevelFilter::TRACE.into())
        .from_env_lossy();

    tracing_subscriber::registry()
        .with(otel) // This is the previously set up otel layer
        .with(
            tracing_subscriber::fmt::layer()
                .with_writer(std::io::stdout)
                .with_span_events(FmtSpan::FULL),
        )
        // .with(console_subscriber::spawn())
        .with(env_filter)
        .init();

    let (prometheus_layer, metric_handle) = PrometheusMetricLayer::pair();

    // Axum server setup
    // ...

    let listener = tokio::net::TcpListener::bind(args.address).await.unwrap();
    axum::serve(listener, app)
        .with_graceful_shutdown(shutdown_signal())
        .await
        .unwrap();

    // Axum server is shut down properly - no issues here 

    // This blocks
    opentelemetry::global::shutdown_tracer_provider();
}

I used tokio-console, and after the Axum shutdown it shows only two tasks left, both of which have been paused for a very long time.
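
For reference, the tokio-console observation comes from the commented-out console layer in main() above. A minimal sketch of enabling it (the helper name is just for illustration), assuming the console-subscriber crate is added and the binary is built with RUSTFLAGS="--cfg tokio_unstable", which tokio-console needs for task instrumentation:

use tracing_subscriber::prelude::*;

fn init_console_tracing() {
    // console_subscriber::spawn() starts the console server on a background
    // task (default 127.0.0.1:6669) and returns a tracing layer that can sit
    // next to the OTEL and fmt layers from main().
    tracing_subscriber::registry()
        .with(console_subscriber::spawn())
        .init();
}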

Any help would be much appreciated.

It may be best to file an issue with the opentelemetry project.

I have been able to get it working with the following. Moving the shutdown call onto a blocking thread seems to do the trick, presumably because shutdown_tracer_provider() blocks the calling thread while the batch exporter still needs the current-thread runtime to make progress flushing:

pub async fn flush_traces() {
    // finish sending remaining spans
    tokio::task::spawn_blocking(|| {
        opentelemetry::global::shutdown_tracer_provider();
        opentelemetry::global::shutdown_logger_provider();
    })
    .await
    .expect("failed to shutdown opentelemetry");
}

with this handler:

use tokio::signal;

async fn shutdown_signal() {
    let ctrl_c = async {
        signal::ctrl_c()
            .await
            .expect("failed to install Ctrl+C handler");
    };

    #[cfg(unix)]
    let terminate = async {
        signal::unix::signal(signal::unix::SignalKind::terminate())
            .expect("failed to install signal handler")
            .recv()
            .await;
    };

    #[cfg(not(unix))]
    let terminate = std::future::pending::<()>();

    tokio::select! {
        _ = ctrl_c => {},
        _ = terminate => {},
    }

    flush_traces().await
}
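
With this in place, the end of main() doesn't need the direct shutdown_tracer_provider() call any more. A rough sketch, assuming the same Axum setup as in the original main():

    let listener = tokio::net::TcpListener::bind(args.address).await.unwrap();
    axum::serve(listener, app)
        .with_graceful_shutdown(shutdown_signal())
        .await
        .unwrap();

    // No blocking shutdown call here: shutdown_signal() has already flushed
    // and shut down the providers on a blocking thread via flush_traces(),
    // so the current-thread runtime is never blocked while the batch
    // exporter still has spans to export.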
