Skip to content

Commit aa31abc

Browse files
authored
perf(bigtable): Create attributes only when enabled (#12647)
* perf(bigtable): Create OTEL instruments only when client side metrics are enabled * add benchmark tests * add license
1 parent 3f4a99b commit aa31abc

3 files changed

Lines changed: 265 additions & 30 deletions

File tree

bigtable/benchmark_test.go

Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
/*
2+
Copyright 2025 Google LLC
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package bigtable
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"log"
23+
"os"
24+
"runtime/pprof"
25+
"sync"
26+
"testing"
27+
28+
"github.com/google/uuid"
29+
"google.golang.org/api/iterator"
30+
)
31+
32+
/*
33+
To run the benchmark tests (set the project and instance constants below to real values first):
34+
go test -v -run=^$ -bench="BenchmarkReadRowsWithMetrics" -benchmem -memprofile=heap-metrics-enabled.prof .
35+
go test -v -run=^$ -bench="BenchmarkReadRowsWithoutMetrics" -benchmem -memprofile=heap-metrics-disabled.prof .
36+
37+
38+
Compare Heap Allocation Profiles:
39+
To understand the impact of enabling metrics on heap allocations, compare the two profiles using pprof's -diff_base feature.
40+
41+
Compare Total Bytes Allocated:
42+
go tool pprof -http=:5000 -sample_index=alloc_space -diff_base=heap-metrics-disabled.prof heap-metrics-enabled.prof
43+
The flame graph will highlight functions where the difference in total allocated bytes is significant.
44+
Positive values indicate more bytes allocated in heap-metrics-enabled.prof.
45+
46+
Compare Total Number of Allocations:
47+
go tool pprof -http=:5001 -sample_index=alloc_objects -diff_base=heap-metrics-disabled.prof heap-metrics-enabled.prof
48+
This shows the difference in the number of allocations.
49+
50+
51+
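Compare CPU Profiles (cpu-metrics-enabled.prof and cpu-metrics-disabled.prof are written by setup while the benchmarks run):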
52+
This highlights the functions contributing most to the CPU overhead due to metrics being enabled.
53+
54+
Generate a diff flame graph:
55+
This shows what's "new" or "more expensive" in the enabled profile
56+
go tool pprof -http=:5002 -diff_base=cpu-metrics-disabled.prof cpu-metrics-enabled.prof
57+
58+
59+
60+
View individual CPU profiles:
61+
go tool pprof -http=:5003 cpu-metrics-enabled.prof
62+
go tool pprof -http=:5004 cpu-metrics-disabled.prof
63+
*/
64+
65+
const (
66+
project = "my_project"
67+
instance = "my_instance"
68+
tableNamePrefix = "profile-test-"
69+
columnFamilyName = "cf1"
70+
columnName = "col1"
71+
totalRows = 10000000
72+
rowsPerApplyBulk = 100000
73+
numGoRoutines = 100 // Number of concurrent readers
74+
)
75+
76+
// setup performs the initial configuration for the benchmark, including client creation,
77+
// table creation, and data population. It also handles the profiling setup and cleanup.
78+
func setup(b *testing.B, metricsEnabled bool) (client *Client, tableName, rowKeyPrefix string, cleanup func()) {
79+
ctx := context.Background()
80+
b.Logf("Setting up for metrics enabled: %v", metricsEnabled)
81+
82+
// 1. Create Admin Client
83+
adminClient, err := NewAdminClient(ctx, project, instance)
84+
if err != nil {
85+
b.Fatalf("Failed to create admin client: %v", err)
86+
}
87+
88+
// 2. Create Table If Not Exists
89+
tableName = tableNamePrefix + uuid.New().String()
90+
b.Logf("Creating table: %s", tableName)
91+
if err := adminClient.CreateTable(ctx, tableName); err != nil {
92+
b.Fatalf("Failed to create table '%s': %v", tableName, err)
93+
}
94+
if err := adminClient.CreateColumnFamily(ctx, tableName, columnFamilyName); err != nil {
95+
b.Fatalf("Failed to create column family '%s': %v", columnFamilyName, err)
96+
}
97+
98+
// Create Data Client for writing data
99+
writerClient, err := NewClient(ctx, project, instance)
100+
if err != nil {
101+
b.Fatalf("Failed to create writer data client: %v", err)
102+
}
103+
104+
// 3. Write rows
105+
rowKeyPrefix = "row-" + uuid.New().String()
106+
b.Logf("Writing %d rows to table '%s' with prefix '%s'...", totalRows, tableName, rowKeyPrefix)
107+
for i := 0; i < totalRows; i += rowsPerApplyBulk {
108+
start := i
109+
end := i + rowsPerApplyBulk
110+
if end > totalRows {
111+
end = totalRows
112+
}
113+
writeBatch(b, writerClient, tableName, rowKeyPrefix, start, end)
114+
}
115+
b.Log("Finished writing data.")
116+
writerClient.Close()
117+
118+
// 4. Create Data Client for benchmark
119+
clientConfig := ClientConfig{}
120+
if !metricsEnabled {
121+
clientConfig.MetricsProvider = NoopMetricsProvider{}
122+
}
123+
client, err = NewClientWithConfig(ctx, project, instance, clientConfig)
124+
if err != nil {
125+
b.Fatalf("Failed to create data client (metrics: %v): %v", metricsEnabled, err)
126+
}
127+
128+
// Profiling setup
129+
profileSuffix := "disabled"
130+
if metricsEnabled {
131+
profileSuffix = "enabled"
132+
}
133+
cpuFile, err := os.Create(fmt.Sprintf("cpu-metrics-%s.prof", profileSuffix))
134+
if err != nil {
135+
b.Fatalf("could not create CPU profile: %v", err)
136+
}
137+
pprof.StartCPUProfile(cpuFile)
138+
139+
cleanup = func() {
140+
b.Log("Running cleanup...")
141+
pprof.StopCPUProfile()
142+
cpuFile.Close()
143+
144+
if err := adminClient.DeleteTable(ctx, tableName); err != nil {
145+
b.Logf("Warning: failed to delete table '%s': %v", tableName, err)
146+
}
147+
adminClient.Close()
148+
client.Close()
149+
b.Log("Cleanup complete.")
150+
}
151+
152+
return client, tableName, rowKeyPrefix, cleanup
153+
}
154+
155+
// writeBatch writes a batch of rows to the specified table.
156+
func writeBatch(b *testing.B, client *Client, tableName, rowKeyPrefix string, start, end int) {
157+
muts := make([]*Mutation, end-start)
158+
rowKeys := make([]string, end-start)
159+
for i := 0; i < len(muts); i++ {
160+
muts[i] = NewMutation()
161+
muts[i].Set(columnFamilyName, columnName, Now(), []byte("p"))
162+
rowKeys[i] = fmt.Sprintf("%s-%010d", rowKeyPrefix, start+i)
163+
}
164+
165+
errs, err := client.Open(tableName).ApplyBulk(context.Background(), rowKeys, muts)
166+
if err != nil {
167+
b.Fatalf("ApplyBulk failed: %v", err)
168+
}
169+
for _, err := range errs {
170+
if err != nil {
171+
b.Fatalf("An error occurred during ApplyBulk: %v", err)
172+
}
173+
}
174+
}
175+
176+
// readRowsConcurrently simulates multiple clients reading from the table.
177+
func readRowsConcurrently(b *testing.B, client *Client, tableName, rowKeyPrefix string) {
178+
var wg sync.WaitGroup
179+
wg.Add(numGoRoutines)
180+
181+
rowsPerRoutine := totalRows / numGoRoutines
182+
183+
for i := 0; i < numGoRoutines; i++ {
184+
startKey := fmt.Sprintf("%s-%010d", rowKeyPrefix, i*rowsPerRoutine)
185+
endKey := fmt.Sprintf("%s-%010d", rowKeyPrefix, (i+1)*rowsPerRoutine)
186+
187+
go func(start, end string) {
188+
defer wg.Done()
189+
tbl := client.Open(tableName)
190+
err := tbl.ReadRows(context.Background(), NewRange(start, end), func(r Row) bool {
191+
// consume the row to simulate a real read.
192+
_ = r[columnFamilyName][0].Value
193+
return true
194+
})
195+
if err != nil && err != iterator.Done {
196+
b.Errorf("ReadRows failed for range %s-%s: %v", start, end, err)
197+
}
198+
}(startKey, endKey)
199+
}
200+
wg.Wait()
201+
}
202+
203+
func BenchmarkReadRowsWithMetrics(b *testing.B) {
204+
log.SetFlags(log.LstdFlags | log.Lshortfile)
205+
client, tableName, rowKeyPrefix, cleanup := setup(b, true)
206+
defer cleanup()
207+
208+
b.ResetTimer()
209+
for i := 0; i < b.N; i++ {
210+
readRowsConcurrently(b, client, tableName, rowKeyPrefix)
211+
}
212+
b.StopTimer()
213+
}
214+
215+
func BenchmarkReadRowsWithoutMetrics(b *testing.B) {
216+
log.SetFlags(log.LstdFlags | log.Lshortfile)
217+
client, tableName, rowKeyPrefix, cleanup := setup(b, false)
218+
defer cleanup()
219+
220+
b.ResetTimer()
221+
for i := 0; i < b.N; i++ {
222+
readRowsConcurrently(b, client, tableName, rowKeyPrefix)
223+
}
224+
b.StopTimer()
225+
}

bigtable/metrics.go

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -176,13 +176,29 @@ type builtinMetricsTracerFactory struct {
176176
}
177177

178178
func newBuiltinMetricsTracerFactory(ctx context.Context, project, instance, appProfile string, metricsProvider MetricsProvider, opts ...option.ClientOption) (*builtinMetricsTracerFactory, error) {
179+
if metricsProvider != nil {
180+
switch metricsProvider.(type) {
181+
case NoopMetricsProvider:
182+
return &builtinMetricsTracerFactory{
183+
enabled: false,
184+
shutdown: func() {},
185+
}, nil
186+
default:
187+
return &builtinMetricsTracerFactory{
188+
enabled: false,
189+
shutdown: func() {},
190+
}, errors.New("unknown MetricsProvider type")
191+
}
192+
}
193+
194+
// Metrics are enabled.
179195
clientUID, err := generateClientUID()
180196
if err != nil {
181197
return nil, err
182198
}
183199

184200
tracerFactory := &builtinMetricsTracerFactory{
185-
enabled: false,
201+
enabled: true,
186202
clientAttributes: []attribute.KeyValue{
187203
attribute.String(monitoredResLabelKeyProject, project),
188204
attribute.String(monitoredResLabelKeyInstance, instance),
@@ -193,31 +209,21 @@ func newBuiltinMetricsTracerFactory(ctx context.Context, project, instance, appP
193209
shutdown: func() {},
194210
}
195211

196-
var meterProvider *sdkmetric.MeterProvider
197-
if metricsProvider == nil {
198-
// Create default meter provider
199-
mpOptions, err := builtInMeterProviderOptions(project, opts...)
200-
if err != nil {
201-
return tracerFactory, err
202-
}
203-
meterProvider = sdkmetric.NewMeterProvider(mpOptions...)
204-
205-
tracerFactory.enabled = true
206-
tracerFactory.shutdown = func() { meterProvider.Shutdown(ctx) }
207-
} else {
208-
switch metricsProvider.(type) {
209-
case NoopMetricsProvider:
210-
tracerFactory.enabled = false
211-
return tracerFactory, nil
212-
default:
213-
tracerFactory.enabled = false
214-
return tracerFactory, errors.New("unknown MetricsProvider type")
215-
}
212+
// Create default meter provider
213+
mpOptions, err := builtInMeterProviderOptions(project, opts...)
214+
if err != nil {
215+
tracerFactory.enabled = false
216+
return tracerFactory, err
216217
}
218+
meterProvider := sdkmetric.NewMeterProvider(mpOptions...)
219+
tracerFactory.shutdown = func() { meterProvider.Shutdown(ctx) }
217220

218221
// Create meter and instruments
219222
meter := meterProvider.Meter(builtInMetricsMeterName, metric.WithInstrumentationVersion(internal.Version))
220223
err = tracerFactory.createInstruments(meter)
224+
if err != nil {
225+
tracerFactory.enabled = false
226+
}
221227
return tracerFactory, err
222228
}
223229

@@ -413,6 +419,9 @@ func (a *attemptTracer) setServerLatencyErr(err error) {
413419
}
414420

415421
func (tf *builtinMetricsTracerFactory) createBuiltinMetricsTracer(ctx context.Context, tableName string, isStreaming bool) builtinMetricsTracer {
422+
if !tf.enabled {
423+
return builtinMetricsTracer{builtInEnabled: false}
424+
}
416425
// Operation has started but not the attempt.
417426
// So, create only operation tracer and not attempt tracer
418427
currOpTracer := opTracer{}

bigtable/metrics_test.go

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -180,13 +180,6 @@ func TestNewBuiltinMetricsTracerFactory(t *testing.T) {
180180
appProfile := "test-app-profile"
181181
clientUID := "test-uid"
182182

183-
wantClientAttributes := []attribute.KeyValue{
184-
attribute.String(monitoredResLabelKeyProject, project),
185-
attribute.String(monitoredResLabelKeyInstance, instance),
186-
attribute.String(metricLabelKeyAppProfile, appProfile),
187-
attribute.String(metricLabelKeyClientUID, clientUID),
188-
attribute.String(metricLabelKeyClientName, clientName),
189-
}
190183
wantMetricNamesStdout := []string{metricNameAttemptLatencies, metricNameAttemptLatencies, metricNameConnErrCount, metricNameConnErrCount, metricNameOperationLatencies, metricNameRetryCount, metricNameServerLatencies, metricNameAppBlockingLatencies}
191184
wantMetricTypesGCM := []string{}
192185
for _, wantMetricName := range wantMetricNamesStdout {
@@ -260,12 +253,20 @@ func TestNewBuiltinMetricsTracerFactory(t *testing.T) {
260253
wantBuiltinEnabled bool
261254
setEmulator bool
262255
wantCreateTSCallsCount int // No. of CreateTimeSeries calls
256+
wantClientAttributes []attribute.KeyValue
263257
}{
264258
{
265259
desc: "should create a new tracer factory with default meter provider",
266260
config: ClientConfig{AppProfile: appProfile},
267261
wantBuiltinEnabled: true,
268262
wantCreateTSCallsCount: 2,
263+
wantClientAttributes: []attribute.KeyValue{
264+
attribute.String(monitoredResLabelKeyProject, project),
265+
attribute.String(monitoredResLabelKeyInstance, instance),
266+
attribute.String(metricLabelKeyAppProfile, appProfile),
267+
attribute.String(metricLabelKeyClientUID, clientUID),
268+
attribute.String(metricLabelKeyClientName, clientName),
269+
},
269270
},
270271
{
271272
desc: "should create a new tracer factory with noop meter provider",
@@ -298,8 +299,8 @@ func TestNewBuiltinMetricsTracerFactory(t *testing.T) {
298299
t.Errorf("builtinEnabled: got: %v, want: %v", gotClient.metricsTracerFactory.enabled, test.wantBuiltinEnabled)
299300
}
300301

301-
if !equalsKeyValue(gotClient.metricsTracerFactory.clientAttributes, wantClientAttributes) {
302-
t.Errorf("clientAttributes: got: %+v, want: %+v", gotClient.metricsTracerFactory.clientAttributes, wantClientAttributes)
302+
if !equalsKeyValue(gotClient.metricsTracerFactory.clientAttributes, test.wantClientAttributes) {
303+
t.Errorf("clientAttributes: got: %+v, want: %+v", gotClient.metricsTracerFactory.clientAttributes, test.wantClientAttributes)
303304
}
304305

305306
// Check instruments

0 commit comments

Comments
 (0)