@@ -23,6 +23,7 @@ import (
2323	"strings" 
2424	"time" 
2525
26+ 	"github.com/golang/glog" 
2627	"google.golang.org/grpc/metadata" 
2728
2829	api "github.com/kubeflow/pipelines/backend/api/v1beta1/go_client" 
@@ -39,7 +40,7 @@ const (
3940type  PipelineClientInterface  interface  {
4041	ReportWorkflow (workflow  util.ExecutionSpec ) error 
4142	ReportScheduledWorkflow (swf  * util.ScheduledWorkflow ) error 
42- 	ReadArtifactForMetrics (request  * util.ArtifactRequest ) (* util.ArtifactResponse , error )
43+ 	ReadArtifact (request  * util.ReadArtifactRequest ) (* util.ReadArtifactResponse , error )
4344	ReportRunMetrics (request  * api.ReportRunMetricsRequest ) (* api.ReportRunMetricsResponse , error )
4445}
4546
@@ -122,7 +123,10 @@ func (p *PipelineClient) ReportWorkflow(workflow util.ExecutionSpec) error {
122123				workflow .ToStringForStore ())
123124		} else  if  statusCode .Code () ==  codes .Unauthenticated  &&  strings .Contains (err .Error (), "service account token has expired" ) {
124125			// If unauthenticated because SA token is expired, refresh the token and return a transient error so the caller retries 
125- 			p .tokenRefresher .RefreshToken ()
126+ 			if  refreshErr  :=  p .tokenRefresher .RefreshToken (); refreshErr  !=  nil  {
127+ 				return  util .NewCustomError (refreshErr , util .CUSTOM_CODE_PERMANENT ,
128+ 					"Failed to refresh token: %v" , refreshErr .Error ())
129+ 			}
126130			return  util .NewCustomError (err , util .CUSTOM_CODE_TRANSIENT ,
127131				"Error while reporting workflow resource (code: %v, message: %v): %v, %+v" ,
128132				statusCode .Code (),
@@ -167,7 +171,10 @@ func (p *PipelineClient) ReportScheduledWorkflow(swf *util.ScheduledWorkflow) er
167171				swf .ScheduledWorkflow )
168172		} else  if  statusCode .Code () ==  codes .Unauthenticated  &&  strings .Contains (err .Error (), "service account token has expired" ) {
169173			// If unauthenticated because SA token is expired, refresh the token and return a transient error so the caller retries 
170- 			p .tokenRefresher .RefreshToken ()
174+ 			if  refreshErr  :=  p .tokenRefresher .RefreshToken (); refreshErr  !=  nil  {
175+ 				return  util .NewCustomError (refreshErr , util .CUSTOM_CODE_PERMANENT ,
176+ 					"Failed to refresh token: %v" , refreshErr .Error ())
177+ 			}
171178			return  util .NewCustomError (err , util .CUSTOM_CODE_TRANSIENT ,
172179				"Error while reporting workflow resource (code: %v, message: %v): %v, %+v" ,
173180				statusCode .Code (),
@@ -187,15 +194,17 @@ func (p *PipelineClient) ReportScheduledWorkflow(swf *util.ScheduledWorkflow) er
187194	return  nil 
188195}
189196
190- // ReadArtifactForMetrics reads artifact content using the new util.ArtifactRequest/Response types. 
191- // This method is used by the metrics collection system. 
192- func  (p  * PipelineClient ) ReadArtifactForMetrics (request  * util.ArtifactRequest ) (* util.ArtifactResponse , error ) {
193- 	// Construct the HTTP streaming endpoint URL 
194- 	// Format: /apis/v1beta1/runs/{run_id}/nodes/{node_id}/artifacts/{artifact_name}:stream 
197+ // ReadArtifact reads artifact content using HTTP streaming. 
198+ // 
199+ // Error Handling: 
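200+ // - Returns (nil, nil) for artifacts that don't exist (HTTP 404) 
201+ // - Returns CUSTOM_CODE_PERMANENT for client errors (400, 403), request failures not caused by token expiry, token refresh failures, body read failures, and any other unexpected status 
202+ // - Returns CUSTOM_CODE_TRANSIENT for retryable errors (401, 500, request failures caused by token expiry) 
203+ // - Refreshes the token on HTTP 401 or on a token-expiry request failure; it does not retry itself, so callers should retry transient errors 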
204+ func  (p  * PipelineClient ) ReadArtifact (request  * util.ReadArtifactRequest ) (* util.ReadArtifactResponse , error ) {
195205	url  :=  fmt .Sprintf ("%s/apis/v1beta1/runs/%s/nodes/%s/artifacts/%s:stream" ,
196206		p .httpBaseURL , request .RunID , request .NodeID , request .ArtifactName )
197207
198- 	// Create HTTP request with timeout 
199208	ctx , cancel  :=  context .WithTimeout (context .Background (), time .Minute )
200209	defer  cancel ()
201210
@@ -205,61 +214,65 @@ func (p *PipelineClient) ReadArtifactForMetrics(request *util.ArtifactRequest) (
205214			"Failed to create HTTP request: %v" , err .Error ())
206215	}
207216
208- 	// Add authorization header 
209217	req .Header .Set ("Authorization" , "Bearer " + p .tokenRefresher .GetToken ())
210218
211- 	// Make the HTTP request 
212219	resp , err  :=  p .httpClient .Do (req )
213220	if  err  !=  nil  {
214221		if  strings .Contains (err .Error (), "service account token has expired" ) {
215- 			// If unauthenticated because SA token is expired, re-read/refresh the token and try again 
216- 			p .tokenRefresher .RefreshToken ()
222+ 			// If unauthenticated because SA token is expired, refresh the token and the caller should retry 
223+ 			if  refreshErr  :=  p .tokenRefresher .RefreshToken (); refreshErr  !=  nil  {
224+ 				return  nil , util .NewCustomError (refreshErr , util .CUSTOM_CODE_PERMANENT ,
225+ 					"Failed to refresh token: %v" , refreshErr .Error ())
226+ 			}
217227			return  nil , util .NewCustomError (err , util .CUSTOM_CODE_TRANSIENT ,
218228				"Error while reading artifact due to token expiry: %v" , err .Error ())
219229		}
220230		return  nil , util .NewCustomError (err , util .CUSTOM_CODE_PERMANENT ,
221231			"Failed to make HTTP request: %v" , err .Error ())
222232	}
223- 	defer  resp .Body .Close ()
233+ 	defer  func () {
234+ 		if  closeErr  :=  resp .Body .Close (); closeErr  !=  nil  {
235+ 			glog .Warningf ("Failed to close response body: %v" , closeErr )
236+ 		}
237+ 	}()
224238
225- 	// Handle HTTP status codes 
226239	switch  resp .StatusCode  {
227240	case  http .StatusOK :
228- 		// Success case - read the artifact data 
229241		data , err  :=  io .ReadAll (resp .Body )
230242		if  err  !=  nil  {
231243			return  nil , util .NewCustomError (err , util .CUSTOM_CODE_PERMANENT ,
232244				"Failed to read artifact data: %v" , err .Error ())
233245		}
234- 		return  & util.ArtifactResponse {Data : data }, nil 
246+ 		return  & util.ReadArtifactResponse {Data : data }, nil 
235247
236248	case  http .StatusNotFound :
237- 		// Artifact not found - return nil as per original behavior 
238249		return  nil , nil 
239250
240251	case  http .StatusUnauthorized :
241252		// Unauthorized - refresh token and return transient error 
242- 		p .tokenRefresher .RefreshToken ()
253+ 		if  refreshErr  :=  p .tokenRefresher .RefreshToken (); refreshErr  !=  nil  {
254+ 			if  closeErr  :=  resp .Body .Close (); closeErr  !=  nil  {
255+ 				glog .Warningf ("Failed to close response body: %v" , closeErr )
256+ 			}
257+ 			return  nil , util .NewCustomError (refreshErr , util .CUSTOM_CODE_PERMANENT ,
258+ 				"Failed to refresh token: %v" , refreshErr .Error ())
259+ 		}
243260		return  nil , util .NewCustomError (fmt .Errorf ("HTTP 401" ), util .CUSTOM_CODE_TRANSIENT ,
244261			"Failed to read artifact, unauthorized (token may have expired)" )
245262
246263	case  http .StatusForbidden :
247- 		// Forbidden - return permanent error 
248264		return  nil , util .NewCustomError (fmt .Errorf ("HTTP 403" ), util .CUSTOM_CODE_PERMANENT ,
249265			"Failed to read artifact, forbidden" )
250266
251267	case  http .StatusBadRequest :
252- 		// Bad request - return permanent error 
253268		return  nil , util .NewCustomError (fmt .Errorf ("HTTP 400" ), util .CUSTOM_CODE_PERMANENT ,
254269			"Failed to read artifact, bad request" )
255270
256271	case  http .StatusInternalServerError :
257- 		// Internal server error - return transient error 
258272		return  nil , util .NewCustomError (fmt .Errorf ("HTTP 500" ), util .CUSTOM_CODE_TRANSIENT ,
259273			"Failed to read artifact, internal server error" )
260274
261275	default :
262- 		// Other status codes - return permanent error 
263276		return  nil , util .NewCustomError (fmt .Errorf ("HTTP %d" , resp .StatusCode ), util .CUSTOM_CODE_PERMANENT ,
264277			"Failed to read artifact, HTTP status: %d" , resp .StatusCode )
265278	}
0 commit comments