@@ -154,6 +154,49 @@ def test_inference_with_string_model_success(
154154 }
155155 ),
156156 )
157+ assert inference_result .candidate_name == "gemini-pro"
158+ assert inference_result .gcs_source is None
159+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
def test_inference_with_callable_model_sets_candidate_name(
    self, mock_eval_dataset_loader
):
    """Checks that a named callable model supplies the candidate name.

    Runs inference with a locally defined function as the model and asserts
    that the result's ``candidate_name`` equals that function's name and that
    ``gcs_source`` is left unset.
    """

    # The function name itself is what the assertion below compares against,
    # so it must stay "my_model_fn".
    def my_model_fn(contents):
        return "callable response"

    prompts_df = pd.DataFrame({"prompt": ["test prompt"]})
    loader_instance = mock_eval_dataset_loader.return_value
    loader_instance.load.return_value = prompts_df.to_dict(orient="records")

    inference_result = self.client.evals.run_inference(
        model=my_model_fn, src=prompts_df
    )

    assert inference_result.candidate_name == "my_model_fn"
    assert inference_result.gcs_source is None
178+
@mock.patch.object(_evals_utils, "EvalDatasetLoader")
def test_inference_with_lambda_model_candidate_name_is_none(
    self, mock_eval_dataset_loader
):
    """Checks the candidate name produced when the model is a lambda.

    Runs inference with an anonymous function as the model and asserts that
    ``candidate_name`` is either ``"<lambda>"`` or ``None``, and that
    ``gcs_source`` is left unset.
    """
    prompts_df = pd.DataFrame({"prompt": ["test prompt"]})
    loader_instance = mock_eval_dataset_loader.return_value
    loader_instance.load.return_value = prompts_df.to_dict(orient="records")

    inference_result = self.client.evals.run_inference(
        model=lambda x: "lambda response",  # pylint: disable=unnecessary-lambda
        src=prompts_df,
    )

    # A lambda's __name__ is "<lambda>". Per the original note the code under
    # test reads getattr(model, "__name__", None), so None is tolerated too
    # in case no name is propagated.
    lambda_name_seen = inference_result.candidate_name == "<lambda>"
    assert lambda_name_seen or inference_result.candidate_name is None
    assert inference_result.gcs_source is None
157200
158201 @mock .patch .object (_evals_utils , "EvalDatasetLoader" )
159202 def test_inference_with_callable_model_success (self , mock_eval_dataset_loader ):
@@ -179,6 +222,8 @@ def mock_model_fn(contents):
179222 }
180223 ),
181224 )
225+ assert inference_result .candidate_name == "mock_model_fn"
226+ assert inference_result .gcs_source is None
182227
183228 @mock .patch .object (_evals_common , "Models" )
184229 @mock .patch .object (_evals_utils , "EvalDatasetLoader" )
@@ -224,6 +269,8 @@ def test_inference_with_prompt_template(
224269 }
225270 ),
226271 )
272+ assert inference_result .candidate_name == "gemini-pro"
273+ assert inference_result .gcs_source is None
227274
228275 @mock .patch .object (_evals_common , "Models" )
229276 @mock .patch .object (_evals_utils , "EvalDatasetLoader" )
@@ -273,6 +320,10 @@ def test_inference_with_gcs_destination(
273320 pd .testing .assert_frame_equal (
274321 inference_result .eval_dataset_df , expected_df_to_save
275322 )
323+ assert inference_result .candidate_name == "gemini-pro"
324+ assert inference_result .gcs_source == vertexai_genai_types .GcsSource (
325+ uris = [gcs_dest_path ]
326+ )
276327
277328 @mock .patch .object (_evals_common , "Models" )
278329 @mock .patch .object (_evals_utils , "EvalDatasetLoader" )
@@ -322,6 +373,8 @@ def test_inference_with_local_destination(
322373 }
323374 )
324375 pd .testing .assert_frame_equal (inference_result .eval_dataset_df , expected_df )
376+ assert inference_result .candidate_name == "gemini-pro"
377+ assert inference_result .gcs_source is None
325378
326379 @mock .patch .object (_evals_common , "Models" )
327380 @mock .patch .object (_evals_utils , "EvalDatasetLoader" )
@@ -405,6 +458,8 @@ def test_inference_from_request_column_save_locally(
405458 expected_records , key = lambda x : x ["request" ]
406459 )
407460 os .remove (local_dest_path )
461+ assert inference_result .candidate_name == "gemini-pro"
462+ assert inference_result .gcs_source is None
408463
409464 @mock .patch .object (_evals_common , "Models" )
410465 def test_inference_from_local_jsonl_file (self , mock_models ):
@@ -478,6 +533,8 @@ def test_inference_from_local_jsonl_file(self, mock_models):
478533 any_order = True ,
479534 )
480535 os .remove (local_src_path )
536+ assert inference_result .candidate_name == "gemini-pro"
537+ assert inference_result .gcs_source is None
481538
482539 @mock .patch .object (_evals_common , "Models" )
483540 def test_inference_from_local_csv_file (self , mock_models ):
@@ -548,6 +605,8 @@ def test_inference_from_local_csv_file(self, mock_models):
548605 any_order = True ,
549606 )
550607 os .remove (local_src_path )
608+ assert inference_result .candidate_name == "gemini-pro"
609+ assert inference_result .gcs_source is None
551610
552611 @mock .patch .object (_evals_common , "Models" )
553612 @mock .patch .object (_evals_utils , "EvalDatasetLoader" )
@@ -719,6 +778,8 @@ def mock_generate_content_logic(*args, **kwargs):
719778 expected_df .sort_values (by = "id" ).reset_index (drop = True ),
720779 check_dtype = False ,
721780 )
781+ assert inference_result .candidate_name == "gemini-pro"
782+ assert inference_result .gcs_source is None
722783
723784 @mock .patch .object (_evals_common , "Models" )
724785 @mock .patch .object (_evals_utils , "EvalDatasetLoader" )
@@ -794,6 +855,8 @@ def test_inference_with_multimodal_content(
794855 }
795856 ),
796857 )
858+ assert inference_result .candidate_name == "gemini-pro"
859+ assert inference_result .gcs_source is None
797860
798861
799862class TestMetricPromptBuilder :
@@ -3295,3 +3358,76 @@ def test_execute_evaluation_multiple_datasets(
32953358 assert summary_metric .mean_score == 1.0
32963359
32973360 assert mock_eval_dependencies ["mock_evaluate_instances" ].call_count == 2
3361+
def test_execute_evaluation_deduplicates_candidate_names(
    self, mock_api_client_fixture, mock_eval_dependencies
):
    """Tests that duplicate candidate names are indexed.

    Evaluates three single-row datasets, two of which share the candidate
    name "gemini-pro", and asserts that the repeated name comes back with
    " #1" / " #2" suffixes while the unique name is returned unchanged.
    """
    # (response, candidate name) per dataset; "gemini-pro" appears twice.
    dataset_specs = [
        ("r1", "gemini-pro"),
        ("r2", "gemini-flash"),
        ("r3", "gemini-pro"),
    ]
    datasets = [
        vertexai_genai_types.EvaluationDataset(
            eval_dataset_df=pd.DataFrame(
                [{"prompt": "p1", "response": response, "reference": "ref1"}]
            ),
            candidate_name=candidate,
        )
        for response, candidate in dataset_specs
    ]

    # Every evaluate_instances call reports a perfect exact-match score.
    exact_match_response = vertexai_genai_types.EvaluateInstancesResponse(
        exact_match_results=vertexai_genai_types.ExactMatchResults(
            exact_match_metric_values=[
                vertexai_genai_types.ExactMatchMetricValue(score=1.0)
            ]
        )
    )
    evaluate_instances_mock = mock_eval_dependencies["mock_evaluate_instances"]
    evaluate_instances_mock.return_value = exact_match_response

    result = _evals_common._execute_evaluation(
        api_client=mock_api_client_fixture,
        dataset=datasets,
        metrics=[vertexai_genai_types.Metric(name="exact_match")],
    )

    expected_names = ["gemini-pro #1", "gemini-flash", "gemini-pro #2"]
    assert result.metadata.candidate_names == expected_names
3406+
@mock.patch("vertexai._genai._evals_common.datetime")
def test_execute_evaluation_adds_creation_timestamp(
    self, mock_datetime, mock_api_client_fixture, mock_eval_dependencies
):
    """Tests that creation_timestamp is added to the result metadata.

    Replaces the ``datetime`` name inside ``_evals_common`` with a mock whose
    ``datetime.now`` returns a fixed UTC instant, then asserts that the
    evaluation result's metadata carries exactly that instant.
    """
    # Imported locally so the real module is available for building the
    # fixed timestamp while the module under test sees the mock.
    import datetime

    frozen_now = datetime.datetime(
        2025, 6, 18, 12, 0, 0, tzinfo=datetime.timezone.utc
    )
    mock_datetime.datetime.now.return_value = frozen_now

    single_row_df = pd.DataFrame(
        [{"prompt": "p", "response": "r", "reference": "r"}]
    )
    dataset = vertexai_genai_types.EvaluationDataset(
        eval_dataset_df=single_row_df
    )
    metric = vertexai_genai_types.Metric(name="exact_match")

    result = _evals_common._execute_evaluation(
        api_client=mock_api_client_fixture,
        dataset=dataset,
        metrics=[metric],
    )

    metadata = result.metadata
    assert metadata is not None
    assert result.metadata.creation_timestamp == frozen_now
0 commit comments