diff --git a/CHANGELOG.md b/CHANGELOG.md index 804c0ae1c..b9a2c3149 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.23.1](https://github.com/googleapis/python-bigquery/compare/v3.23.0...v3.23.1) (2024-05-21) + + +### Performance Improvements + +* Decrease the threshold in which we use the BQ Storage Read API ([#1925](https://github.com/googleapis/python-bigquery/issues/1925)) ([eaa1a52](https://github.com/googleapis/python-bigquery/commit/eaa1a52b360646909c14ca7194b8c6b17fefdd79)) + ## [3.23.0](https://github.com/googleapis/python-bigquery/compare/v3.22.0...v3.23.0) (2024-05-16) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index ad1253195..6ebb0709a 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -108,7 +108,17 @@ # How many of the total rows need to be downloaded already for us to skip # calling the BQ Storage API? -ALMOST_COMPLETELY_CACHED_RATIO = 0.333 +# +# In microbenchmarks on 2024-05-21, I (tswast@) measure that at about 2 MB of +# remaining results, it's faster to use the BQ Storage Read API to download +# the results than use jobs.getQueryResults. Since we don't have a good way to +# know the remaining bytes, we estimate by remaining number of rows. +# +# Except when rows themselves are larger, I observe that a single page of +# results will be around 10 MB. Therefore, the proportion of rows already +# downloaded should be 10 (first page) / 12 (all results) or less for it to be +# worth it to make a call to jobs.getQueryResults. 
+ALMOST_COMPLETELY_CACHED_RATIO = 0.833333 def _reference_getter(table): diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0938c08f6..a62f73ed4 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.23.0" +__version__ = "3.23.1" diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 8c268759e..ff614977b 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -21,7 +21,7 @@ google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.0 googleapis-common-protos==1.63.0 -grpcio==1.62.2; python_version == '3.7' +grpcio===1.62.2; python_version == '3.7' grpcio==1.63.0; python_version >= '3.8' idna==3.7 munch==4.0.0 @@ -32,7 +32,7 @@ pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' proto-plus==1.23.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==16.0.0; python_version >= '3.8' +pyarrow==16.1.0; python_version >= '3.8' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.0; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index a60175de5..3407323ee 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -6,7 +6,7 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.8.4; python_version >= '3.9' +matplotlib==3.9.0; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 099529f95..fcbba03aa 100644 --- 
a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2307,9 +2307,17 @@ def test__is_almost_completely_cached_returns_true_with_some_rows_remaining(self rows = [ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Pebbles Phlyntstone"}, {"v": "4"}]}, + {"f": [{"v": "Bamm-Bamm Rhubble"}, {"v": "5"}]}, + {"f": [{"v": "Joseph Rockhead"}, {"v": "32"}]}, + {"f": [{"v": "Perry Masonry"}, {"v": "33"}]}, ] first_page = {"pageToken": "next-page", "rows": rows} - iterator = self._make_one(first_page_response=first_page, total_rows=6) + iterator = self._make_one( + first_page_response=first_page, total_rows=len(rows) + 1 + ) self.assertTrue(iterator._is_almost_completely_cached()) def test__is_almost_completely_cached_returns_true_with_no_rows_remaining(self):