
Commit fb96244

Merge branch '1678-awswranglerathenaread_sql_query-is-leaking-memory' of github.com:aws/aws-sdk-pandas into 1678-awswranglerathenaread_sql_query-is-leaking-memory
2 parents: 1d68a70 + 274ed6a

18 files changed (+153, -48 lines)

awswrangler/athena/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -9,7 +9,9 @@
     get_named_query_statement,
     get_query_columns_types,
     get_query_execution,
+    get_query_executions,
     get_work_group,
+    list_query_executions,
     repair_table,
     show_create_table,
     start_query_execution,
@@ -24,10 +26,12 @@
     "describe_table",
     "get_query_columns_types",
     "get_query_execution",
+    "get_query_executions",
     "get_query_results",
     "get_named_query_statement",
     "get_work_group",
     "generate_create_query",
+    "list_query_executions",
     "repair_table",
     "create_ctas_table",
     "show_create_table",

awswrangler/athena/_utils.py

Lines changed: 101 additions & 0 deletions
@@ -1144,3 +1144,104 @@ def get_query_execution(query_execution_id: str, boto3_session: Optional[boto3.S
         QueryExecutionId=query_execution_id,
     )
     return cast(Dict[str, Any], response["QueryExecution"])
+
+
+def get_query_executions(
+    query_execution_ids: List[str], return_unprocessed: bool = False, boto3_session: Optional[boto3.Session] = None
+) -> Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]:
+    """From specified query execution IDs, return a DataFrame of query execution details.
+
+    https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/athena.html#Athena.Client.batch_get_query_execution
+
+    Parameters
+    ----------
+    query_execution_ids : List[str]
+        Athena query execution IDs.
+    return_unprocessed : bool
+        True to also return the query execution IDs that could not be processed.
+        False to return only the DataFrame of query execution details.
+        Default is False.
+    boto3_session : boto3.Session(), optional
+        Boto3 Session. The default boto3 session will be used if boto3_session receives None.
+
+    Returns
+    -------
+    DataFrame
+        DataFrame containing the query execution details.
+
+    DataFrame
+        DataFrame containing the unprocessed query execution IDs.
+
+    Examples
+    --------
+    >>> import awswrangler as wr
+    >>> query_executions_df, unprocessed_query_executions_df = wr.athena.get_query_executions(
+    ...     query_execution_ids=['query-execution-id', 'query-execution-id1']
+    ... )
+    """
+    chunked_size: int = 50
+    query_executions: List[Dict[str, Any]] = []
+    unprocessed_query_execution: List[Dict[str, str]] = []
+    client_athena: boto3.client = _utils.client(service_name="athena", session=boto3_session)
+    for i in range(0, len(query_execution_ids), chunked_size):
+        response = client_athena.batch_get_query_execution(QueryExecutionIds=query_execution_ids[i : i + chunked_size])
+        query_executions += response["QueryExecutions"]
+        unprocessed_query_execution += response["UnprocessedQueryExecutionIds"]
+    if unprocessed_query_execution and not return_unprocessed:
+        _logger.warning(
+            "Some of the query execution IDs could not be processed. "
+            "Set return_unprocessed to True to get the unprocessed query execution IDs."
+        )
+    if return_unprocessed:
+        return pd.json_normalize(query_executions), pd.json_normalize(unprocessed_query_execution)
+    return pd.json_normalize(query_executions)
+
+
+def list_query_executions(workgroup: Optional[str] = None, boto3_session: Optional[boto3.Session] = None) -> List[str]:
+    """Fetch the list of query execution IDs run in the specified workgroup, or in the primary workgroup if none is given.
+
+    https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/athena.html#Athena.Client.list_query_executions
+
+    Parameters
+    ----------
+    workgroup : str
+        The name of the workgroup from which the query execution IDs are returned.
+        If not specified, a list of available query execution IDs for the queries in the primary workgroup is returned.
+    boto3_session : boto3.Session(), optional
+        Boto3 Session. The default boto3 session will be used if boto3_session receives None.
+
+    Returns
+    -------
+    List[str]
+        List of query execution IDs.
+
+    Examples
+    --------
+    >>> import awswrangler as wr
+    >>> res = wr.athena.list_query_executions(workgroup='workgroup-name')
+    """
+    client_athena: boto3.client = _utils.client(service_name="athena", session=boto3_session)
+    kwargs: Dict[str, Any] = {"base": 1}
+    if workgroup:
+        kwargs["WorkGroup"] = workgroup
+    query_list: List[str] = []
+    response: Dict[str, Any] = _utils.try_it(
+        f=client_athena.list_query_executions,
+        ex=botocore.exceptions.ClientError,
+        ex_code="ThrottlingException",
+        max_num_tries=5,
+        **kwargs,
+    )
+    query_list += response["QueryExecutionIds"]
+    while "NextToken" in response:
+        kwargs["NextToken"] = response["NextToken"]
+        response = _utils.try_it(
+            f=client_athena.list_query_executions,
+            ex=botocore.exceptions.ClientError,
+            ex_code="ThrottlingException",
+            max_num_tries=5,
+            **kwargs,
+        )
+        query_list += response["QueryExecutionIds"]
+    return query_list
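
Taken together, the two new helpers cover the list-then-describe workflow end to end. A minimal sketch of that combination (the workgroup name is illustrative; only the signatures added above are assumed):

    import awswrangler as wr

    # Collect every execution ID recorded for a workgroup, then describe them;
    # get_query_executions batches the IDs 50 at a time under the hood.
    ids = wr.athena.list_query_executions(workgroup="my-workgroup")
    if ids:
        details_df, unprocessed_df = wr.athena.get_query_executions(ids, return_unprocessed=True)
        print(f"{len(details_df)} executions described, {len(unprocessed_df)} unprocessed")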

docs/source/api.rst

Lines changed: 2 additions & 0 deletions
@@ -119,9 +119,11 @@ Amazon Athena
     generate_create_query
     get_query_columns_types
     get_query_execution
+    get_query_executions
     get_query_results
     get_named_query_statement
     get_work_group
+    list_query_executions
     read_sql_query
     read_sql_table
     repair_table

tests/test_athena.py

Lines changed: 19 additions & 0 deletions
@@ -1233,3 +1233,22 @@ def test_athena_generate_create_query(path, glue_database, glue_table):
     )
     wr.athena.start_query_execution(sql=query, database=glue_database, wait=True)
     assert query == wr.athena.generate_create_query(database=glue_database, table=glue_table)
+
+
+def test_get_query_execution(workgroup0, workgroup1):
+    query_execution_ids = wr.athena.list_query_executions(workgroup=workgroup0) + wr.athena.list_query_executions(
+        workgroup=workgroup1
+    )
+    assert query_execution_ids
+    query_execution_detail = wr.athena.get_query_execution(query_execution_id=query_execution_ids[0])
+    query_executions_df = wr.athena.get_query_executions(query_execution_ids)
+    assert isinstance(query_executions_df, pd.DataFrame)
+    assert isinstance(query_execution_detail, dict)
+    assert set(query_execution_ids).intersection(set(query_executions_df["QueryExecutionId"].values.tolist()))
+    query_execution_ids1 = query_execution_ids + ["aaa", "bbb"]
+    query_executions_df, unprocessed_query_executions_df = wr.athena.get_query_executions(
+        query_execution_ids1, return_unprocessed=True
+    )
+    assert isinstance(unprocessed_query_executions_df, pd.DataFrame)
+    assert set(query_execution_ids).intersection(set(query_executions_df["QueryExecutionId"].values.tolist()))
+    assert {"aaa", "bbb"}.intersection(set(unprocessed_query_executions_df["QueryExecutionId"].values.tolist()))

tutorials/006 - Amazon Athena.ipynb

Lines changed: 1 addition & 1 deletion
@@ -143,7 +143,7 @@
 " mode=\"overwrite\",\n",
 " database=\"awswrangler_test\",\n",
 " table=\"noaa\"\n",
-");"
+")"
 ]
 },
 {

tutorials/007 - Redshift, MySQL, PostgreSQL, SQL Server, Oracle.ipynb

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
 "\n",
 "# 7 - Redshift, MySQL, PostgreSQL, SQL Server and Oracle\n",
 "\n",
-"[awswrangler](https://github.com/aws/aws-sdk-pandas)'s Redshift, MySQL and PostgreSQL have two basic function in common that tries to follow the Pandas conventions, but add more data type consistency.\n",
+"[awswrangler](https://github.com/aws/aws-sdk-pandas)'s Redshift, MySQL and PostgreSQL have two basic functions in common that try to follow Pandas conventions, but add more data type consistency.\n",
 "\n",
 "- [wr.redshift.to_sql()](https://aws-sdk-pandas.readthedocs.io/en/2.17.0/stubs/awswrangler.redshift.to_sql.html)\n",
 "- [wr.redshift.read_sql_query()](https://aws-sdk-pandas.readthedocs.io/en/2.17.0/stubs/awswrangler.redshift.read_sql_query.html)\n",

tutorials/014 - Schema Evolution.ipynb

Lines changed: 1 addition & 1 deletion
@@ -8,7 +8,7 @@
 "\n",
 "# 14 - Schema Evolution\n",
 "\n",
-"awswrangler support new **columns** on Parquet and CSV datasets through:\n",
+"awswrangler supports new **columns** on Parquet and CSV datasets through:\n",
 "\n",
 "- [wr.s3.to_parquet()](https://aws-sdk-pandas.readthedocs.io/en/2.17.0/stubs/awswrangler.s3.to_parquet.html#awswrangler.s3.to_parquet)\n",
 "- [wr.s3.store_parquet_metadata()](https://aws-sdk-pandas.readthedocs.io/en/2.17.0/stubs/awswrangler.s3.store_parquet_metadata.html#awswrangler.s3.store_parquet_metadata) i.e. \"Crawler\"\n",

tutorials/015 - EMR.ipynb

Lines changed: 0 additions & 7 deletions
@@ -160,13 +160,6 @@
 "source": [
 "wr.emr.terminate_cluster(cluster_id)"
 ]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
 }
 ],
 "metadata": {

tutorials/016 - EMR & Docker.ipynb

Lines changed: 1 addition & 8 deletions
@@ -201,7 +201,7 @@
 "print(f\"awswrangler version: {wr.__version__}\")\n",
 "\"\"\"\n",
 "\n",
-"boto3.client(\"s3\").put_object(Body=script, Bucket=bucket, Key=\"test_docker.py\");"
+"boto3.client(\"s3\").put_object(Body=script, Bucket=bucket, Key=\"test_docker.py\")"
 ]
 },
 {
@@ -329,13 +329,6 @@
 "\n",
 "wr.emr.terminate_cluster(cluster_id)"
 ]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
 }
 ],
 "metadata": {

tutorials/017 - Partition Projection.ipynb

Lines changed: 4 additions & 4 deletions
@@ -159,7 +159,7 @@
 " \"month\": \"1,12\",\n",
 " \"day\": \"1,31\"\n",
 " },\n",
-");"
+")"
 ]
 },
 {
@@ -334,7 +334,7 @@
 " projection_values={\n",
 " \"city\": \"São Paulo,Tokio,Seattle\"\n",
 " },\n",
-");"
+")"
 ]
 },
 {
@@ -511,7 +511,7 @@
 " \"dt\": \"2020-01-01,2020-01-03\",\n",
 " \"ts\": \"2020-01-01 00:00:00,2020-01-01 00:00:02\"\n",
 " },\n",
-");"
+")"
 ]
 },
 {
@@ -679,7 +679,7 @@
 " projection_types={\n",
 " \"uuid\": \"injected\",\n",
 " }\n",
-");"
+")"
 ]
 },
 {

tutorials/018 - QuickSight.ipynb

Lines changed: 1 addition & 1 deletion
@@ -16,7 +16,7 @@
 "* [Exploring the public AWS COVID-19 data lake](https://aws.amazon.com/blogs/big-data/exploring-the-public-aws-covid-19-data-lake/)\n",
 "* [CloudFormation template](https://covid19-lake.s3.us-east-2.amazonaws.com/cfn/CovidLakeStack.template.json)\n",
 "\n",
-"*Please, install the Cloudformation template above to have access to the public data lake.*\n",
+"*Please, install the CloudFormation template above to have access to the public data lake.*\n",
 "\n",
 "*P.S. To be able to access the public data lake, you must allow explicitly QuickSight to access the related external bucket.*"
 ]

tutorials/019 - Athena Cache.ipynb

Lines changed: 3 additions & 3 deletions
@@ -8,13 +8,13 @@
 "\n",
 "# 19 - Amazon Athena Cache\n",
 "\n",
-"[awswrangler](https://github.com/aws/aws-sdk-pandas) has a cache strategy that is disabled by default and can be enabled passing `max_cache_seconds` biggier than 0. This cache strategy for Amazon Athena can help you to **decrease query times and costs**.\n",
+"[awswrangler](https://github.com/aws/aws-sdk-pandas) has a cache strategy that is disabled by default and can be enabled by passing `max_cache_seconds` bigger than 0. This cache strategy for Amazon Athena can help you to **decrease query times and costs**.\n",
 "\n",
 "When calling `read_sql_query`, instead of just running the query, we now can verify if the query has been run before. If so, and this last run was within `max_cache_seconds` (a new parameter to `read_sql_query`), we return the same results as last time if they are still available in S3. We have seen this increase performance more than 100x, but the potential is pretty much infinite.\n",
 "\n",
 "The detailed approach is:\n",
 "- When `read_sql_query` is called with `max_cache_seconds > 0` (it defaults to 0), we check for the last queries run by the same workgroup (the most we can get without pagination).\n",
-"- By default it will check the last 50 queries, but you can customize it throught the `max_cache_query_inspections` argument.\n",
+"- By default it will check the last 50 queries, but you can customize it through the `max_cache_query_inspections` argument.\n",
 "- We then sort those queries based on CompletionDateTime, descending\n",
 "- For each of those queries, we check if their CompletionDateTime is still within the `max_cache_seconds` window. If so, we check if the query string is the same as now (with some smart heuristics to guarantee coverage over both `ctas_approach`es). If they are the same, we check if the last one's results are still on S3, and then return them instead of re-running the query.\n",
 "- During the whole cache resolution phase, if there is anything wrong, the logic falls back to the usual `read_sql_query` path.\n",
@@ -292,7 +292,7 @@
 " mode=\"overwrite\",\n",
 " database=\"awswrangler_test\",\n",
 " table=\"noaa\"\n",
-");"
+")"
 ]
 },
 {
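
For context, the cache behaviour described in that hunk is driven entirely by the two read_sql_query arguments it names. A minimal sketch, reusing the awswrangler_test database and noaa table from the tutorial (the SQL itself is illustrative):

    import awswrangler as wr

    # Reuse the results of an identical query completed within the last 900 seconds,
    # inspecting up to the 50 most recent executions of the workgroup.
    df = wr.athena.read_sql_query(
        "SELECT * FROM noaa LIMIT 10",
        database="awswrangler_test",
        max_cache_seconds=900,
        max_cache_query_inspections=50,
    )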

tutorials/020 - Spark Table Interoperability.ipynb

Lines changed: 2 additions & 2 deletions
@@ -8,9 +8,9 @@
 "\n",
 "# 20 - Spark Table Interoperability\n",
 "\n",
-"[awswrangler](https://github.com/aws/aws-sdk-pandas) has no difficults to insert, overwrite or do any other kind of interaction with a Table created by Apache Spark.\n",
+"[awswrangler](https://github.com/aws/aws-sdk-pandas) has no difficulty to insert, overwrite or do any other kind of interaction with a Table created by Apache Spark.\n",
 "\n",
-"But if you want to do the oposite (Spark interacting with a table created by awswrangler) you should be aware that awswrangler follows the Hive's format and you must be explicit when using the Spark's `saveAsTable` method:"
+"But if you want to do the opposite (Spark interacting with a table created by awswrangler) you should be aware that awswrangler follows the Hive's format and you must be explicit when using the Spark's `saveAsTable` method:"
 ]
 },
 {
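
The "be explicit" advice in that hunk amounts to forcing the Hive format when Spark writes the table. A minimal PySpark sketch of that idea, assuming format("hive") is the explicit choice (the table name and data are illustrative, not the tutorial's exact cell):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.enableHiveSupport().getOrCreate()
    df = spark.range(10)  # illustrative data

    # Write a Hive-format table so awswrangler, which follows the Hive format, can interact with it.
    df.write.format("hive").mode("overwrite").saveAsTable("default.my_spark_table")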

tutorials/022 - Writing Partitions Concurrently.ipynb

Lines changed: 3 additions & 10 deletions
@@ -11,7 +11,7 @@
 "* `concurrent_partitioning` argument:\n",
 "\n",
 " If True will increase the parallelism level during the partitions writing. It will decrease the\n",
-" writing time and increase the memory usage.\n",
+" writing time and increase memory usage.\n",
 "\n",
 "*P.S. Check the [function API doc](https://aws-sdk-pandas.readthedocs.io/en/2.17.0/api.html) to see it has some argument that can be configured through Global configurations.*"
 ]
@@ -121,7 +121,7 @@
 " dataset=True,\n",
 " mode=\"overwrite\",\n",
 " partition_cols=[\"year\"],\n",
-");"
+")"
 ]
 },
 {
@@ -157,15 +157,8 @@
 " mode=\"overwrite\",\n",
 " partition_cols=[\"year\"],\n",
 " concurrent_partitioning=True # <-----\n",
-");"
+")"
 ]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"metadata": {},
-"outputs": [],
-"source": []
 }
 ],
 "metadata": {

tutorials/025 - Redshift - Loading Parquet files with Spectrum.ipynb

Lines changed: 2 additions & 2 deletions
@@ -164,7 +164,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"wr.s3.to_parquet(df, PATH, max_rows_by_file=2, dataset=True, mode=\"overwrite\");"
+"wr.s3.to_parquet(df, PATH, max_rows_by_file=2, dataset=True, mode=\"overwrite\")"
 ]
 },
 {
@@ -252,7 +252,7 @@
 " \"col0\": [10, 11],\n",
 " \"col1\": [\"k\", \"l\"],\n",
 "})\n",
-"wr.s3.to_parquet(df, PATH, dataset=True, mode=\"overwrite\");"
+"wr.s3.to_parquet(df, PATH, dataset=True, mode=\"overwrite\")"
 ]
 },
 {

tutorials/026 - Amazon Timestream.ipynb

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 "from datetime import datetime\n",
 "\n",
 "wr.timestream.create_database(\"sampleDB\")\n",
-"wr.timestream.create_table(\"sampleDB\", \"sampleTable\", memory_retention_hours=1, magnetic_retention_days=1);"
+"wr.timestream.create_table(\"sampleDB\", \"sampleTable\", memory_retention_hours=1, magnetic_retention_days=1)"
 ]
 },
 {

tutorials/027 - Amazon Timestream 2.ipynb

Lines changed: 1 addition & 1 deletion
@@ -101,7 +101,7 @@
 "outputs": [],
 "source": [
 "wr.timestream.create_database(\"sampleDB\")\n",
-"wr.timestream.create_table(\"sampleDB\", \"sampleTable\", memory_retention_hours=1, magnetic_retention_days=1);"
+"wr.timestream.create_table(\"sampleDB\", \"sampleTable\", memory_retention_hours=1, magnetic_retention_days=1)"
 ]
 },
 {
