diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 29ff08391e0e4..903a19be80f45 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -315,7 +315,7 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
     except sqlalchemy.exc.InvalidRequestError:
         raise ValueError("Table %s not found" % table_name)
 
-    pandas_sql = PandasSQLAlchemy(con, meta=meta)
+    pandas_sql = SQLDatabase(con, meta=meta)
     table = pandas_sql.read_table(
         table_name, index_col=index_col, coerce_float=coerce_float,
         parse_dates=parse_dates, columns=columns)
@@ -374,7 +374,7 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None,
     """
     pandas_sql = pandasSQL_builder(con)
-    return pandas_sql.read_sql(
+    return pandas_sql.read_query(
         sql, index_col=index_col, params=params,
         coerce_float=coerce_float, parse_dates=parse_dates)
 
 
@@ -388,7 +388,7 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
     ----------
     sql : string
         SQL query to be executed or database table name.
-    con : SQLAlchemy engine or DBAPI2 connection (legacy mode)
+    con : SQLAlchemy engine or DBAPI2 connection (fallback mode)
         Using SQLAlchemy makes it possible to use any DB supported by that
         library.
         If a DBAPI2 object, only sqlite3 is supported.
@@ -435,8 +435,8 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
     """
     pandas_sql = pandasSQL_builder(con)
 
-    if isinstance(pandas_sql, PandasSQLLegacy):
-        return pandas_sql.read_sql(
+    if isinstance(pandas_sql, SQLiteDatabase):
+        return pandas_sql.read_query(
             sql, index_col=index_col, params=params,
             coerce_float=coerce_float, parse_dates=parse_dates)
 
@@ -451,7 +451,7 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
             sql, index_col=index_col, coerce_float=coerce_float,
             parse_dates=parse_dates, columns=columns)
     else:
-        return pandas_sql.read_sql(
+        return pandas_sql.read_query(
             sql, index_col=index_col, params=params,
             coerce_float=coerce_float, parse_dates=parse_dates)
 
@@ -551,14 +551,14 @@ def pandasSQL_builder(con, flavor=None, schema=None, meta=None,
     # When support for DBAPI connections is removed,
     # is_cursor should not be necessary.
     if _is_sqlalchemy_engine(con):
-        return PandasSQLAlchemy(con, schema=schema, meta=meta)
+        return SQLDatabase(con, schema=schema, meta=meta)
     else:
         if flavor == 'mysql':
             warnings.warn(_MYSQL_WARNING, FutureWarning)
-        return PandasSQLLegacy(con, flavor, is_cursor=is_cursor)
+        return SQLiteDatabase(con, flavor, is_cursor=is_cursor)
 
 
-class PandasSQLTable(PandasObject):
+class SQLTable(PandasObject):
     """
     For mapping Pandas tables to SQL tables.
     Uses fact that table is reflected by SQLAlchemy to
@@ -890,10 +890,24 @@ def to_sql(self, *args, **kwargs):
             " or connection+sql flavor")
 
 
-class PandasSQLAlchemy(PandasSQL):
+class SQLDatabase(PandasSQL):
     """
     This class enables convertion between DataFrame and SQL databases
     using SQLAlchemy to handle DataBase abstraction
+
+    Parameters
+    ----------
+    engine : SQLAlchemy engine
+        Engine to connect with the database. Using SQLAlchemy makes it
+        possible to use any DB supported by that library.
+    schema : string, default None
+        Name of SQL schema in database to write to (if database flavor
+        supports this). If None, use default schema (default).
+    meta : SQLAlchemy MetaData object, default None
+        If provided, this MetaData object is used instead of a newly
+        created one. This allows specifying database flavor specific
+        arguments in the MetaData object.
+ """ def __init__(self, engine, schema=None, meta=None): @@ -913,13 +927,86 @@ def execute(self, *args, **kwargs): def read_table(self, table_name, index_col=None, coerce_float=True, parse_dates=None, columns=None, schema=None): - table = PandasSQLTable( - table_name, self, index=index_col, schema=schema) + """Read SQL database table into a DataFrame. + + Parameters + ---------- + table_name : string + Name of SQL table in database + index_col : string, optional + Column to set as index + coerce_float : boolean, default True + Attempt to convert values to non-string, non-numeric objects (like + decimal.Decimal) to floating point. Can result in loss of Precision. + parse_dates : list or dict + - List of column names to parse as dates + - Dict of ``{column_name: format string}`` where format string is + strftime compatible in case of parsing string times or is one of + (D, s, ns, ms, us) in case of parsing integer timestamps + - Dict of ``{column_name: arg dict}``, where the arg dict corresponds + to the keyword arguments of :func:`pandas.to_datetime` + Especially useful with databases without native Datetime support, + such as SQLite + columns : list + List of column names to select from sql table + schema : string, default None + Name of SQL schema in database to query (if database flavor + supports this). If specified, this overwrites the default + schema of the SQLDatabase object. + + Returns + ------- + DataFrame + + See also + -------- + pandas.read_sql_table + SQLDatabase.read_query + + """ + table = SQLTable(table_name, self, index=index_col, schema=schema) return table.read(coerce_float=coerce_float, parse_dates=parse_dates, columns=columns) - - def read_sql(self, sql, index_col=None, coerce_float=True, + + def read_query(self, sql, index_col=None, coerce_float=True, parse_dates=None, params=None): + """Read SQL query into a DataFrame. + + Parameters + ---------- + sql : string + SQL query to be executed + index_col : string, optional + Column name to use as index for the returned DataFrame object. + coerce_float : boolean, default True + Attempt to convert values to non-string, non-numeric objects (like + decimal.Decimal) to floating point, useful for SQL result sets + params : list, tuple or dict, optional + List of parameters to pass to execute method. The syntax used + to pass parameters is database driver dependent. Check your + database driver documentation for which of the five syntax styles, + described in PEP 249's paramstyle, is supported. + Eg. 
+            Eg. for psycopg2, uses %(name)s so use params={'name' : 'value'}
+        parse_dates : list or dict
+            - List of column names to parse as dates
+            - Dict of ``{column_name: format string}`` where format string is
+              strftime compatible in case of parsing string times or is one of
+              (D, s, ns, ms, us) in case of parsing integer timestamps
+            - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
+              to the keyword arguments of :func:`pandas.to_datetime`
+              Especially useful with databases without native Datetime support,
+              such as SQLite
+
+        Returns
+        -------
+        DataFrame
+
+        See also
+        --------
+        read_sql_table : Read SQL database table into a DataFrame
+        read_sql
+
+        """
         args = _convert_params(sql, params)
 
         result = self.execute(*args)
@@ -935,12 +1022,41 @@ def read_sql(self, sql, index_col=None, coerce_float=True,
             data_frame.set_index(index_col, inplace=True)
 
         return data_frame
+
+    read_sql = read_query
 
     def to_sql(self, frame, name, if_exists='fail', index=True,
                index_label=None, schema=None, chunksize=None):
-        table = PandasSQLTable(
-            name, self, frame=frame, index=index, if_exists=if_exists,
-            index_label=index_label, schema=schema)
+        """
+        Write records stored in a DataFrame to a SQL database.
+
+        Parameters
+        ----------
+        frame : DataFrame
+        name : string
+            Name of SQL table
+        if_exists : {'fail', 'replace', 'append'}, default 'fail'
+            - fail: If table exists, do nothing.
+            - replace: If table exists, drop it, recreate it, and insert data.
+            - append: If table exists, insert data. Create if does not exist.
+        index : boolean, default True
+            Write DataFrame index as a column
+        index_label : string or sequence, default None
+            Column label for index column(s). If None is given (default) and
+            `index` is True, then the index names are used.
+            A sequence should be given if the DataFrame uses MultiIndex.
+        schema : string, default None
+            Name of SQL schema in database to write to (if database flavor
+            supports this). If specified, this overwrites the default
+            schema of the SQLDatabase object.
+        chunksize : int, default None
+            If not None, then rows will be written in batches of this size at a
+            time. If None, all rows will be written at once.
+
+        """
+        table = SQLTable(name, self, frame=frame, index=index,
+                         if_exists=if_exists, index_label=index_label,
+                         schema=schema)
         table.create()
         table.insert(chunksize)
         # check for potentially case sensitivity issues (GH7815)
@@ -972,8 +1088,7 @@ def drop_table(self, table_name, schema=None):
             self.meta.clear()
 
     def _create_sql_schema(self, frame, table_name, keys=None):
-        table = PandasSQLTable(table_name, self, frame=frame, index=False,
-                               keys=keys)
+        table = SQLTable(table_name, self, frame=frame, index=False, keys=keys)
         return str(table.sql_schema())
 
 
@@ -1032,9 +1147,9 @@ def _create_sql_schema(self, frame, table_name, keys=None):
                     "underscores.")
 
 
-class PandasSQLTableLegacy(PandasSQLTable):
+class SQLiteTable(SQLTable):
     """
-    Patch the PandasSQLTable for legacy support.
+    Patch the SQLTable for fallback support.
     Instead of a table variable just use the Create Table statement.
     """
 
@@ -1135,7 +1250,19 @@ def _sql_type_name(self, col):
         return _SQL_TYPES[pytype_name][self.pd_sql.flavor]
 
 
-class PandasSQLLegacy(PandasSQL):
+class SQLiteDatabase(PandasSQL):
+    """
+    Version of SQLDatabase to support sqlite connections (fallback without
+    sqlalchemy). This should only be used internally.
+
+    For now still supports `flavor` argument to deal with 'mysql' database
+    for backwards compatibility, but this will be removed in future versions.
+
+    Parameters
+    ----------
+    con : sqlite connection object
+
+    """
 
     def __init__(self, con, flavor, is_cursor=False):
         self.is_cursor = is_cursor
@@ -1180,7 +1307,7 @@ def execute(self, *args, **kwargs):
             ex = DatabaseError("Execution failed on sql '%s': %s" % (args[0], exc))
             raise_with_traceback(ex)
 
-    def read_sql(self, sql, index_col=None, coerce_float=True, params=None,
+    def read_query(self, sql, index_col=None, coerce_float=True, params=None,
                   parse_dates=None):
         args = _convert_params(sql, params)
         cursor = self.execute(*args)
@@ -1196,7 +1323,7 @@ def read_sql(self, sql, index_col=None, coerce_float=True, params=None,
         if index_col is not None:
             data_frame.set_index(index_col, inplace=True)
         return data_frame
-
+
     def _fetchall_as_list(self, cur):
         result = cur.fetchall()
         if not isinstance(result, list):
@@ -1230,9 +1357,8 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
             size at a time. If None, all rows will be written at once.
 
         """
-        table = PandasSQLTableLegacy(
-            name, self, frame=frame, index=index, if_exists=if_exists,
-            index_label=index_label)
+        table = SQLiteTable(name, self, frame=frame, index=index,
+                            if_exists=if_exists, index_label=index_label)
         table.create()
         table.insert(chunksize)
 
@@ -1246,15 +1372,15 @@ def has_table(self, name, schema=None):
         return len(self.execute(query).fetchall()) > 0
 
     def get_table(self, table_name, schema=None):
-        return None  # not supported in Legacy mode
+        return None  # not supported in fallback mode
 
     def drop_table(self, name, schema=None):
         drop_sql = "DROP TABLE %s" % name
         self.execute(drop_sql)
 
     def _create_sql_schema(self, frame, table_name, keys=None):
-        table = PandasSQLTableLegacy(table_name, self, frame=frame,
-                                     index=False, keys=keys)
+        table = SQLiteTable(table_name, self, frame=frame, index=False,
+                            keys=keys)
         return str(table.sql_schema())
 
diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index 217114a00e980..c2d75f3ff2611 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -6,12 +6,12 @@
 - Tests for the public API (only tests with sqlite3)
     - `_TestSQLApi` base class
     - `TestSQLApi`: test the public API with sqlalchemy engine
-    - `TesySQLLegacyApi`: test the public API with DBAPI connection
+    - `TestSQLiteFallbackApi`: test the public API with a sqlite DBAPI connection
 - Tests for the different SQL flavors (flavor specific type conversions)
 - Tests for the sqlalchemy mode: `_TestSQLAlchemy` is the base class with
   common methods, the different tested flavors (sqlite3, MySQL, PostgreSQL)
   derive from the base class
-- Tests for the legacy mode (`TestSQLiteLegacy` and `TestMySQLLegacy`)
+- Tests for the fallback mode (`TestSQLiteFallback` and `TestMySQLLegacy`)
 
 """
@@ -228,19 +228,19 @@ def _count_rows(self, table_name):
         return result[0]
 
     def _read_sql_iris(self):
-        iris_frame = self.pandasSQL.read_sql("SELECT * FROM iris")
+        iris_frame = self.pandasSQL.read_query("SELECT * FROM iris")
         self._check_iris_loaded_frame(iris_frame)
 
     def _read_sql_iris_parameter(self):
         query = SQL_STRINGS['read_parameters'][self.flavor]
         params = ['Iris-setosa', 5.1]
-        iris_frame = self.pandasSQL.read_sql(query, params=params)
+        iris_frame = self.pandasSQL.read_query(query, params=params)
         self._check_iris_loaded_frame(iris_frame)
 
     def _read_sql_iris_named_parameter(self):
         query = SQL_STRINGS['read_named_parameters'][self.flavor]
         params = {'name': 'Iris-setosa', 'length': 5.1}
-        iris_frame = self.pandasSQL.read_sql(query, params=params)
+        iris_frame = self.pandasSQL.read_query(query, params=params)
         self._check_iris_loaded_frame(iris_frame)
 
     def _to_sql(self):
@@ -313,7 +313,7 @@ def _to_sql_append(self):
     def _roundtrip(self):
         self.drop_table('test_frame_roundtrip')
         self.pandasSQL.to_sql(self.test_frame1, 'test_frame_roundtrip')
-        result = self.pandasSQL.read_sql('SELECT * FROM test_frame_roundtrip')
+        result = self.pandasSQL.read_query('SELECT * FROM test_frame_roundtrip')
         result.set_index('level_0', inplace=True)
 
         # result.index.astype(int)
@@ -348,13 +348,13 @@ def _transaction_test(self):
         except:
             # ignore raised exception
             pass
-        res = self.pandasSQL.read_sql('SELECT * FROM test_trans')
+        res = self.pandasSQL.read_query('SELECT * FROM test_trans')
         self.assertEqual(len(res), 0)
 
         # Make sure when transaction is committed, rows do get inserted
         with self.pandasSQL.run_transaction() as trans:
             trans.execute(ins_sql)
-        res2 = self.pandasSQL.read_sql('SELECT * FROM test_trans')
+        res2 = self.pandasSQL.read_query('SELECT * FROM test_trans')
         self.assertEqual(len(res2), 1)
 
 
@@ -367,7 +367,7 @@ class _TestSQLApi(PandasSQLTest):
     Base class to test the public API.
 
    From this two classes are derived to run these tests for both the
-    sqlalchemy mode (`TestSQLApi`) and the legacy mode (`TestSQLLegacyApi`).
+    sqlalchemy mode (`TestSQLApi`) and the fallback mode (`TestSQLiteFallbackApi`).
    These tests are run with sqlite3. Specific tests for the different
    sql flavours are included in `_TestSQLAlchemy`.
 
@@ -736,9 +736,9 @@ def _get_index_columns(self, tbl_name):
         return ixs
 
 
-class TestSQLLegacyApi(_TestSQLApi):
+class TestSQLiteFallbackApi(_TestSQLApi):
     """
-    Test the public legacy API
+    Test the public sqlite connection fallback API
     """
     flavor = 'sqlite'
 
@@ -833,7 +833,7 @@ def connect(self):
     def setup_connect(self):
         try:
             self.conn = self.connect()
-            self.pandasSQL = sql.PandasSQLAlchemy(self.conn)
+            self.pandasSQL = sql.SQLDatabase(self.conn)
             # to test if connection can be made:
             self.conn.connect()
         except sqlalchemy.exc.OperationalError:
@@ -871,7 +871,7 @@ def test_create_table(self):
         temp_frame = DataFrame(
             {'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]})
 
-        pandasSQL = sql.PandasSQLAlchemy(temp_conn)
+        pandasSQL = sql.SQLDatabase(temp_conn)
         pandasSQL.to_sql(temp_frame, 'temp_frame')
 
         self.assertTrue(
@@ -883,7 +883,7 @@ def test_drop_table(self):
         temp_frame = DataFrame(
             {'one': [1., 2., 3., 4.], 'two': [4., 3., 2., 1.]})
 
-        pandasSQL = sql.PandasSQLAlchemy(temp_conn)
+        pandasSQL = sql.SQLDatabase(temp_conn)
         pandasSQL.to_sql(temp_frame, 'temp_frame')
 
         self.assertTrue(
@@ -1302,7 +1302,7 @@ def test_schema_support(self):
         engine2 = self.connect()
         meta = sqlalchemy.MetaData(engine2, schema='other')
-        pdsql = sql.PandasSQLAlchemy(engine2, meta=meta)
+        pdsql = sql.SQLDatabase(engine2, meta=meta)
         pdsql.to_sql(df, 'test_schema_other2', index=False)
         pdsql.to_sql(df, 'test_schema_other2', index=False, if_exists='replace')
         pdsql.to_sql(df, 'test_schema_other2', index=False, if_exists='append')
@@ -1314,9 +1314,9 @@ def test_schema_support(self):
 #------------------------------------------------------------------------------
 #--- Test Sqlite / MySQL fallback
 
-class TestSQLiteLegacy(PandasSQLTest):
+class TestSQLiteFallback(PandasSQLTest):
     """
-    Test the legacy mode against an in-memory sqlite database.
+    Test the fallback mode against an in-memory sqlite database.
""" flavor = 'sqlite' @@ -1331,7 +1331,7 @@ def drop_table(self, table_name): def setUp(self): self.conn = self.connect() - self.pandasSQL = sql.PandasSQLLegacy(self.conn, 'sqlite') + self.pandasSQL = sql.SQLiteDatabase(self.conn, 'sqlite') self._load_iris_data() @@ -1339,7 +1339,7 @@ def setUp(self): def test_invalid_flavor(self): self.assertRaises( - NotImplementedError, sql.PandasSQLLegacy, self.conn, 'oracle') + NotImplementedError, sql.SQLiteDatabase, self.conn, 'oracle') def test_read_sql(self): self._read_sql_iris() @@ -1417,7 +1417,7 @@ def test_to_sql_save_index(self): def test_transactions(self): self._transaction_test() -class TestMySQLLegacy(TestSQLiteLegacy): +class TestMySQLLegacy(TestSQLiteFallback): """ Test the legacy mode against a MySQL database. @@ -1451,7 +1451,7 @@ def setUp(self): except self.driver.err.OperationalError: raise nose.SkipTest("Can't connect to MySQL server") - self.pandasSQL = sql.PandasSQLLegacy(self.conn, 'mysql') + self.pandasSQL = sql.SQLiteDatabase(self.conn, 'mysql') self._load_iris_data() self._load_test1_data()