| Title: | Load Data in SQLite from Tabular Files |
|---|---|
| Description: | A lightweight wrapper around the 'RSQLite' package for streamlined loading of data from tabular files (i,e. text delimited files like Comma Separated Values and Tab Separated Values, Microsoft Excel, and Arrow Inter-process Communication files) in 'SQLite' databases. Includes helper functions for inspecting the structure of the input files, and some functions to simplify activities on the 'SQLite' tables. |
| Authors: | Ludovico G. Beretta [aut, cre, cph] |
| Maintainer: | Ludovico G. Beretta <[email protected]> |
| License: | GPL (>= 3) |
| Version: | 0.1.2.9040 |
| Built: | 2026-06-08 14:53:55 UTC |
| Source: | https://github.com/fab-algo/rsqlite.toolkit |
The dbCopyTable() function can be used to create a copy of the data in
a table of a SQLite database in another database. The data can be appended
to an already existing table (with the same name of the source one), or
a new table can be created. It is possible to move also the indexes
from source to target.
dbCopyTable( db_file_src, db_file_tgt, table_name, drop_table = FALSE, copy_indexes = FALSE )dbCopyTable( db_file_src, db_file_tgt, table_name, drop_table = FALSE, copy_indexes = FALSE )
db_file_src |
character, the file name (including path) of the source database containing the table to be copied. |
db_file_tgt |
character, the file name (including path) of the target database where the table will be copied. |
table_name |
character, the table name. |
drop_table |
logical, if |
copy_indexes |
logical, if |
nothing
db_source <- tempfile(fileext = ".sqlite") db_target <- tempfile(fileext = ".sqlite") # Load some sample data dbcon <- dbConnect(RSQLite::SQLite(), db_source) data_path <- system.file("extdata", package = "RSQLite.toolkit") dbTableFromDSV( input_file = file.path(data_path, "abalone.csv"), dbcon = dbcon, table_name = "ABALONE", drop_table = TRUE, auto_pk = TRUE, header = TRUE, sep = ",", dec = "." ) dbDisconnect(dbcon) # Copy the table to a new database, recreating it # if it already exists and copying indexes dbCopyTable( db_file_src = db_source, db_file_tgt = db_target, table_name = "ABALONE", drop_table = TRUE, # Recreate table if it exists copy_indexes = TRUE # Copy indexes too ) # Check that the table was copied correctly dbcon_tgt <- dbConnect(RSQLite::SQLite(), db_target) print(dbListTables(dbcon_tgt)) print(dbListFields(dbcon_tgt, "ABALONE")) print(dbGetQuery(dbcon_tgt, "SELECT COUNT(*) AS TOTAL_ROWS FROM ABALONE;")) dbDisconnect(dbcon_tgt) # Clean up temporary database files unlink(c(db_source, db_target))db_source <- tempfile(fileext = ".sqlite") db_target <- tempfile(fileext = ".sqlite") # Load some sample data dbcon <- dbConnect(RSQLite::SQLite(), db_source) data_path <- system.file("extdata", package = "RSQLite.toolkit") dbTableFromDSV( input_file = file.path(data_path, "abalone.csv"), dbcon = dbcon, table_name = "ABALONE", drop_table = TRUE, auto_pk = TRUE, header = TRUE, sep = ",", dec = "." ) dbDisconnect(dbcon) # Copy the table to a new database, recreating it # if it already exists and copying indexes dbCopyTable( db_file_src = db_source, db_file_tgt = db_target, table_name = "ABALONE", drop_table = TRUE, # Recreate table if it exists copy_indexes = TRUE # Copy indexes too ) # Check that the table was copied correctly dbcon_tgt <- dbConnect(RSQLite::SQLite(), db_target) print(dbListTables(dbcon_tgt)) print(dbListFields(dbcon_tgt, "ABALONE")) print(dbGetQuery(dbcon_tgt, "SELECT COUNT(*) AS TOTAL_ROWS FROM ABALONE;")) dbDisconnect(dbcon_tgt) # Clean up temporary database files unlink(c(db_source, db_target))
The dbCreatePK() function creates a UNIQUE INDEX named
<table_name>_PK on the table specified by table_name in
the database connected by dbcon. The index is created on
the fields specified in the pk_fields argument.
dbCreatePK(dbcon, table_name, pk_fields, drop_index = FALSE)dbCreatePK(dbcon, table_name, pk_fields, drop_index = FALSE)
dbcon |
database connection, as created by the dbConnect function. |
table_name |
character, the name of the table where the index will be created. |
pk_fields |
character vector, the list of the fields' names that
define the |
drop_index |
logical, if |
nothing
# Create a database and table, then add a primary key library(RSQLite.toolkit) # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Load sample data data_path <- system.file("extdata", package = "RSQLite.toolkit") dbTableFromFeather( input_file = file.path(data_path, "penguins.feather"), dbcon = dbcon, table_name = "PENGUINS", drop_table = TRUE ) dbGetQuery(dbcon, "select species, sex, body_mass_g, culmen_length_mm, culmen_depth_mm from PENGUINS group by species, sex, body_mass_g, culmen_length_mm, culmen_depth_mm having count(*) > 1") # Create a primary key on multiple fields dbCreatePK(dbcon, "PENGUINS", c("species", "sex", "body_mass_g", "culmen_length_mm", "culmen_depth_mm")) # Check that the index was created dbGetQuery(dbcon, "SELECT name, sql FROM sqlite_master WHERE type='index' AND tbl_name='PENGUINS'") # Clean up dbDisconnect(dbcon)# Create a database and table, then add a primary key library(RSQLite.toolkit) # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Load sample data data_path <- system.file("extdata", package = "RSQLite.toolkit") dbTableFromFeather( input_file = file.path(data_path, "penguins.feather"), dbcon = dbcon, table_name = "PENGUINS", drop_table = TRUE ) dbGetQuery(dbcon, "select species, sex, body_mass_g, culmen_length_mm, culmen_depth_mm from PENGUINS group by species, sex, body_mass_g, culmen_length_mm, culmen_depth_mm having count(*) > 1") # Create a primary key on multiple fields dbCreatePK(dbcon, "PENGUINS", c("species", "sex", "body_mass_g", "culmen_length_mm", "culmen_depth_mm")) # Check that the index was created dbGetQuery(dbcon, "SELECT name, sql FROM sqlite_master WHERE type='index' AND tbl_name='PENGUINS'") # Clean up dbDisconnect(dbcon)
The dbExecFile() function executes the SQL statements contained
in a text file.
This function reads the text in input_file, strips all comment lines
(i.e. all lines beginning with -- characters) and splits the SQL statements
assuming that they are separated by the ; character. The list of SQL
statements is then executed, one at a time; the results of each statement
are stored in a list with length equal to the number of statements.
dbExecFile(input_file, dbcon, plist = NULL)dbExecFile(input_file, dbcon, plist = NULL)
input_file |
the file name (including path) containing the SQL statements to be executed |
dbcon |
database connection, as created by the dbConnect function. |
plist |
a list with values to be binded to the parameters of
SQL statements. It should have the same length as the number of SQL
statements. If any of the statements do not require parameters,
the corresponding element of the list should be set to |
a list with the results returned by each statement executed.
# Create a database and execute SQL from a file library(RSQLite.toolkit) # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Load some sample data data_path <- system.file("extdata", package = "RSQLite.toolkit") dbTableFromDSV( input_file = file.path(data_path, "abalone.csv"), dbcon = dbcon, table_name = "ABALONE", drop_table = TRUE, auto_pk = TRUE, header = TRUE, sep = ",", dec = "." ) # Create a SQL file with multiple statements sql_content <- " -- Create a summary table DROP TABLE IF EXISTS ABALONE_SUMMARY; CREATE TABLE ABALONE_SUMMARY AS SELECT SEX, COUNT(*) as TOTAL_COUNT, ROUND(AVG(LENGTH), 3) as AVG_LENGTH, ROUND(AVG(WHOLE), 3) as AVG_WEIGHT FROM ABALONE GROUP BY SEX; -- Query the results SELECT * FROM ABALONE_SUMMARY ORDER BY SEX; -- Parameterized query example SELECT SEX, COUNT(*) as COUNT FROM ABALONE WHERE LENGTH > :min_length GROUP BY SEX; " sql_file <- tempfile(fileext = ".sql") writeLines(sql_content, sql_file) # Execute SQL statements with parameters plist <- list( NULL, # DROP TABLE statement (no parameters) NULL, # CREATE TABLE statement (no parameters) NULL, # First SELECT (no parameters) list(min_length = 0.5) # Parameterized SELECT ) results <- dbExecFile( input_file = sql_file, dbcon = dbcon, plist = plist ) # Check results print(results[[3]]) # Summary data print(results[[4]]) # Filtered data # Clean up unlink(sql_file) dbDisconnect(dbcon)# Create a database and execute SQL from a file library(RSQLite.toolkit) # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Load some sample data data_path <- system.file("extdata", package = "RSQLite.toolkit") dbTableFromDSV( input_file = file.path(data_path, "abalone.csv"), dbcon = dbcon, table_name = "ABALONE", drop_table = TRUE, auto_pk = TRUE, header = TRUE, sep = ",", dec = "." ) # Create a SQL file with multiple statements sql_content <- " -- Create a summary table DROP TABLE IF EXISTS ABALONE_SUMMARY; CREATE TABLE ABALONE_SUMMARY AS SELECT SEX, COUNT(*) as TOTAL_COUNT, ROUND(AVG(LENGTH), 3) as AVG_LENGTH, ROUND(AVG(WHOLE), 3) as AVG_WEIGHT FROM ABALONE GROUP BY SEX; -- Query the results SELECT * FROM ABALONE_SUMMARY ORDER BY SEX; -- Parameterized query example SELECT SEX, COUNT(*) as COUNT FROM ABALONE WHERE LENGTH > :min_length GROUP BY SEX; " sql_file <- tempfile(fileext = ".sql") writeLines(sql_content, sql_file) # Execute SQL statements with parameters plist <- list( NULL, # DROP TABLE statement (no parameters) NULL, # CREATE TABLE statement (no parameters) NULL, # First SELECT (no parameters) list(min_length = 0.5) # Parameterized SELECT ) results <- dbExecFile( input_file = sql_file, dbcon = dbcon, plist = plist ) # Check results print(results[[3]]) # Summary data print(results[[4]]) # Filtered data # Clean up unlink(sql_file) dbDisconnect(dbcon)
The dbTableFromDataFrame() function reads the data from a rectangula region
of a sheet in an Excel file and copies it to a table in a SQLite
database. If table does not exist, it will create it.
dbTableFromDataFrame( df, dbcon, table_name, id_quote_method = "DB_NAMES", col_names = NULL, col_types = NULL, drop_table = FALSE, auto_pk = FALSE, build_pk = FALSE, pk_fields = NULL )dbTableFromDataFrame( df, dbcon, table_name, id_quote_method = "DB_NAMES", col_names = NULL, col_types = NULL, drop_table = FALSE, auto_pk = FALSE, build_pk = FALSE, pk_fields = NULL )
df |
the data frame to be saved in the SQLite table. |
dbcon |
database connection, as created by the dbConnect function. |
table_name |
character, the name of the table. |
id_quote_method |
character, used to specify how to build the SQLite
columns' names using the fields' identifiers read from the input file.
For details see the description of the |
col_names |
character vector, names of the columuns to be imported.
Used to override the field names derived from the data frame (using the
quote method selected by |
col_types |
character vector of classes to be assumed for the columns.
If not null, it will override the data types inferred from the input data
frame. Must be of the same length of the number of columns in the input.
If |
drop_table |
logical, if |
auto_pk |
logical, if |
build_pk |
logical, if |
pk_fields |
character vector, the list of the fields' names that
define the |
integer, the number of records in table_name after reading data
from the data frame.
# Create a temporary database and load data frame # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Create a sample data frame sample_data <- data.frame( id = 1:10, name = paste0("Item_", 1:10), value = runif(10, 1, 100), active = c(TRUE, FALSE), date = Sys.Date() + 0:9, stringsAsFactors = FALSE, row.names = NULL ) # Load data frame with automatic primary key dbTableFromDataFrame( df = sample_data, dbcon = dbcon, table_name = "SAMPLE_DATA", drop_table = TRUE, auto_pk = TRUE ) # Check the imported data dbListFields(dbcon, "SAMPLE_DATA") dbGetQuery(dbcon, "SELECT * FROM SAMPLE_DATA LIMIT 5") # Load with column selection and custom naming dbTableFromDataFrame( df = sample_data, dbcon = dbcon, table_name = "SAMPLE_SUBSET", drop_table = TRUE, col_names = c("ID", "ITEM_NAME", "ITEM_VALUE", "IS_ACTIVE", "DATE_CREATED") ) dbGetQuery(dbcon, "SELECT * FROM SAMPLE_SUBSET LIMIT 5") # Clean up dbDisconnect(dbcon)# Create a temporary database and load data frame # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Create a sample data frame sample_data <- data.frame( id = 1:10, name = paste0("Item_", 1:10), value = runif(10, 1, 100), active = c(TRUE, FALSE), date = Sys.Date() + 0:9, stringsAsFactors = FALSE, row.names = NULL ) # Load data frame with automatic primary key dbTableFromDataFrame( df = sample_data, dbcon = dbcon, table_name = "SAMPLE_DATA", drop_table = TRUE, auto_pk = TRUE ) # Check the imported data dbListFields(dbcon, "SAMPLE_DATA") dbGetQuery(dbcon, "SELECT * FROM SAMPLE_DATA LIMIT 5") # Load with column selection and custom naming dbTableFromDataFrame( df = sample_data, dbcon = dbcon, table_name = "SAMPLE_SUBSET", drop_table = TRUE, col_names = c("ID", "ITEM_NAME", "ITEM_VALUE", "IS_ACTIVE", "DATE_CREATED") ) dbGetQuery(dbcon, "SELECT * FROM SAMPLE_SUBSET LIMIT 5") # Clean up dbDisconnect(dbcon)
The dbTableFromDSV() function reads the data from a DSV file
and copies it to a table in a SQLite database. If table does
not exist, it will create it.
The dbTableFromDSV() function reads the data from a DSV file
and copies it to a table in a SQLite database. If table does
not exist, it will create it.
dbTableFromDSV( input_file, dbcon, table_name, header = TRUE, sep = ",", dec = ".", grp = "", id_quote_method = "DB_NAMES", col_names = NULL, col_types = NULL, col_import = NULL, drop_table = FALSE, auto_pk = FALSE, build_pk = FALSE, pk_fields = NULL, constant_values = NULL, chunk_size = 0, ... )dbTableFromDSV( input_file, dbcon, table_name, header = TRUE, sep = ",", dec = ".", grp = "", id_quote_method = "DB_NAMES", col_names = NULL, col_types = NULL, col_import = NULL, drop_table = FALSE, auto_pk = FALSE, build_pk = FALSE, pk_fields = NULL, constant_values = NULL, chunk_size = 0, ... )
input_file |
character, the file name (including path) to be read. |
dbcon |
database connection, as created by the dbConnect function. |
table_name |
character, the name of the table. |
header |
logical, if |
sep |
character, field delimiter (e.g., "," for CSV, "\t" for TSV) in the input file. Defaults to ",". |
dec |
character, decimal separator (e.g., "." or "," depending on locale) in the input file. Defaults to ".". |
grp |
character, character used for digit grouping. It defaults
to |
id_quote_method |
character, used to specify how to build the SQLite
columns' names using the fields' identifiers read from the input file.
For details see the description of the |
col_names |
character vector, names of the columuns in the input file.
Used to override the field names derived from the input file (using the
quote method selected by |
col_types |
character vector of classes to be assumed for the columns
of the input file. Must be of the same length of the number of columns
in the input file. If not null, it will override the data types guessed
from the input file.
If |
col_import |
can be either:
|
drop_table |
logical, if |
auto_pk |
logical, if |
build_pk |
logical, if |
pk_fields |
character vector, the list of the fields' names that
define the |
constant_values |
a one row data frame whose columns will be added to the table in the database. The additional table columns will be named as the data frame columns, and the corresponding values will be associeted to each record imported from the input file. It is useful to keep track of additional information (e.g., the input file name, additional context data not available in the data set, ...) when loading the content of multiple input files in the same table. |
chunk_size |
integer, the number of lines in each "chunk" (i.e. block
of lines from the input file). Setting its value to a positive integer
number, will process the input file by blocks of |
... |
additional arguments passed to |
integer, the number of records in table_name after reading data
from input_file.
# Create a temporary database and load CSV data library(RSQLite.toolkit) # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Get path to example data data_path <- system.file("extdata", package = "RSQLite.toolkit") # Load abalone CSV data with automatic primary key dbTableFromDSV( input_file = file.path(data_path, "abalone.csv"), dbcon = dbcon, table_name = "ABALONE", drop_table = TRUE, auto_pk = TRUE, header = TRUE, sep = ",", dec = "." ) # Check the imported data dbListFields(dbcon, "ABALONE") head(dbGetQuery(dbcon, "SELECT * FROM ABALONE")) # Load data with specific column selection dbTableFromDSV( input_file = file.path(data_path, "abalone.csv"), dbcon = dbcon, table_name = "ABALONE_SUBSET", drop_table = TRUE, header = TRUE, sep = ",", dec = ".", col_import = c("Sex", "Length", "Diam", "Whole") ) head(dbGetQuery(dbcon, "SELECT * FROM ABALONE_SUBSET")) # Check available tables dbListTables(dbcon) # Clean up dbDisconnect(dbcon)# Create a temporary database and load CSV data library(RSQLite.toolkit) # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Get path to example data data_path <- system.file("extdata", package = "RSQLite.toolkit") # Load abalone CSV data with automatic primary key dbTableFromDSV( input_file = file.path(data_path, "abalone.csv"), dbcon = dbcon, table_name = "ABALONE", drop_table = TRUE, auto_pk = TRUE, header = TRUE, sep = ",", dec = "." ) # Check the imported data dbListFields(dbcon, "ABALONE") head(dbGetQuery(dbcon, "SELECT * FROM ABALONE")) # Load data with specific column selection dbTableFromDSV( input_file = file.path(data_path, "abalone.csv"), dbcon = dbcon, table_name = "ABALONE_SUBSET", drop_table = TRUE, header = TRUE, sep = ",", dec = ".", col_import = c("Sex", "Length", "Diam", "Whole") ) head(dbGetQuery(dbcon, "SELECT * FROM ABALONE_SUBSET")) # Check available tables dbListTables(dbcon) # Clean up dbDisconnect(dbcon)
The dbTableFromFeather() function reads the data from a Feather (Arrow IPC) file
and copies it to a table in a SQLite database. If table does not exist, it will
create it.
The dbTableFromFeather() function reads the data from an Apache
Arrow table serialized in a Feather (Arrow IPC) file and copies it
to a table in a SQLite database. If table does not exist, it will
create it.
dbTableFromFeather( input_file, dbcon, table_name, id_quote_method = "DB_NAMES", col_names = NULL, col_types = NULL, col_import = NULL, drop_table = FALSE, auto_pk = FALSE, build_pk = FALSE, pk_fields = NULL, constant_values = NULL )dbTableFromFeather( input_file, dbcon, table_name, id_quote_method = "DB_NAMES", col_names = NULL, col_types = NULL, col_import = NULL, drop_table = FALSE, auto_pk = FALSE, build_pk = FALSE, pk_fields = NULL, constant_values = NULL )
input_file |
character, the file name (including path) to be read. |
dbcon |
database connection, as created by the dbConnect function. |
table_name |
character, the name of the table. |
id_quote_method |
character, used to specify how to build the SQLite
columns' names using the fields' identifiers read from the input file.
For details see the description of the |
col_names |
character vector, names of the columuns in the input file.
Used to override the field names derived from the input file (using the
quote method selected by |
col_types |
character vector of classes to be assumed for the columns
of the input file. Must be of the same length of the number of columns
in the input file. If not null, it will override the data types guessed
from the input file.
If |
col_import |
can be either:
|
drop_table |
logical, if |
auto_pk |
logical, if |
build_pk |
logical, if |
pk_fields |
character vector, the list of the fields' names that
define the |
constant_values |
a one row data frame whose columns will be added to
the table in the database. The additional table columns will be named
as the data frame columns, and the corresponding values will be associated
to each record imported from the input file. It is useful to keep
track of additional information (e.g., the input file name, additional
context data not available in the data set, ...) when loading
the content of multiple input files in the same table. Defults to |
integer, the number of records in table_name after reading data
from input_file.
# Create a temporary database and load Feather data library(RSQLite.toolkit) # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Get path to example data data_path <- system.file("extdata", package = "RSQLite.toolkit") # Load penguins Feather data dbTableFromFeather( input_file = file.path(data_path, "penguins.feather"), dbcon = dbcon, table_name = "PENGUINS", drop_table = TRUE ) # Check the imported data dbListFields(dbcon, "PENGUINS") head(dbGetQuery(dbcon, "SELECT * FROM PENGUINS")) # Load with custom column selection and types dbTableFromFeather( input_file = file.path(data_path, "penguins.feather"), dbcon = dbcon, table_name = "PENGUINS_SUBSET", drop_table = TRUE, col_import = c("species", "flipper_length_mm", "body_mass_g", "sex") ) # Check the imported data dbListFields(dbcon, "PENGUINS_SUBSET") head(dbGetQuery(dbcon, "SELECT * FROM PENGUINS_SUBSET")) # Check available tables dbListTables(dbcon) # Clean up dbDisconnect(dbcon)# Create a temporary database and load Feather data library(RSQLite.toolkit) # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Get path to example data data_path <- system.file("extdata", package = "RSQLite.toolkit") # Load penguins Feather data dbTableFromFeather( input_file = file.path(data_path, "penguins.feather"), dbcon = dbcon, table_name = "PENGUINS", drop_table = TRUE ) # Check the imported data dbListFields(dbcon, "PENGUINS") head(dbGetQuery(dbcon, "SELECT * FROM PENGUINS")) # Load with custom column selection and types dbTableFromFeather( input_file = file.path(data_path, "penguins.feather"), dbcon = dbcon, table_name = "PENGUINS_SUBSET", drop_table = TRUE, col_import = c("species", "flipper_length_mm", "body_mass_g", "sex") ) # Check the imported data dbListFields(dbcon, "PENGUINS_SUBSET") head(dbGetQuery(dbcon, "SELECT * FROM PENGUINS_SUBSET")) # Check available tables dbListTables(dbcon) # Clean up dbDisconnect(dbcon)
The dbTableFromView() function creates a table in a SQLite database
from a view already present in the same database.
dbTableFromView( view_name, dbcon, table_name, drop_table = FALSE, build_pk = FALSE, pk_fields = NULL )dbTableFromView( view_name, dbcon, table_name, drop_table = FALSE, build_pk = FALSE, pk_fields = NULL )
view_name |
character, name of the view. |
dbcon |
database connection, as created by the dbConnect function. |
table_name |
character, the name of the table. |
drop_table |
logical, if |
build_pk |
logical, if |
pk_fields |
character vector, the list of the fields' names that
define the |
integer, the number of records in table_name after writing data
from the input view.
# Create a temporary database and demonstrate view to table conversion library(RSQLite.toolkit) # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Load some sample data first data_path <- system.file("extdata", package = "RSQLite.toolkit") dbTableFromDSV( input_file = file.path(data_path, "abalone.csv"), dbcon = dbcon, table_name = "ABALONE", drop_table = TRUE, header = TRUE, sep = ",", dec = "." ) # Create a view with aggregated data dbExecute(dbcon, "DROP VIEW IF EXISTS VW_ABALONE_SUMMARY;") dbExecute(dbcon, "CREATE VIEW VW_ABALONE_SUMMARY AS SELECT SEX, COUNT(*) as COUNT, AVG(LENGTH) as AVG_LENGTH, AVG(WHOLE) as AVG_WEIGHT FROM ABALONE GROUP BY SEX" ) # Convert the view to a permanent table dbTableFromView( view_name = "VW_ABALONE_SUMMARY", dbcon = dbcon, table_name = "ABALONE_STATS", drop_table = TRUE ) # Check the result dbListTables(dbcon) dbGetQuery(dbcon, "SELECT * FROM ABALONE_STATS") # Clean up dbDisconnect(dbcon)# Create a temporary database and demonstrate view to table conversion library(RSQLite.toolkit) # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Load some sample data first data_path <- system.file("extdata", package = "RSQLite.toolkit") dbTableFromDSV( input_file = file.path(data_path, "abalone.csv"), dbcon = dbcon, table_name = "ABALONE", drop_table = TRUE, header = TRUE, sep = ",", dec = "." ) # Create a view with aggregated data dbExecute(dbcon, "DROP VIEW IF EXISTS VW_ABALONE_SUMMARY;") dbExecute(dbcon, "CREATE VIEW VW_ABALONE_SUMMARY AS SELECT SEX, COUNT(*) as COUNT, AVG(LENGTH) as AVG_LENGTH, AVG(WHOLE) as AVG_WEIGHT FROM ABALONE GROUP BY SEX" ) # Convert the view to a permanent table dbTableFromView( view_name = "VW_ABALONE_SUMMARY", dbcon = dbcon, table_name = "ABALONE_STATS", drop_table = TRUE ) # Check the result dbListTables(dbcon) dbGetQuery(dbcon, "SELECT * FROM ABALONE_STATS") # Clean up dbDisconnect(dbcon)
The dbTableFromXlsx() function creates a table in a SQLite database from a
range of an Excel worksheet.
The dbTableFromXlsx() function reads the data from a range of
an Excel worksheet. If table does not exist, it will
create it.
dbTableFromXlsx( input_file, dbcon, table_name, sheet_name, first_row, cols_range, header = TRUE, id_quote_method = "DB_NAMES", col_names = NULL, col_types = NULL, col_import = NULL, drop_table = FALSE, auto_pk = FALSE, build_pk = FALSE, pk_fields = NULL, constant_values = NULL, ... )dbTableFromXlsx( input_file, dbcon, table_name, sheet_name, first_row, cols_range, header = TRUE, id_quote_method = "DB_NAMES", col_names = NULL, col_types = NULL, col_import = NULL, drop_table = FALSE, auto_pk = FALSE, build_pk = FALSE, pk_fields = NULL, constant_values = NULL, ... )
input_file |
character, the file name (including path) to be read. |
dbcon |
database connection, as created by the dbConnect function. |
table_name |
character, the name of the table. |
sheet_name |
character, the name of the worksheet containing the data table. |
first_row |
integer, the row number where the data table starts. If present, it is the row number of the header row, otherwise it is the row number of the first row of data. |
cols_range |
integer, a numeric vector specifying which columns in the worksheet to be read. |
header |
logical, if |
id_quote_method |
character, used to specify how to build the SQLite
columns' names using the fields' identifiers read from the input file.
For details see the description of the |
col_names |
character vector, names of the columuns in the input file.
Used to override the field names derived from the input file (using the
quote method selected by |
col_types |
character vector of classes to be assumed for the columns
of the input file. Must be of the same length of the number of columns
in the input file. If not null, it will override the data types guessed
from the input file.
If |
col_import |
can be either:
|
drop_table |
logical, if |
auto_pk |
logical, if |
build_pk |
logical, if |
pk_fields |
character vector, the list of the fields' names that
define the |
constant_values |
a one row data frame whose columns will be added to
the table in the database. The additional table columns will be named
as the data frame columns, and the corresponding values will be associeted
to each record imported from the input file. It is useful to keep
track of additional information (e.g., the input file name, additional
context data not available in the data set, ...) when loading
the content of multiple input files in the same table. Defults to |
... |
additional arguments passed to |
integer, the number of records in table_name after reading data
from input_file.
# Create a temporary database and load Excel data library(RSQLite.toolkit) # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Get path to example data data_path <- system.file("extdata", package = "RSQLite.toolkit") # Check if Excel file exists (may not be available in all installations) xlsx_file <- file.path(data_path, "stock_portfolio.xlsx") fschema <- file_schema_xlsx(xlsx_file, sheet_name="all period", first_row=2, cols_range="A:S", header=TRUE, id_quote_method="DB_NAMES", max_lines=10) fschema[, c("col_names", "src_names")] # Load Excel data from specific sheet and range dbTableFromXlsx( input_file = xlsx_file, dbcon = dbcon, table_name = "PORTFOLIO_PERF", sheet_name = "all period", first_row = 2, cols_range = "A:S", drop_table = TRUE, col_import = c("ID", "Large_B_P", "Large_ROE", "Large_S_P", "Annual_Return_7", "Excess_Return_8", "Systematic_Risk_9") ) # Check the imported data dbListFields(dbcon, "PORTFOLIO_PERF") head(dbGetQuery(dbcon, "SELECT * FROM PORTFOLIO_PERF")) # Clean up dbDisconnect(dbcon)# Create a temporary database and load Excel data library(RSQLite.toolkit) # Set up database connection dbcon <- dbConnect(RSQLite::SQLite(), file.path(tempdir(), "example.sqlite")) # Get path to example data data_path <- system.file("extdata", package = "RSQLite.toolkit") # Check if Excel file exists (may not be available in all installations) xlsx_file <- file.path(data_path, "stock_portfolio.xlsx") fschema <- file_schema_xlsx(xlsx_file, sheet_name="all period", first_row=2, cols_range="A:S", header=TRUE, id_quote_method="DB_NAMES", max_lines=10) fschema[, c("col_names", "src_names")] # Load Excel data from specific sheet and range dbTableFromXlsx( input_file = xlsx_file, dbcon = dbcon, table_name = "PORTFOLIO_PERF", sheet_name = "all period", first_row = 2, cols_range = "A:S", drop_table = TRUE, col_import = c("ID", "Large_B_P", "Large_ROE", "Large_S_P", "Annual_Return_7", "Excess_Return_8", "Systematic_Risk_9") ) # Check the imported data dbListFields(dbcon, "PORTFOLIO_PERF") head(dbGetQuery(dbcon, "SELECT * FROM PORTFOLIO_PERF")) # Clean up dbDisconnect(dbcon)
error_handler manage error messages for package
error_handler(err, fun, step)error_handler(err, fun, step)
err |
character, error message |
fun |
character, function name where error happened |
step |
integer, code identifying the step in the function where error happened. For dbTableFrom... functions steps are:
|
nothing
The file_schema_dsv() function returns a data frame with the schema
of a DSV file reading only the first max_lines of a delimiter
separated values (DSV) text file to infer column names and data types
(it does not read the full dataset into memory). Then it converts them
to the candidate data frame columns' names and data types.
file_schema_dsv( input_file, header = TRUE, sep = ",", dec = ".", grp = "", id_quote_method = "DB_NAMES", max_lines = 2000, null_columns = FALSE, force_num_cols = TRUE, ... )file_schema_dsv( input_file, header = TRUE, sep = ",", dec = ".", grp = "", id_quote_method = "DB_NAMES", max_lines = 2000, null_columns = FALSE, force_num_cols = TRUE, ... )
input_file |
character, file name (including path) to be read. |
header |
logical, if |
sep |
character, field delimiter (e.g., "," for CSV, "\t" for TSV) in the input file. Defaults to ",". |
dec |
character, decimal separator (e.g., "." or "," depending on locale) in the input file. Defaults to ".". |
grp |
character, character used for digit grouping. It defaults
to |
id_quote_method |
character, used to specify how to build the SQLite
columns' names using the fields' identifiers read from the input file.
For details see the description of the |
max_lines |
integer, number of lines (excluding the header) to be read to infer columns' data types. Defaults to 2000. |
null_columns |
logical, if |
force_num_cols |
logical, if |
... |
Additional arguments for quoting and data interpretation as
described in the
|
a list with the following named elements:
schema, a data frame with these columns:
col_names: columns' names, after applying the selected quote method;
col_names_unquoted: columns' names, unquoted; if id_quote_method
is set to DB_NAMES they will be the same as col_names; for other
quote methods they will be the unquoted versions of col_names,that
is generally the same as src_names unless src_names contain the
quoting characters;
col_types: columns' R data types;
sql_types: columns' SQLite data types;
src_names: columns' names as they appear in the input file.
src_types: defaults to text for all columns.
src_is_quoted: logical vector indicating if each column has at least
one value enclosed in quotes.
all_na: logical vector indicating if each column consists only of NAs.
col_counts, a data frame with these columns:
num_col: number of columns,
Freq: number of rows (within max_lines) that have the number
of colums shown in num_col.
n_cols, integer, the number of columns selected for the file.
num_col, a vector of integers of length max_lines with the
number of detected columns in each row tested.
col_fill, logical, it is set to TRUE if there are lines with
less columns than n_cols.
col_flush, logical, it is set to TRUE if there are lines with
more columns than n_cols.
# Inspect CSV file schema without loading full dataset data_path <- system.file("extdata", package = "RSQLite.toolkit") # Get schema information for abalone CSV schema_info <- file_schema_dsv( input_file = file.path(data_path, "abalone.csv"), header = TRUE, sep = ",", dec = ".", max_lines = 50 ) # Display schema information print(schema_info$schema[, c("col_names", "col_types", "sql_types")]) # Check column consistency print(schema_info$col_counts) print(paste("Guessed columns:", schema_info$n_cols)) # Example with different parameters schema_custom <- file_schema_dsv( input_file = file.path(data_path, "abalone.csv"), header = TRUE, sep = ",", dec = ".", max_lines = 50, id_quote_method = "SQL_SERVER" ) print(schema_custom$schema[, c("col_names", "col_types", "src_names")])# Inspect CSV file schema without loading full dataset data_path <- system.file("extdata", package = "RSQLite.toolkit") # Get schema information for abalone CSV schema_info <- file_schema_dsv( input_file = file.path(data_path, "abalone.csv"), header = TRUE, sep = ",", dec = ".", max_lines = 50 ) # Display schema information print(schema_info$schema[, c("col_names", "col_types", "sql_types")]) # Check column consistency print(schema_info$col_counts) print(paste("Guessed columns:", schema_info$n_cols)) # Example with different parameters schema_custom <- file_schema_dsv( input_file = file.path(data_path, "abalone.csv"), header = TRUE, sep = ",", dec = ".", max_lines = 50, id_quote_method = "SQL_SERVER" ) print(schema_custom$schema[, c("col_names", "col_types", "src_names")])
The file_schema_feather() function returns a data frame with the
schema of a Feather file. This function is used to preview the table
structure contained in a Feather file, by reading only the metadata of
the file. It inspects the input file metadata to read the field identifiers'
names and data types, then converts them to the candidate data frame
columns' names and data types. The dataset contained in the input file
is not read in to memory, only meta-data are accessed.
file_schema_feather(input_file, id_quote_method = "DB_NAMES")file_schema_feather(input_file, id_quote_method = "DB_NAMES")
input_file |
File name (including path) to be read |
id_quote_method |
character, used to specify how to build the SQLite
columns' names using the fields' identifiers read from the input file.
For details see the description of the |
a data frame with these columns:
col_names: columns' names, after applying the selected quote method;
col_names_unquoted: columns' names, unquoted; if id_quote_method
is set to DB_NAMES they will be the same as col_names; for other
quote methods they will be the unquoted versions of col_names, that
is generally the same as src_names unless src_names contain the
quoting characters;
col_types: columns' R data types;
sql_types: columns' SQLite data types;
src_names: columns' names as they appear in the input file;
src_types: the Arrow's data type of each column.
The implementation is based on this question on Stackoverflow. # nolint: line_length_linter.
# Inspect Feather file schema data_path <- system.file("extdata", package = "RSQLite.toolkit") # Get schema information for penguins Feather file schema_info <- file_schema_feather( input_file = file.path(data_path, "penguins.feather") ) # Display schema information print(schema_info[, c("col_names", "col_types", "sql_types", "src_names")]) # Check specific columns print(paste("Number of columns:", nrow(schema_info))) print(paste("Column names:", paste(schema_info$col_names, collapse = ", ")))# Inspect Feather file schema data_path <- system.file("extdata", package = "RSQLite.toolkit") # Get schema information for penguins Feather file schema_info <- file_schema_feather( input_file = file.path(data_path, "penguins.feather") ) # Display schema information print(schema_info[, c("col_names", "col_types", "sql_types", "src_names")]) # Check specific columns print(paste("Number of columns:", nrow(schema_info))) print(paste("Column names:", paste(schema_info$col_names, collapse = ", ")))
The file_schema_xlsx() function returns a data frame with the
schema of an Excel data table. It will read only a range of
the specified worksheet to infer column names and data types.
Then it converts them to the candidate data frame columns' names
and data types.
file_schema_xlsx( input_file, sheet_name, first_row, cols_range, header = TRUE, id_quote_method = "DB_NAMES", max_lines = 100, null_columns = FALSE, ... )file_schema_xlsx( input_file, sheet_name, first_row, cols_range, header = TRUE, id_quote_method = "DB_NAMES", max_lines = 100, null_columns = FALSE, ... )
input_file |
character, file name (including path) to be read. |
sheet_name |
character, the name of the worksheet containing the data table. |
first_row |
integer, the row number where the data table starts. If present, it is the row number of the header row, otherwise it is the row number of the first row of data. |
cols_range |
integer, a numeric vector specifying which columns in the worksheet to be read. |
header |
logical, if |
id_quote_method |
character, used to specify how to build the SQLite
columns' names using the fields' identifiers read from the input file.
For details see the description of the |
max_lines |
integer, number of lines (excluding the header) to be read to infer columns' data types. Defaults to 100. |
null_columns |
logical, if |
... |
Additional parameters passed to |
a data frame with these columns:
col_names: columns' names, after applying the selected quote method;
col_names_unquoted: columns' names, unquoted; if id_quote_method
is set to DB_NAMES they will be the same as col_names; for other
quote methods they will be the unquoted versions of col_names,that
is generally the same as src_names unless src_names contain the
quoting characters;
col_types: columns' R data types;
sql_types: columns' SQLite data types;
src_names: columns' names as they appear in the input file;
src_types: data type attribute of each column, as determined by the
openxlsx2::wb_to_df() function.
# Inspect xlsx file schema data_path <- system.file("extdata", package = "RSQLite.toolkit") # Get schema information for Excel file schema_info <- file_schema_xlsx( input_file = file.path(data_path, "stock_portfolio.xlsx"), sheet_name = "all period", first_row = 2, cols_range = "A:S", header = TRUE, id_quote_method = "DB_NAMES", max_lines = 10 ) # Display schema information head(schema_info[, c("col_names", "src_names")]) # Check specific columns print(paste("Number of columns:", nrow(schema_info)))# Inspect xlsx file schema data_path <- system.file("extdata", package = "RSQLite.toolkit") # Get schema information for Excel file schema_info <- file_schema_xlsx( input_file = file.path(data_path, "stock_portfolio.xlsx"), sheet_name = "all period", first_row = 2, cols_range = "A:S", header = TRUE, id_quote_method = "DB_NAMES", max_lines = 10 ) # Display schema information head(schema_info[, c("col_names", "src_names")]) # Check specific columns print(paste("Number of columns:", nrow(schema_info)))
The format_column_names() function formats a vector of
strings to be used as columns' names for a table in a SQLite
database.
format_column_names( x, quote_method = "DB_NAMES", unique_names = TRUE, encoding = "" )format_column_names( x, quote_method = "DB_NAMES", unique_names = TRUE, encoding = "" )
x |
character vector with the identifiers' names to be quoted. |
quote_method |
character, used to specify how to build the SQLite
columns' names from the identifiers passed through the
|
unique_names |
logical, checks for any duplicate name after
applying the selected quote methods. If duplicates exist, they
will be made unique by adding a postfix |
encoding |
character, encoding to be assumed for input strings. It is used to re-encode the input in order to process it to build column identifiers. Defaults to ‘""’ (for the encoding of the current locale). |
A data frame containing the columns' identifiers in two formats:
quoted: the quoted names, as per the selected quote_method;
unquoted: the cleaned names, without any quoting.
# Example with DB_NAMES method col_names <- c("column 1", "column-2", "3rd_column", "SELECT") formatted_names <- format_column_names(col_names, quote_method = "DB_NAMES") print(formatted_names) # Example with SINGLE_QUOTES method formatted_names_sq <- format_column_names(col_names, quote_method = "SINGLE_QUOTES") print(formatted_names_sq) # Example with SQL_SERVER method formatted_names_sqlsrv <- format_column_names(col_names, quote_method = "SQL_SERVER") print(formatted_names_sqlsrv)# Example with DB_NAMES method col_names <- c("column 1", "column-2", "3rd_column", "SELECT") formatted_names <- format_column_names(col_names, quote_method = "DB_NAMES") print(formatted_names) # Example with SINGLE_QUOTES method formatted_names_sq <- format_column_names(col_names, quote_method = "SINGLE_QUOTES") print(formatted_names_sq) # Example with SQL_SERVER method formatted_names_sqlsrv <- format_column_names(col_names, quote_method = "SQL_SERVER") print(formatted_names_sqlsrv)
The R2SQL_types() function returns a character vector with the names
of SQLite data types corresponding to the R classes passed through the
x parameter.
If any class is not recognized, it will be replaced with TEXT data type.
R2SQL_types(x)R2SQL_types(x)
x |
character, a vector containing the strings with the R class names. |
a character vector with the names of SQLite data types.
# Convert R data types to SQLite types r_types <- c("character", "integer", "numeric", "logical", "Date") sql_types <- R2SQL_types(r_types) # Display the mapping data.frame( R_type = r_types, SQLite_type = sql_types, row.names = NULL ) # Handle unknown types (converted to TEXT) mixed_types <- c("character", "unknown_type", "integer") R2SQL_types(mixed_types)# Convert R data types to SQLite types r_types <- c("character", "integer", "numeric", "logical", "Date") sql_types <- R2SQL_types(r_types) # Display the mapping data.frame( R_type = r_types, SQLite_type = sql_types, row.names = NULL ) # Handle unknown types (converted to TEXT) mixed_types <- c("character", "unknown_type", "integer") R2SQL_types(mixed_types)