SQL*Loader Advanced Techniques

This guide covers advanced SQL*Loader features and techniques for high-performance bulk data loading and complex data transformation scenarios.

Direct path loading bypasses the SQL engine and writes directly to database blocks, providing significant performance improvements for large data loads.

# Enable direct path loading
sqlldr hr/password control=employees.ctl direct=true
# Direct path with optimizations
sqlldr hr/password control=employees.ctl \
direct=true \
parallel=true \
columnarrayrows=5000 \
streamsize=256000

Control File for Direct Path:

LOAD DATA
INFILE 'large_employees.csv'
APPEND INTO TABLE employees
FIELDS TERMINATED BY ','
(
employee_id,
first_name,
last_name,
hire_date DATE "YYYY-MM-DD",
salary
)

Direct path loading has limitations:

  • No triggers fire during the load
  • CHECK and referential integrity constraints are disabled during the load and must be revalidated afterward (NOT NULL, UNIQUE, and PRIMARY KEY are still enforced)
  • Indexes are maintained at the end of the load and can be left UNUSABLE if the load fails, or skipped entirely with SKIP_INDEX_MAINTENANCE
  • Only a restricted set of SQL expressions is allowed in field transformations
-- Pre-load preparation for direct path
ALTER TABLE employees DISABLE ALL TRIGGERS;
ALTER TABLE employees DISABLE CONSTRAINT fk_emp_dept;
ALTER INDEX emp_name_idx UNUSABLE;
-- Load data using direct path
-- Post-load tasks
ALTER TABLE employees ENABLE ALL TRIGGERS;
ALTER TABLE employees ENABLE CONSTRAINT fk_emp_dept;
ALTER INDEX emp_name_idx REBUILD PARALLEL 4;
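
After a direct path load it is worth confirming that no indexes were left UNUSABLE before releasing the table to users. A minimal check, assuming the hr schema and sqlplus on the PATH (for partitioned indexes, query USER_IND_PARTITIONS as well); the script name is illustrative:

check_indexes.sh
#!/bin/bash
# List any indexes on EMPLOYEES that are not VALID after the load
sqlplus -s hr/password << EOF
SET PAGESIZE 100 FEEDBACK OFF
SELECT index_name, status
FROM user_indexes
WHERE table_name = 'EMPLOYEES'
AND status <> 'VALID';
EOF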
parallel_single_file.sh
#!/bin/bash
# Load single large file using parallel processes
DATA_FILE="large_data.csv"
PARALLEL_DEGREE=4
# Split file into chunks (round up so the last lines are not dropped)
total_lines=$(wc -l < "$DATA_FILE")
lines_per_file=$(( (total_lines + PARALLEL_DEGREE - 1) / PARALLEL_DEGREE ))
split -l "$lines_per_file" "$DATA_FILE" chunk_
# Create control files for each chunk (split names them chunk_aa, chunk_ab, ...)
for i in $(seq 1 $PARALLEL_DEGREE); do
# Map 1..4 to the letters a..d used in split's suffixes
suffix=$(printf "\\$(printf '%03o' $((96 + i)))")
cat > load_chunk_${i}.ctl << EOF
LOAD DATA
INFILE 'chunk_a${suffix}'
APPEND INTO TABLE large_table
FIELDS TERMINATED BY ','
(
id,
description,
amount,
created_date DATE "YYYY-MM-DD"
)
EOF
done
# Load chunks in parallel
for i in $(seq 1 $PARALLEL_DEGREE); do
sqlldr hr/password control=load_chunk_${i}.ctl \
direct=true parallel=true \
log=load_chunk_${i}.log &
done
# Wait for all processes to complete
wait
echo "Parallel loading completed"
parallel_multiple_files.sh
#!/bin/bash
# Load multiple files concurrently
FILES=(file1.csv file2.csv file3.csv file4.csv)
PARALLEL_DEGREE=${#FILES[@]}
# Function to load a single file
load_file() {
local file=$1
local basename=$(basename $file .csv)
sqlldr hr/password control=load_template.ctl \
data=$file \
log=${basename}.log \
bad=${basename}.bad \
direct=true
}
# Load files in parallel
for file in "${FILES[@]}"; do
load_file $file &
done
wait
echo "All files loaded"
-- complex_transformations.ctl
LOAD DATA
INFILE 'employee_data.csv'
INTO TABLE employees
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
TRAILING NULLCOLS
(
employee_id,
first_name "INITCAP(:first_name)",
last_name "UPPER(:last_name)",
full_name ":first_name || ' ' || :last_name",
email "LOWER(:first_name) || '.' || LOWER(:last_name) || '@company.com'",
phone "REGEXP_REPLACE(:phone, '[^0-9]', '')",
hire_date DATE "YYYY-MM-DD",
salary "TO_NUMBER(:salary, '999999.99')",
commission "DECODE(:commission, '', NULL, TO_NUMBER(:commission)/100)",
department_id "DECODE(:department, 'HR', 10, 'IT', 20, 'SALES', 30, 99)",
status "DECODE(:status, 'A', 'ACTIVE', 'I', 'INACTIVE', 'UNKNOWN')",
created_date SYSDATE,
created_by CONSTANT 'SQLLDR'
)
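
Before pointing a transformation-heavy control file at millions of rows, it is worth a dry run on a handful of records; SQL*Loader's LOAD parameter caps the number of logical records read. A quick smoke test (log file name is arbitrary):

# Validate the transformations against only the first 10 records
sqlldr hr/password control=complex_transformations.ctl load=10 log=smoke_test.log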
-- conditional_loading.ctl
LOAD DATA
INFILE 'mixed_records.csv'
-- Load employees (records starting with 'E')
INTO TABLE employees
WHEN (1:1) = 'E'
FIELDS TERMINATED BY ','
(
record_type FILLER,
employee_id,
first_name,
last_name,
department_id
)
-- Load departments (records starting with 'D')
INTO TABLE departments
WHEN (1:1) = 'D'
FIELDS TERMINATED BY ','
(
-- POSITION(1) resets the scan to column 1 for this INTO TABLE clause
record_type FILLER POSITION(1),
department_id,
department_name,
location_id
)
-- Load locations (records starting with 'L')
INTO TABLE locations
WHEN (1:1) = 'L'
FIELDS TERMINATED BY ','
(
record_type FILLER POSITION(1),
location_id,
street_address,
city,
state_province,
country_id
)
-- hierarchical_loading.ctl
LOAD DATA
INFILE 'hierarchical_data.csv'
-- Parent records
INTO TABLE categories
WHEN (record_type = 'CAT')
FIELDS TERMINATED BY ','
(
record_type FILLER,
category_id,
category_name,
parent_id NULLIF parent_id=BLANKS
)
-- Child records
INTO TABLE products
WHEN (record_type = 'PROD')
FIELDS TERMINATED BY ','
(
record_type FILLER POSITION(1),
product_id,
product_name,
category_id,
unit_price "TO_NUMBER(:unit_price, '9999.99')"
)
-- clob_loading.ctl
LOAD DATA
INFILE 'documents.csv'
INTO TABLE document_store
FIELDS TERMINATED BY ','
(
doc_id,
title,
content CHAR(4000000), -- Large character field
created_date DATE "YYYY-MM-DD"
)
-- external_lob_loading.ctl
LOAD DATA
INFILE 'lob_metadata.csv'
INTO TABLE image_store
FIELDS TERMINATED BY ','
(
image_id,
image_name,
image_path FILLER, -- the path field must be read before the LOBFILE column references it
image_data LOBFILE(image_path) TERMINATED BY EOF,
uploaded_date SYSDATE
)

Data File (lob_metadata.csv):

1,logo.jpg,/images/logo.jpg
2,banner.png,/images/banner.png
3,profile.gif,/images/profile.gif
-- blob_loading.ctl
LOAD DATA
INFILE 'binary_files.csv'
INTO TABLE binary_store
FIELDS TERMINATED BY ','
(
file_id,
filename,
file_size,
file_path FILLER,
file_data LOBFILE(file_path) TERMINATED BY EOF
)
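
After a LOB load, spot-check that the stored lengths match the source files; DBMS_LOB.GETLENGTH returns the byte count for a BLOB. A minimal sketch against the image_store table above:

# Compare stored BLOB sizes against the files listed in lob_metadata.csv
sqlplus -s hr/password << EOF
SET PAGESIZE 100 FEEDBACK OFF
SELECT image_id, image_name, DBMS_LOB.GETLENGTH(image_data) AS stored_bytes
FROM image_store
ORDER BY image_id;
EOF
ls -l /images/logo.jpg /images/banner.png /images/profile.gif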
-- error_handling.ctl
-- ERRORS and ROWS belong in the OPTIONS clause (or on the command line),
-- not in the load body: allow up to 1000 bad records, commit every 5000 rows
OPTIONS (ERRORS=1000, ROWS=5000)
LOAD DATA
INFILE 'employees.csv'
BADFILE 'employees.bad'
DISCARDFILE 'employees.dis'
APPEND INTO TABLE employees
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
TRAILING NULLCOLS
(
employee_id "DECODE(:employee_id, '', NULL, TO_NUMBER(:employee_id))",
first_name CHAR(50) NULLIF first_name=BLANKS,
last_name CHAR(50) NULLIF last_name=BLANKS,
email "CASE WHEN REGEXP_LIKE(:email, '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$')
THEN :email
ELSE NULL
END",
hire_date DATE "YYYY-MM-DD" NULLIF hire_date=BLANKS,
salary "CASE WHEN TO_NUMBER(:salary) > 0
THEN TO_NUMBER(:salary)
ELSE NULL
END",
department_id "DECODE(:department_id, '', 99, TO_NUMBER(:department_id))"
)
advanced_error_handling.sh
#!/bin/bash
# Load with comprehensive error tracking
DATA_FILE=$1
TABLE_NAME=$2
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
# Validate inputs
if [ -z "$DATA_FILE" ] || [ -z "$TABLE_NAME" ]; then
echo "Usage: $0 <data_file> <table_name>"
exit 1
fi
BASE_NAME=$(basename $DATA_FILE .csv)
CONTROL_FILE="${BASE_NAME}.ctl"
LOG_FILE="${BASE_NAME}_${TIMESTAMP}.log"
BAD_FILE="${BASE_NAME}_${TIMESTAMP}.bad"
DISCARD_FILE="${BASE_NAME}_${TIMESTAMP}.dis"
# Count input records
INPUT_COUNT=$(wc -l < $DATA_FILE)
echo "Input records: $INPUT_COUNT"
# Run SQL*Loader
sqlldr hr/password \
control=$CONTROL_FILE \
data=$DATA_FILE \
log=$LOG_FILE \
bad=$BAD_FILE \
discard=$DISCARD_FILE \
errors=1000
# Analyze results
if [ -f "$LOG_FILE" ]; then
LOADED=$(grep "successfully loaded" $LOG_FILE | awk '{print $1}')
REJECTED=$(grep "records rejected" $LOG_FILE | awk '{print $NF}')
DISCARDED=$(grep "records discarded" $LOG_FILE | awk '{print $NF}')
echo "Results:"
echo " Loaded: ${LOADED:-0}"
echo " Rejected: ${REJECTED:-0}"
echo " Discarded: ${DISCARDED:-0}"
# Check for errors
if [ -f "$BAD_FILE" ] && [ -s "$BAD_FILE" ]; then
echo "Bad records found in: $BAD_FILE"
echo "Sample bad records:"
head -5 $BAD_FILE
fi
# Generate error report
if [ "${REJECTED:-0}" -gt 0 ]; then
echo "Generating error report..."
grep "ORA-" $LOG_FILE > error_summary_${TIMESTAMP}.txt
fi
fi
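
A typical invocation, assuming a matching employees.ctl sits next to the data file:

./advanced_error_handling.sh employees.csv employees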
optimized_loading.sh
#!/bin/bash
# SQL*Loader with optimal performance settings
# Comments cannot follow a trailing backslash, so the parameter notes go here:
# bindsize/readsize = 2MB buffers; rows = commit interval (a data-save
# interval in direct path); columnarrayrows/streamsize = direct path buffers
sqlldr hr/password \
control=large_table.ctl \
direct=true \
parallel=true \
bindsize=2097152 \
readsize=2097152 \
rows=10000 \
columnarrayrows=5000 \
streamsize=256000 \
multithreading=true \
log=optimized_load.log
system_optimized_load.sh
#!/bin/bash
# Prepare system for optimal loading performance
# Database preparation
sqlplus -s / as sysdba << EOF
-- Increase SGA if needed (sga_max_size takes effect only after a restart)
ALTER SYSTEM SET sga_max_size=4G SCOPE=SPFILE;
ALTER SYSTEM SET sga_target=4G SCOPE=SPFILE;
-- Optimize for bulk operations
ALTER SYSTEM SET db_writer_processes=8;
ALTER SYSTEM SET large_pool_size=512M;
-- Disable archive logging temporarily (if allowed); this only works with the
-- database in MOUNT state (SHUTDOWN IMMEDIATE / STARTUP MOUNT first)
ALTER DATABASE NOARCHIVELOG;
-- Set tablespace to NOLOGGING
ALTER TABLESPACE users NOLOGGING;
EOF
# Disable constraints and indexes
sqlplus -s hr/password << EOF
-- Disable constraints
ALTER TABLE employees DISABLE CONSTRAINT pk_employees;
ALTER TABLE employees DISABLE CONSTRAINT fk_emp_dept;
-- Make indexes unusable
ALTER INDEX idx_emp_name UNUSABLE;
ALTER INDEX idx_emp_dept UNUSABLE;
EOF
# Perform the load
sqlldr hr/password control=massive_load.ctl direct=true parallel=true
# Post-load optimization
sqlplus -s hr/password << EOF
-- Rebuild indexes in parallel
ALTER INDEX idx_emp_name REBUILD PARALLEL 8 NOLOGGING;
ALTER INDEX idx_emp_dept REBUILD PARALLEL 8 NOLOGGING;
-- Re-enable constraints
ALTER TABLE employees ENABLE CONSTRAINT pk_employees;
ALTER TABLE employees ENABLE CONSTRAINT fk_emp_dept;
-- Gather statistics
EXEC DBMS_STATS.gather_table_stats('HR','EMPLOYEES',degree=>8);
EOF
# Re-enable logging
sqlplus -s / as sysdba << EOF
ALTER TABLESPACE users LOGGING;
ALTER DATABASE ARCHIVELOG; -- again requires MOUNT state
EOF
-- multi_format_loading.ctl
LOAD DATA
-- Fixed-width header record
INFILE 'complex_data.txt'
INTO TABLE file_headers
WHEN (1:3) = 'HDR'
(
record_type POSITION(1:3) CHAR,
file_id POSITION(4:13) INTEGER EXTERNAL,
file_date POSITION(14:23) DATE "YYYY-MM-DD",
record_count POSITION(24:33) INTEGER EXTERNAL
)
-- Delimited detail records
INTO TABLE file_details
WHEN (1:3) = 'DTL'
FIELDS TERMINATED BY '|'
(
-- Reset the scan to column 1; the previous INTO TABLE used fixed positions
record_type FILLER POSITION(1),
line_number,
product_code,
quantity INTEGER EXTERNAL,
unit_price DECIMAL EXTERNAL,
line_total "TO_NUMBER(:quantity) * TO_NUMBER(:unit_price)"
)
-- Fixed trailer record
INTO TABLE file_trailers
WHEN (1:3) = 'TRL'
(
record_type POSITION(1:3) CHAR,
total_records POSITION(4:13) INTEGER EXTERNAL,
total_amount POSITION(14:25) DECIMAL EXTERNAL
)
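
A hypothetical complex_data.txt matching the layout above (HDR columns 1-3/4-13/14-23/24-33, TRL columns 1-3/4-13/14-25):

HDR00000000012024-01-150000000002
DTL|1|ABC123|5|19.99
DTL|2|XYZ789|2|5.50
TRL0000000002000000110.95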
-- data_validation.ctl
LOAD DATA
INFILE 'dirty_data.csv'
BADFILE 'validation_errors.bad'
INTO TABLE clean_customers
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
TRAILING NULLCOLS
(
customer_id "CASE WHEN REGEXP_LIKE(:customer_id, '^[0-9]+$')
THEN TO_NUMBER(:customer_id)
ELSE NULL
END",
customer_name "CASE WHEN LENGTH(TRIM(:customer_name)) > 0
THEN INITCAP(TRIM(:customer_name))
ELSE NULL
END",
email "CASE WHEN REGEXP_LIKE(LOWER(TRIM(:email)),
'^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$')
THEN LOWER(TRIM(:email))
ELSE NULL
END",
phone "REGEXP_REPLACE(REGEXP_REPLACE(:phone, '[^0-9]', ''),
'^1?([0-9]{3})([0-9]{3})([0-9]{4})$',
'(\1) \2-\3')",
birth_date "CASE WHEN TO_DATE(:birth_date, 'MM/DD/YYYY') < SYSDATE
AND TO_DATE(:birth_date, 'MM/DD/YYYY') > DATE '1900-01-01'
THEN TO_DATE(:birth_date, 'MM/DD/YYYY')
ELSE NULL
END",
credit_limit "CASE WHEN TO_NUMBER(:credit_limit) >= 0
THEN TO_NUMBER(:credit_limit)
ELSE 0
END",
created_date SYSDATE,
data_source CONSTANT 'CSV_IMPORT'
)
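
One caveat: the CASE guards above do not trap conversion errors. If :birth_date holds a value TO_DATE cannot parse, the expression raises an ORA- error and the entire row is written to the bad file; the guard only filters values that already convert. A quick way to confirm the behavior with a deliberately dirty row (hypothetical data, matching the field order above):

# An unparseable date rejects the whole row instead of NULLing the column
echo '999,Test User,test@example.com,5551234567,NOT-A-DATE,1000' > dirty_sample.csv
sqlldr hr/password control=data_validation.ctl data=dirty_sample.csv \
log=dirty_sample.log bad=dirty_sample.bad
cat dirty_sample.bad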
-- monitor_sqlloader.sql
-- Monitor SQL*Loader sessions in real-time
-- Current SQL*Loader sessions
SELECT s.sid, s.serial#, s.username, s.program,
s.status, s.logon_time,
st.value/1024/1024 mb_processed
FROM v$session s, v$sesstat st, v$statname sn
WHERE s.sid = st.sid
AND st.statistic# = sn.statistic#
AND sn.name = 'bytes received via SQL*Net from client'
AND s.program LIKE '%sqlldr%'
ORDER BY st.value DESC;
-- I/O activity for loading sessions
SELECT s.sid, s.program,
i.physical_reads, i.physical_writes,
i.logical_reads
FROM v$session s, v$sess_io i
WHERE s.sid = i.sid
AND s.program LIKE '%sqlldr%';
-- Current long operations
SELECT sid, opname, target, sofar, totalwork,
ROUND(sofar/totalwork*100,2) pct_complete,
time_remaining/60 mins_remaining
FROM v$session_longops
WHERE opname LIKE '%SQL*Loader%'
AND sofar <> totalwork;
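
For a long-running load these queries are most useful on a refresh loop; assuming they are saved as monitor_sqlloader.sql:

# Re-run the monitoring queries every five seconds
watch -n 5 "sqlplus -s / as sysdba @monitor_sqlloader.sql"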
load_report.sh
#!/bin/bash
# Generate comprehensive loading report
LOG_FILE=$1
REPORT_FILE="load_report_$(date +%Y%m%d_%H%M%S).html"
if [ -z "$LOG_FILE" ]; then
echo "Usage: $0 <sqlloader_log_file>"
exit 1
fi
# Generate HTML report
cat > $REPORT_FILE << EOF
<!DOCTYPE html>
<html>
<head>
<title>SQL*Loader Report</title>
<style>
body { font-family: Arial, sans-serif; margin: 20px; }
table { border-collapse: collapse; width: 100%; }
th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
th { background-color: #f2f2f2; }
.success { color: green; }
.error { color: red; }
.warning { color: orange; }
</style>
</head>
<body>
<h1>SQL*Loader Report</h1>
<h2>Load Summary</h2>
<table>
EOF
# Extract key metrics from log file
TOTAL_RECORDS=$(grep "Rows successfully loaded" $LOG_FILE | awk '{print $1}')
REJECTED_RECORDS=$(grep "records rejected" $LOG_FILE | awk '{print $NF}')
DISCARDED_RECORDS=$(grep "records discarded" $LOG_FILE | awk '{print $NF}')
ELAPSED_TIME=$(grep "Elapsed time was:" $LOG_FILE | sed 's/.*was: *//')
# Add metrics to report
cat >> $REPORT_FILE << EOF
<tr><td>Total Records Loaded</td><td class="success">${TOTAL_RECORDS:-0}</td></tr>
<tr><td>Records Rejected</td><td class="error">${REJECTED_RECORDS:-0}</td></tr>
<tr><td>Records Discarded</td><td class="warning">${DISCARDED_RECORDS:-0}</td></tr>
<tr><td>Elapsed Time</td><td>${ELAPSED_TIME:-Unknown}</td></tr>
</table>
<h2>Errors</h2>
<pre>
EOF
# Add error details
grep "ORA-" $LOG_FILE >> $REPORT_FILE 2>/dev/null || echo "No errors found" >> $REPORT_FILE
cat >> $REPORT_FILE << EOF
</pre>
</body>
</html>
EOF
echo "Report generated: $REPORT_FILE"
performance_checklist.sh
#!/bin/bash
# Verify optimal configuration for large loads
echo "SQL*Loader Performance Checklist"
echo "================================"
# Check available memory
TOTAL_MEM=$(free -g | awk '/^Mem:/{print $2}')
echo "Available Memory: ${TOTAL_MEM}GB"
# Check CPU cores
CPU_CORES=$(nproc)
echo "CPU Cores: $CPU_CORES"
# Check I/O subsystem
echo "I/O Performance Test:"
dd if=/dev/zero of=/tmp/testfile bs=1G count=1 oflag=direct 2>&1 | grep copied
rm -f /tmp/testfile # remove the 1GB test file
# Database checks
sqlplus -s / as sysdba << EOF
SET PAGESIZE 0 FEEDBACK OFF
-- SGA settings
SELECT 'SGA Target: ' || value/1024/1024 || 'MB' FROM v\$parameter WHERE name = 'sga_target';
SELECT 'Large Pool: ' || value/1024/1024 || 'MB' FROM v\$parameter WHERE name = 'large_pool_size';
-- Process settings
SELECT 'Processes: ' || value FROM v\$parameter WHERE name = 'processes';
SELECT 'DB Writer Processes: ' || value FROM v\$parameter WHERE name = 'db_writer_processes';
-- Archive mode
SELECT 'Archive Mode: ' || log_mode FROM v\$database;
EOF
echo "Recommendations:"
echo "- Use direct=true for tables > 1M rows"
echo "- Set parallel=true for multi-CPU systems"
echo "- Increase bindsize and readsize for large files"
echo "- Disable unnecessary constraints and indexes"
echo "- Consider NOLOGGING for temporary data"

This advanced guide provides sophisticated techniques for handling complex SQL*Loader scenarios in production environments.