Sample Configurations
This section highlights sample configuration files to demonstrate the advanced data transformation capabilities of SmartLoader.
Skipping Fields: This file demostrates how to skip fields from being imported. It also uses the expression property to write a Groovy script to check whether numeric fields contains non-numeric values, and if they do, replace them with zero:
---
columns:
- !field
additionalData: yyyy-MM-dd
name: Date
size: 32
type: Timestamp
- !field
name: underlying
size: 16
type: String
- !field
name: _SKIP_expDate
size: 32
type: String
- !field
additionalSize: 3
name: strike
size: 8
type: Numeric
- !field
name: putCall
size: 16
type: String
- !field
name: symbol
size: 32
type: String
- !field
name: description
size: 64
type: String
- !field
name: exchangeName
size: 16
type: String
- !field
additionalSize: 4
name: bid
size: 9
type: Numeric
- !field
additionalSize: 4
name: ask
size: 9
type: Numeric
- !field
additionalSize: 4
name: _SKIP_last
size: 9
type: Numeric
- !field
additionalSize: 4
name: _SKIP_mark
size: 9
type: Numeric
- !field
name: bidSize
size: 7
type: Integer
- !field
name: askSize
size: 7
type: Integer
- !field
name: _SKIP_bidAskSize
size: 32
type: String
- !field
name: _SKIP_lastSize
size: 7
type: Integer
- !field
additionalSize: 4
name: _SKIP_highPrice
size: 9
type: Numeric
- !field
additionalSize: 4
name: _SKIP_lowPrice
size: 9
type: Numeric
- !field
additionalSize: 3
name: _SKIP_openPrice
size: 7
type: Numeric
- !field
additionalSize: 4
name: closePrice
size: 9
type: Numeric
- !field
name: totalVolume
size: 7
type: Integer
- !field
name: _SKIP_tradeDate
size: 512
type: String
- !field
name: _SKIP_tradeTimeInLong
size: 32
type: String
- !field
name: _SKIP_quoteTimeInLong
size: 32
type: String
- !field
additionalSize: 4
name: netChange
size: 9
type: Numeric
- !field
additionalSize: 5
name: volatility
size: 11
type: Numeric
expression: "'nan'.equalsIgnoreCase(f('volatility'))?'0':f('volatility')"
- !field
additionalSize: 5
name: delta
size: 10
type: Numeric
expression: "'nan'.equalsIgnoreCase(f('delta'))?'0':f('delta')"
- !field
additionalSize: 5
name: gamma
size: 10
type: Numeric
expression: "'nan'.equalsIgnoreCase(f('gamma'))?'0':f('gamma')"
- !field
additionalSize: 5
name: theta
size: 10
type: Numeric
expression: "'nan'.equalsIgnoreCase(f('theta'))?'0':f('theta')"
- !field
additionalSize: 5
name: vega
size: 9
type: Numeric
expression: "'nan'.equalsIgnoreCase(f('vega'))?'0':f('vega')"
- !field
additionalSize: 5
name: rho
size: 10
type: Numeric
expression: "'nan'.equalsIgnoreCase(f('rho'))?'0':f('rho')"
- !field
name: openInterest
size: 8
type: Integer
- !field
additionalSize: 4
name: timeValue
size: 9
type: Numeric
expression: "'nan'.equalsIgnoreCase(f('timeValue'))?'0':f('timeValue')"
- !field
additionalSize: 5
name: theoreticalOptionValue
size: 10
type: Numeric
expression: "'nan'.equalsIgnoreCase(f('theoreticalOptionValue'))?'0':f('theoreticalOptionValue')"
- !field
additionalSize: 3
name: theoreticalVolatility
size: 8
type: Numeric
expression: "'nan'.equalsIgnoreCase(f('theoreticalVolatility'))?'0':f('theoreticalVolatility')"
- !field
name: _SKIP_optionDeliverablesList
size: 512
type: String
- !field
additionalSize: 3
name: _SKIP_strikePrice
size: 8
type: Numeric
- !field
additionalData: yyyy-MM-dd
name: expirationDate
size: 32
type: Timestamp
- !field
name: daysToExpiration
size: 7
type: Integer
- !field
name: expirationType
size: 16
type: String
- !field
additionalData: yyyy-MM-dd
name: _SKIP_lastTradingDay
size: 32
type: Timestamp
- !field
additionalSize: 3
name: _SKIP_multiplier
size: 9
type: Numeric
- !field
name: _SKIP_settlementType
size: 512
type: String
- !field
name: _SKIP_deliverableNote
size: 512
type: String
- !field
name: _SKIP_isIndexOption
size: 512
type: String
- !field
additionalSize: 4
name: percentChange
size: 12
type: Numeric
- !field
additionalSize: 4
name: markChange
size: 9
type: Numeric
- !field
additionalSize: 4
name: markPercentChange
size: 12
type: Numeric
- !field
name: inTheMoney
size: 16
type: String
- !field
name: mini
size: 16
type: String
- !field
name: nonStandard
size: 16
type: String
configFileName: default.yml
createTable: true
table: optionchain
dataDir: .
delimiter: ","
header: 1
quotes: "\""
instanceName: smartloader
linesToAnalyze: 1000
loadingMode: BY_CATEGORY
moveToArchive: true
nullStringPlaceholder: ""
parallelism: 2
processId: Smart Loader
workDir: .
skipColumns:
- _SKIP_mark
- _SKIP_bidAskSize
- _SKIP_lastSize
- _SKIP_highPrice
- _SKIP_lowPrice
- _SKIP_openPrice
- _SKIP_tradeDate
- _SKIP_tradeTimeInLong
- _SKIP_quoteTimeInLong
- _SKIP_optionDeliverablesList
- _SKIP_strikePrice
- _SKIP_lastTradingDay
- _SKIP_multiplier
- _SKIP_settlementType
- _SKIP_deliverableNote
- _SKIP_isIndexOption
- _SKIP_expDate
- _SKIP_last
Date and Time Conversion: This configuration file demonstrates the use of the java DateTime library in the onStartExpression to convert from epoch date and time:
---
columns:
- !field
name: audit_sid
size: 21
type: Long
generated: true
- !field
name: sighting_sid
size: 21
type: Long
expression: "sequence('seq')"
generated: true
- !field
name: date_id
size: 10
type: Integer
expression: "dateId(f('ts'))"
generated: true
- !field
name: time_id
size: 7
type: Integer
expression: "timeId(f('ts'))"
generated: true
- !field
name: dataset
size: 8
type: String
expression: "'default'"
generated: true
- !field
name: ts
size: 12
type: Long
- !field
name: aid
size: 64
type: String
- !field
name: aid_type
size: 16
type: String
- !field
additionalSize: 10
name: latitude
size: 16
type: Numeric
- !field
additionalSize: 10
name: longitude
size: 16
type: Numeric
- !field
additionalSize: 3
name: horizontal_accuracy
size: 10
type: Numeric
- !field
additionalSize: 3
name: altitude
size: 15
type: Numeric
- !field
additionalSize: 3
name: altitude_accuracy
size: 15
type: Numeric
- !field
name: location_method
size: 16
type: String
- !field
name: ip
size: 64
type: String
- !field
name: user_agent
size: 256
type: String
- !field
name: os
size: 32
type: String
- !field
name: os_version
size: 32
type: String
- !field
name: manufacturer
size: 64
type: String
- !field
name: model
size: 64
type: String
- !field
name: carrier
size: 64
type: String
configFileName: default.yml
createTable: false
delimiter: "\t"
header: 1
instanceName: LAXMLDXI
loadingMode: BY_CATEGORY
moveToArchive: true
nullStringPlaceholder: ""
parallelism: 2
processId: LAXMLDXI
quotes: "\""
table: fact_raw_mld_sighting
onStartExpression: |
dateIdFormatter = java.time.format.DateTimeFormatter.ofPattern('yyyyMMdd')
timeIdFormatter = java.time.format.DateTimeFormatter.ofPattern('H')
datetimeFromEpochStr = {java.time.LocalDateTime.ofInstant(java.time.Instant.ofEpochSecond(Long.parseLong(it)), java.time.ZoneId.systemDefault())}
dateId = {datetimeFromEpochStr(it).format(dateIdFormatter)}
timeId = {datetimeFromEpochStr(it).format(timeIdFormatter)}
Advanced use of Grrovy Expressions: This file highlights the following:
Additional properties of Columns used for informational purposes but not used by SmarLoader
Comments using the # identifier
Encryption configuration using HashiCorp Vault for key storage
Generating output files based on input data
NOTE: This file is truncated, the definition of some of the field names used as function argumented may not be present.
---
columns:
- !field
# SUBSCRIBER_MDN (1)
description: "SUBSCRIBER_MDN"
outname: "SUBSCRIBER_MDN"
name: MSISDN
size: 128
type: String
expression: f('MSISDN').take(63)
vaultEncryptionPath: company/product/key
- !field
# GENDER (7)
description: "GENDER"
outname: "GENDER"
name: GENDER
size: 100
type: String
expression: "decode(f('GENDER').toLowerCase(), 'f', 'Female', 'm', 'Male', '{Unknown}')"
vaultEncryptionPath: company/product/key
- !field
# AGE RANGE
generated: true
description: "AGE RANGE"
outname: "AGE_RANGE"
name: AGE_RANGE
size: 100
type: String
expression: |
String val
try {
switch (toInt(f('VARCHAR_COL_1'))) {
case 0..18:
val = '<=18'
break
case 19..25:
val = '19-25'
break
case 26..35:
val = '26-35'
break
case 36..45:
val = '36-45'
break
case 46..60:
val = '46-60'
break
case { it > 60 }:
val = 'Above 60'
break
default:
val = '{Unknown}'
break
}
} catch (all) {
val = '{Unknown}'
}
return val
vaultEncryptionPath: company/product/key
- !field
# CUSTOMER_ID HASH
generated: true
description: "CUSTOMER_ID HASH"
name: CUSTOMER_ID
additionalSize: 0
size: 38
type: Numeric
expression: hash(f('VARCHAR_COL_2'))
- !field
# MULTISIM_FLAG (10)
description: "MULTISIM_FLAG"
outname: "MULTISIM_FLAG"
name: VARCHAR_COL_3
size: 100
type: String
expression: "prepGenericFlag(f('VARCHAR_COL_3'))"
vaultEncryptionPath: company/product/key
- !field
# LD_START_DATE
generated: true
description: "LD_START_DATE"
outname: "LD_START_DATE"
name: VARCHAR_COL_4
size: 100
type: String
expression: dateToId(f('__SKIP__LD_START_DATE'))
vaultEncryptionPath: company/product/key
- !field
# CRM_DATE
generated: true
description: "CRM_DATE"
name: CRM_DATE
size: 7
type: Integer
expression: |
return regex(FILE_NAME, /.*_(\d{8}).*/, java.time.LocalDate.now().format(outputIdFormatter))
- !field
# Field prepares Tariff Plan dictionary file
generated: true
description: "Field prepares Tariff Plan dictionary file"
name: __SKIP__TARIFF_POPULATIONCOLUMN
size: 150
type: String
expression: |
if (!exportMap.containsKey(f('TARIFF_PLAN_ID')))
exportMap.put(f('TARIFF_PLAN_ID'),[
f('TARIFF_PLAN_ID'),
f('__SKIP__TARIFF_GROUP_CODE'),
encryptAesWithVault('company/product/key', f('__SKIP__TARIFF_PLAN_NAME').take(47)),
encryptAesWithVault('company/product/key', f('__SKIP__TARIFF_GROUP_NAME').take(47)?:'{Unspecified}'),
encryptAesWithVault('company/product/key', f('__SKIP__TARIFF_PLAN_NAME').take(47)),
encryptAesWithVault('company/product/key', f('TARIFF_PLAN_ID').take(47)),
encryptAesWithVault('company/product/key', f('__SKIP__TARIFF_PLAN_NAME').take(127)?:'{Unspecified}'),
encryptAesWithVault('company/product/key', f('__SKIP__TARIFF_GROUP_CODE').take(127)?:'{Unspecified}'),
encryptAesWithVault('company/product/key', f('__SKIP__TARIFF_GROUP_NAME').take(127)?:'{Unspecified}')])
configFileName: ./default.yml
createTable: false
dataDir: .
delimiter: ","
quotes: '"'
header: 1
instancename: instance001
linesToAnalyze: 1000
loadingMode: BY_CATEGORY
moveToArchive: true
nullStringPlaceholder: ""
parallelism: 2
processId: SAXCUSXI Staging
table: stg_dim_subscriber
workDir: .
archiveFolderPathOverride: ./archive/
vaultUrl: https://servername
vaultRoleId: smartloader
vaultPassword: xyz
inputFileDecryptionSecretPath: company/product/inputkey
skipColumns:
- __SKIP__TARIFF_PLAN_NAME
- __SKIP__TARIFF_GROUP_CODE
- __SKIP__TARIFF_GROUP_NAME
- __SKIP__LD_START_DATE
- __SKIP__LD_END_DATE
- __SKIP__ACTIVATION_DATE
- __SKIP__DEACTIVATION_DATE
- __SKIP__EARLIEST_ACQ_YEAR_MONTH
- __SKIP__TARIFF_POPULATIONCOLUMN
onStartExpression: |
inputFormatter = java.time.format.DateTimeFormatter.ofPattern('[d/M/yyyy H:m][d/M/yyyy][d-MMM-yyyy][d-MMM-yy]')
historicalInputFormatter = new java.time.format.DateTimeFormatterBuilder()
.appendPattern("[d/M/yyyy H:m][d/M/yyyy][d-MMM-yyyy]")
.optionalStart()
.appendPattern("d-MMM-")
.appendValueReduced(java.time.temporal.ChronoField.YEAR_OF_ERA, 2, 2, java.time.LocalDate.now().minusYears(100))
.optionalEnd()
.toFormatter()
outputIdFormatter = java.time.format.DateTimeFormatter.ofPattern('yyyyMMdd')
outputMonthFormatter = java.time.format.DateTimeFormatter.ofPattern('MMM-yyyy')
dateToId = {try {java.time.LocalDate.parse(it, inputFormatter).format(outputIdFormatter)} catch (all) {''}}
histdateToId = {try {java.time.LocalDate.parse(it, historicalInputFormatter).format(outputIdFormatter)} catch (all) {''}}
dateToMonth = {try {java.time.LocalDate.parse(it, inputFormatter).format(outputMonthFormatter)} catch (all) {'{Unknown}'}}
histdateToMonth = {try {java.time.LocalDate.parse(it, historicalInputFormatter).format(outputMonthFormatter)} catch (all) {'{Unknown}'}}
prepGenericFlag = {it.regionMatches(true, 0, 'y', 0, 1)?'Yes':it.regionMatches(true, 0, 'n', 0, 1)?'No':'{Unknown}'}
revenueRange = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1500, 2000, 2500, 3000]
tenureRange = [12, 24, 36, 48, 60]
numToRange = { val, ranges ->
def range = '{Unknown}'
try {
def internalVal
if (val instanceof GString || val instanceof String)
internalVal = Double.parseDouble(val)
else internalVal = val
below = ranges.findAll { it <= internalVal }
above = ranges.findAll { it > internalVal }
if (!below && above)
range = "<${ranges[0]}"
else if (below && !above)
range = ">=${ranges[-1]}"
else range = "${below[-1]} - <${above[0]}"
} catch(all) {}
return range
}
exportMap = [:]
onEndExpression: |
if (exportMap) {
def exportFilePath = ARGS.QUEUED_SMARTLOADER_DIR?:'./incoming'
def exportFileName = FILE_NAME + '_tariff_' + java.time.LocalDate.now().format(outputIdFormatter) + '.csv'
new File(exportFilePath, exportFileName).withWriter('utf-8') {writer ->
writer.writeLine 'TARIFF_ID|VARCHAR_COL_1|TARIFF_NAME|TARIFF_PLAN_GROUP|TARIFF_PLAN_LEVEL_2|TARIFF_PLAN_LEVEL_3|VARCHAR_COL_11|VARCHAR_COL_12|VARCHAR_COL_13'
exportMap.each { entry ->
writer.writeLine entry.value.collect{ it.replaceAll(/\|/,' ') }.join('|')
}
}
}