Sample Configurations

This section highlights sample configuration files to demonstrate the advanced data transformation capabilities of SmartLoader.

Skipping Fields: This file demostrates how to skip fields from being imported. It also uses the expression property to write a Groovy script to check whether numeric fields contains non-numeric values, and if they do, replace them with zero:

---
columns:
    - !field
        additionalData: yyyy-MM-dd
        name: Date
        size: 32
        type: Timestamp
    - !field
        name: underlying
        size: 16
        type: String
    - !field
        name: _SKIP_expDate
        size: 32
        type: String
    - !field
        additionalSize: 3
        name: strike
        size: 8
        type: Numeric
    - !field
        name: putCall
        size: 16
        type: String
    - !field
        name: symbol
        size: 32
        type: String
    - !field
        name: description
        size: 64
        type: String
    - !field
        name: exchangeName
        size: 16
        type: String
    - !field
        additionalSize: 4
        name: bid
        size: 9
        type: Numeric
    - !field
        additionalSize: 4
        name: ask
        size: 9
        type: Numeric
    - !field
        additionalSize: 4
        name: _SKIP_last
        size: 9
        type: Numeric
    - !field
        additionalSize: 4
        name: _SKIP_mark
        size: 9
        type: Numeric
    - !field
        name: bidSize
        size: 7
        type: Integer
    - !field
        name: askSize
        size: 7
        type: Integer
    - !field
        name: _SKIP_bidAskSize
        size: 32
        type: String
    - !field
        name: _SKIP_lastSize
        size: 7
        type: Integer
    - !field
        additionalSize: 4
        name: _SKIP_highPrice
        size: 9
        type: Numeric
    - !field
        additionalSize: 4
        name: _SKIP_lowPrice
        size: 9
        type: Numeric
    - !field
        additionalSize: 3
        name: _SKIP_openPrice
        size: 7
        type: Numeric
    - !field
        additionalSize: 4
        name: closePrice
        size: 9
        type: Numeric
    - !field
        name: totalVolume
        size: 7
        type: Integer
    - !field
        name: _SKIP_tradeDate
        size: 512
        type: String
    - !field
        name: _SKIP_tradeTimeInLong
        size: 32
        type: String
    - !field
        name: _SKIP_quoteTimeInLong
        size: 32
        type: String
    - !field
        additionalSize: 4
        name: netChange
        size: 9
        type: Numeric
    - !field
        additionalSize: 5
        name: volatility
        size: 11
        type: Numeric
        expression: "'nan'.equalsIgnoreCase(f('volatility'))?'0':f('volatility')"
    - !field
        additionalSize: 5
        name: delta
        size: 10
        type: Numeric
        expression: "'nan'.equalsIgnoreCase(f('delta'))?'0':f('delta')"
    - !field
        additionalSize: 5
        name: gamma
        size: 10
        type: Numeric
        expression: "'nan'.equalsIgnoreCase(f('gamma'))?'0':f('gamma')"
    - !field
        additionalSize: 5
        name: theta
        size: 10
        type: Numeric
        expression: "'nan'.equalsIgnoreCase(f('theta'))?'0':f('theta')"
    - !field
        additionalSize: 5
        name: vega
        size: 9
        type: Numeric
        expression: "'nan'.equalsIgnoreCase(f('vega'))?'0':f('vega')"
    - !field
        additionalSize: 5
        name: rho
        size: 10
        type: Numeric
        expression: "'nan'.equalsIgnoreCase(f('rho'))?'0':f('rho')"
    - !field
        name: openInterest
        size: 8
        type: Integer
    - !field
        additionalSize: 4
        name: timeValue
        size: 9
        type: Numeric
        expression: "'nan'.equalsIgnoreCase(f('timeValue'))?'0':f('timeValue')"
    - !field
        additionalSize: 5
        name: theoreticalOptionValue
        size: 10
        type: Numeric
        expression: "'nan'.equalsIgnoreCase(f('theoreticalOptionValue'))?'0':f('theoreticalOptionValue')"
    - !field
        additionalSize: 3
        name: theoreticalVolatility
        size: 8
        type: Numeric
        expression: "'nan'.equalsIgnoreCase(f('theoreticalVolatility'))?'0':f('theoreticalVolatility')"
    - !field
        name: _SKIP_optionDeliverablesList
        size: 512
        type: String
    - !field
        additionalSize: 3
        name: _SKIP_strikePrice
        size: 8
        type: Numeric
    - !field
        additionalData: yyyy-MM-dd
        name: expirationDate
        size: 32
        type: Timestamp
    - !field
        name: daysToExpiration
        size: 7
        type: Integer
    - !field
        name: expirationType
        size: 16
        type: String
    - !field
        additionalData: yyyy-MM-dd
        name: _SKIP_lastTradingDay
        size: 32
        type: Timestamp
    - !field
        additionalSize: 3
        name: _SKIP_multiplier
        size: 9
        type: Numeric
    - !field
        name: _SKIP_settlementType
        size: 512
        type: String
    - !field
        name: _SKIP_deliverableNote
        size: 512
        type: String
    - !field
        name: _SKIP_isIndexOption
        size: 512
        type: String
    - !field
        additionalSize: 4
        name: percentChange
        size: 12
        type: Numeric
    - !field
        additionalSize: 4
        name: markChange
        size: 9
        type: Numeric
    - !field
        additionalSize: 4
        name: markPercentChange
        size: 12
        type: Numeric
    - !field
        name: inTheMoney
        size: 16
        type: String
    - !field
        name: mini
        size: 16
        type: String
    - !field
        name: nonStandard
        size: 16
        type: String
configFileName: default.yml
createTable: true
table: optionchain
dataDir: .
delimiter: ","
header: 1
quotes: "\""
instanceName: smartloader
linesToAnalyze: 1000
loadingMode: BY_CATEGORY
moveToArchive: true
nullStringPlaceholder: ""
parallelism: 2
processId: Smart Loader
workDir: .
skipColumns:
 - _SKIP_mark
 - _SKIP_bidAskSize
 - _SKIP_lastSize
 - _SKIP_highPrice
 - _SKIP_lowPrice
 - _SKIP_openPrice
 - _SKIP_tradeDate
 - _SKIP_tradeTimeInLong
 - _SKIP_quoteTimeInLong
 - _SKIP_optionDeliverablesList
 - _SKIP_strikePrice
 - _SKIP_lastTradingDay
 - _SKIP_multiplier
 - _SKIP_settlementType
 - _SKIP_deliverableNote
 - _SKIP_isIndexOption
 - _SKIP_expDate
 - _SKIP_last

Date and Time Conversion: This configuration file demonstrates the use of the java DateTime library in the onStartExpression to convert from epoch date and time:

---
columns:
    - !field
        name: audit_sid
        size: 21
        type: Long
        generated: true
    - !field
        name: sighting_sid
        size: 21
        type: Long
        expression: "sequence('seq')"
        generated: true
    - !field
        name: date_id
        size: 10
        type: Integer
        expression: "dateId(f('ts'))"
        generated: true
    - !field
        name: time_id
        size: 7
        type: Integer
        expression: "timeId(f('ts'))"
        generated: true
    - !field
        name: dataset
        size: 8
        type: String
        expression: "'default'"
        generated: true
    - !field
        name: ts
        size: 12
        type: Long
    - !field
        name: aid
        size: 64
        type: String
    - !field
        name: aid_type
        size: 16
        type: String
    - !field
        additionalSize: 10
        name: latitude
        size: 16
        type: Numeric
    - !field
        additionalSize: 10
        name: longitude
        size: 16
        type: Numeric
    - !field
        additionalSize: 3
        name: horizontal_accuracy
        size: 10
        type: Numeric
    - !field
        additionalSize: 3
        name: altitude
        size: 15
        type: Numeric
    - !field
        additionalSize: 3
        name: altitude_accuracy
        size: 15
        type: Numeric
    - !field
        name: location_method
        size: 16
        type: String
    - !field
        name: ip
        size: 64
        type: String
    - !field
        name: user_agent
        size: 256
        type: String
    - !field
        name: os
        size: 32
        type: String
    - !field
        name: os_version
        size: 32
        type: String
    - !field
        name: manufacturer
        size: 64
        type: String
    - !field
        name: model
        size: 64
        type: String
    - !field
        name: carrier
        size: 64
        type: String
configFileName: default.yml
createTable: false
delimiter: "\t"
header: 1
instanceName: LAXMLDXI
loadingMode: BY_CATEGORY
moveToArchive: true
nullStringPlaceholder: ""
parallelism: 2
processId: LAXMLDXI
quotes: "\""
table: fact_raw_mld_sighting
onStartExpression: |
    dateIdFormatter = java.time.format.DateTimeFormatter.ofPattern('yyyyMMdd')
    timeIdFormatter = java.time.format.DateTimeFormatter.ofPattern('H')
    datetimeFromEpochStr = {java.time.LocalDateTime.ofInstant(java.time.Instant.ofEpochSecond(Long.parseLong(it)), java.time.ZoneId.systemDefault())}
    dateId = {datetimeFromEpochStr(it).format(dateIdFormatter)}
    timeId = {datetimeFromEpochStr(it).format(timeIdFormatter)}

Advanced use of Grrovy Expressions: This file highlights the following:

  • Additional properties of Columns used for informational purposes but not used by SmarLoader

  • Comments using the # identifier

  • Encryption configuration using HashiCorp Vault for key storage

  • Generating output files based on input data

NOTE: This file is truncated, the definition of some of the field names used as function argumented may not be present.

---
columns:
- !field
    # SUBSCRIBER_MDN (1)
    description: "SUBSCRIBER_MDN"
    outname: "SUBSCRIBER_MDN"
    name: MSISDN
    size: 128
    type: String
    expression: f('MSISDN').take(63)
    vaultEncryptionPath: company/product/key
- !field
    # GENDER (7)
    description: "GENDER"
    outname: "GENDER"
    name: GENDER
    size: 100
    type: String
    expression: "decode(f('GENDER').toLowerCase(), 'f', 'Female', 'm', 'Male', '{Unknown}')"
    vaultEncryptionPath: company/product/key
- !field
    # AGE RANGE
    generated: true
    description: "AGE RANGE"
    outname: "AGE_RANGE"
    name: AGE_RANGE
    size: 100
    type: String
    expression: |
        String val
        try {
            switch (toInt(f('VARCHAR_COL_1'))) {
                case 0..18:
                    val = '<=18'
                    break
                case 19..25:
                    val = '19-25'
                    break
                case 26..35:
                    val = '26-35'
                    break
                case 36..45:
                    val = '36-45'
                    break
                case 46..60:
                    val = '46-60'
                    break
                case { it > 60 }:
                    val = 'Above 60'
                    break
                default:
                    val = '{Unknown}'
                    break
            }
        } catch (all) {
            val = '{Unknown}'
        }
        return val
    vaultEncryptionPath: company/product/key
- !field
    # CUSTOMER_ID HASH
    generated: true
    description: "CUSTOMER_ID HASH"
    name: CUSTOMER_ID
    additionalSize: 0
    size: 38
    type: Numeric
    expression: hash(f('VARCHAR_COL_2'))
- !field
    # MULTISIM_FLAG (10)
    description: "MULTISIM_FLAG"
    outname: "MULTISIM_FLAG"
    name: VARCHAR_COL_3
    size: 100
    type: String
    expression: "prepGenericFlag(f('VARCHAR_COL_3'))"
    vaultEncryptionPath: company/product/key
- !field
    # LD_START_DATE
    generated: true
    description: "LD_START_DATE"
    outname: "LD_START_DATE"
    name: VARCHAR_COL_4
    size: 100
    type: String
    expression: dateToId(f('__SKIP__LD_START_DATE'))
    vaultEncryptionPath: company/product/key
- !field
    # CRM_DATE
    generated: true
    description: "CRM_DATE"
    name: CRM_DATE
    size: 7
    type: Integer
    expression: |
        return regex(FILE_NAME, /.*_(\d{8}).*/, java.time.LocalDate.now().format(outputIdFormatter))
- !field
    # Field prepares Tariff Plan dictionary file
    generated: true
    description: "Field prepares Tariff Plan dictionary file"
    name: __SKIP__TARIFF_POPULATIONCOLUMN
    size: 150
    type: String
    expression: |
        if (!exportMap.containsKey(f('TARIFF_PLAN_ID')))
            exportMap.put(f('TARIFF_PLAN_ID'),[
                f('TARIFF_PLAN_ID'),
                f('__SKIP__TARIFF_GROUP_CODE'),
                encryptAesWithVault('company/product/key', f('__SKIP__TARIFF_PLAN_NAME').take(47)),
                encryptAesWithVault('company/product/key', f('__SKIP__TARIFF_GROUP_NAME').take(47)?:'{Unspecified}'),
                encryptAesWithVault('company/product/key', f('__SKIP__TARIFF_PLAN_NAME').take(47)),
                encryptAesWithVault('company/product/key', f('TARIFF_PLAN_ID').take(47)),
                encryptAesWithVault('company/product/key', f('__SKIP__TARIFF_PLAN_NAME').take(127)?:'{Unspecified}'),
                encryptAesWithVault('company/product/key', f('__SKIP__TARIFF_GROUP_CODE').take(127)?:'{Unspecified}'),
                encryptAesWithVault('company/product/key', f('__SKIP__TARIFF_GROUP_NAME').take(127)?:'{Unspecified}')])
configFileName: ./default.yml
createTable: false
dataDir: .
delimiter: ","
quotes: '"'
header: 1
instancename: instance001
linesToAnalyze: 1000
loadingMode: BY_CATEGORY
moveToArchive: true
nullStringPlaceholder: ""
parallelism: 2
processId: SAXCUSXI Staging
table: stg_dim_subscriber
workDir: .
archiveFolderPathOverride: ./archive/
vaultUrl: https://servername
vaultRoleId: smartloader
vaultPassword: xyz
inputFileDecryptionSecretPath: company/product/inputkey
skipColumns:
- __SKIP__TARIFF_PLAN_NAME
- __SKIP__TARIFF_GROUP_CODE
- __SKIP__TARIFF_GROUP_NAME
- __SKIP__LD_START_DATE
- __SKIP__LD_END_DATE
- __SKIP__ACTIVATION_DATE
- __SKIP__DEACTIVATION_DATE
- __SKIP__EARLIEST_ACQ_YEAR_MONTH
- __SKIP__TARIFF_POPULATIONCOLUMN
onStartExpression: |
inputFormatter = java.time.format.DateTimeFormatter.ofPattern('[d/M/yyyy H:m][d/M/yyyy][d-MMM-yyyy][d-MMM-yy]')
historicalInputFormatter = new java.time.format.DateTimeFormatterBuilder()
    .appendPattern("[d/M/yyyy H:m][d/M/yyyy][d-MMM-yyyy]")
    .optionalStart()
    .appendPattern("d-MMM-")
    .appendValueReduced(java.time.temporal.ChronoField.YEAR_OF_ERA, 2, 2, java.time.LocalDate.now().minusYears(100))
    .optionalEnd()
    .toFormatter()
outputIdFormatter = java.time.format.DateTimeFormatter.ofPattern('yyyyMMdd')
outputMonthFormatter = java.time.format.DateTimeFormatter.ofPattern('MMM-yyyy')
dateToId = {try {java.time.LocalDate.parse(it, inputFormatter).format(outputIdFormatter)} catch (all) {''}}
histdateToId = {try {java.time.LocalDate.parse(it, historicalInputFormatter).format(outputIdFormatter)} catch (all) {''}}
dateToMonth = {try {java.time.LocalDate.parse(it, inputFormatter).format(outputMonthFormatter)} catch (all) {'{Unknown}'}}
histdateToMonth = {try {java.time.LocalDate.parse(it, historicalInputFormatter).format(outputMonthFormatter)} catch (all) {'{Unknown}'}}
prepGenericFlag = {it.regionMatches(true, 0, 'y', 0, 1)?'Yes':it.regionMatches(true, 0, 'n', 0, 1)?'No':'{Unknown}'}
revenueRange = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1500, 2000, 2500, 3000]
tenureRange = [12, 24, 36, 48, 60]
numToRange = { val, ranges ->
    def range = '{Unknown}'
    try {
        def internalVal
        if (val instanceof GString || val instanceof String)
            internalVal = Double.parseDouble(val)
        else internalVal = val
        below = ranges.findAll { it <= internalVal }
        above = ranges.findAll { it > internalVal }
        if (!below && above)
            range = "<${ranges[0]}"
        else if (below && !above)
            range = ">=${ranges[-1]}"
        else range = "${below[-1]} - <${above[0]}"
    } catch(all) {}
    return range
}
exportMap = [:]
onEndExpression: |
if (exportMap) {
    def exportFilePath = ARGS.QUEUED_SMARTLOADER_DIR?:'./incoming'
    def exportFileName = FILE_NAME + '_tariff_' + java.time.LocalDate.now().format(outputIdFormatter) + '.csv'
    new File(exportFilePath, exportFileName).withWriter('utf-8') {writer ->
        writer.writeLine 'TARIFF_ID|VARCHAR_COL_1|TARIFF_NAME|TARIFF_PLAN_GROUP|TARIFF_PLAN_LEVEL_2|TARIFF_PLAN_LEVEL_3|VARCHAR_COL_11|VARCHAR_COL_12|VARCHAR_COL_13'
        exportMap.each { entry ->
            writer.writeLine entry.value.collect{ it.replaceAll(/\|/,' ') }.join('|')
        }
    }
}