{{announcement.body}}
{{announcement.title}}

Spring Batch Read an XML File and Write to Oracle Database

DZone 's Guide to

Spring Batch Read an XML File and Write to Oracle Database

See how to use Spring Batch to read an XML file with your ItemReader using StaxEventItemReader and write its data to Oracle Database using Custom ItemWriter.

· Database Zone ·
Free Resource

Image title

Spring Batch

In this post, we will show you how to use Spring Batch to read an XML file with your ItemReader using StaxEventItemReader and write its data to Oracle Database using Custom ItemWriter. We will also learn how to use ItemProcessor to process input data before writing to the database.

Custom ItemReader or ItemWriter is a class where we write our own way of reading or writing data. In Custom Reader we are required to handle the chunking logic as well. This comes in handy if our reading logic is complex and cannot be handled using Default ItemReader provided by spring.

For an introduction to Spring batch and to learn basics, click here and here. For a Spring Batch Use Case, c lick  here.

Tools and libraries used:

  1. Maven 3
  2. Eclipse 4.2
  3. JDK 1.8
  4. Spring Core 3.2.2.RELEASE
  5. Spring Batch 2.2.0.RELEASE
  6. Spring OXM 3.2.2.RELEASE

1. Java Maven Project

Create a simple Java Maven project

File -> New -> Maven Project

2. Project Dependencies

Declares all project dependencies in the pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.learningsolo.springbatch</groupId>
  <artifactId>SpringBatchExample</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <url>http://maven.apache.org</url>
  <properties>
        <jdk.version>1.8</jdk.version>
        <spring.version>3.2.2.RELEASE</spring.version>
        <spring.batch.version>2.2.0.RELEASE</spring.batch.version>
        <mysql.driver.version>5.1.25</mysql.driver.version>
        <junit.version>4.11</junit.version>

    </properties>

    <dependencies>

        <!-- Spring Core -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-core</artifactId>
            <version>${spring.version}</version>
        </dependency>

        <!-- Spring jdbc, for database -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-jdbc</artifactId>
            <version>${spring.version}</version>
        </dependency>

        <!-- Spring XML to/back object -->
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-oxm</artifactId>
            <version>${spring.version}</version>
        </dependency>

        <!-- MySQL database driver -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>${mysql.driver.version}</version>
        </dependency>

        <!-- Spring Batch dependencies -->
        <dependency>
            <groupId>org.springframework.batch</groupId>
            <artifactId>spring-batch-core</artifactId>
            <version>${spring.batch.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.batch</groupId>
            <artifactId>spring-batch-infrastructure</artifactId>
            <version>${spring.batch.version}</version>
        </dependency>

        <!-- Spring Batch unit test -->
        <dependency>
            <groupId>org.springframework.batch</groupId>
            <artifactId>spring-batch-test</artifactId>
            <version>${spring.batch.version}</version>
        </dependency>

        <!-- Junit -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>${junit.version}</version>
            <scope>test</scope>
        </dependency>

    </dependencies>
    <build>
        <finalName>spring-batch</finalName>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-eclipse-plugin</artifactId>
                <version>2.9</version>
                <configuration>
                    <downloadSources>true</downloadSources>
                    <downloadJavadocs>false</downloadJavadocs>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>2.3.2</version>
                <configuration>
                    <source>${jdk.version}</source>
                    <target>${jdk.version}</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

</project>


3. Input XML File

resources/sample-input-report.xml

<?xml version="1.0" encoding="UTF-8"?>
<Contracts>
    <Contract ContractId="1001">
        <Carrier>CMU</Carrier>
        <ContractName>Contract reg1</ContractName>
        <ProcessingTime>2018-07-29T00:00:00+05:30</ProcessingTime>
    </Contract>
    <Contract ContractId="1001">
        <Carrier>NLU</Carrier>
        <ContractName>Contract reg2</ContractName>
        <ProcessingTime>2018-07-30T00:00:00+05:30</ProcessingTime>
    </Contract>
    <Contract ContractId="1001">
        <Carrier>HLA</Carrier>
        <ContractName>Contract reg3</ContractName>
        <ProcessingTime>2018-07-01T00:00:00+05:30</ProcessingTime>
    </Contract>
    <Contract ContractId="1001">
        <Carrier>GMR</Carrier>
        <ContractName>Contract reg4</ContractName>
        <ProcessingTime>2018-05-29T00:00:00+05:30</ProcessingTime>
    </Contract>
    <Contract ContractId="1001">
        <Carrier>BLU</Carrier>
        <ContractName>Contract reg5</ContractName>
        <ProcessingTime>2018-04-22T00:00:00+05:30</ProcessingTime>
    </Contract>
</Contracts>


4. Spring Batch Job Configuration.

4.1. Read XML input

Read XML input file using default ItemReader provided by spring StaxEventItemReader. If our reading logic is straight forward and no complex logic is involved, then it's better to use default ItemReader, as it works well.

<!-- XML file reader -->
<bean id="xmlItemReader" class="org.springframework.batch.item.xml.StaxEventItemReader">
    <property name="resource" value="classpath:sample-input-report.xml" />
    <property name="unmarshaller" ref="reportMarshaller" />
    <property name="fragmentRootElementName" value="Contract" />
</bean>

<!-- JAXB marshaller for mapping XML element to java object -->
<bean id="reportMarshaller" class="org.springframework.oxm.jaxb.Jaxb2Marshaller">
    <property name="classesToBeBound">
    <list>
    <value>com.learningsolo.springbatch.Contract</value>
    </list>
    </property>
</bean>


4.2 Batch Job Configuration

Spring Job contains steps, where each steps does its work one after another based on SUCCESS or FAILURE of the step. This step contains Tasklet, which can again be divided into 2 categories.

  1. Tasklet
  2. Chunk

Tasklet is meant to perform a single task in one step for the complete data. All the steps like read, process, and write will be done in one step and once finished, it will exit.

public class MergeTableDataTasklet implements Tasklet {
............

}


Chunk: In a chunk-based approach, it performs an action on a chunk of data and not on the whole as in Tasklet. It performs read, process, and write in one step only for chunk of the data defined in the configuration. Then again, it reads a new chunk on data and repeats the process until it finishes the data.

Chunk based flow:

While there’re N lines:

  • Do for X amount of lines:
    • Read one line
    • Process one line
  • Write X amount of lines.
<batch:tasklet transaction-manager="transactionManager">
    <batch:chunk reader="xmlItemReader" writer="contractWriter"
        processor="contractProcessor" commit-interval="10" />
</batch:tasklet>


In the above example, the chunk of data is defined using the commit-interval property.

Spring Job Context XML

In this context XML, we define our Spring Batch job and also the steps that will be executed to perform the required actions.

resources/job-context.xml

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:context="http://www.springframework.org/schema/context"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:batch="http://www.springframework.org/schema/batch"
    xsi:schemaLocation="http://www.springframework.org/schema/batch
    http://www.springframework.org/schema/batch/spring-batch-2.2.xsd
    http://www.springframework.org/schema/context 
     http://www.springframework.org/schema/context/spring-context.xsd
    http://www.springframework.org/schema/beans
    http://www.springframework.org/schema/beans/spring-beans-3.0.xsd">

    <context:component-scan base-package="com.learningsolo.springbatch.*" />
    <import resource="context.xml" />

    <bean id="transactionManager" class="org.springframework.batch.support.transaction.ResourcelessTransactionManager" />

     <batch:job id="gtnContractJob">
    <!-- Spring batch job and steps -->
    <batch:step id="xmlReadAndProcessStep">
            <batch:tasklet transaction-manager="transactionManager">
                <batch:chunk reader="xmlItemReader" writer="contractWriter"
                    processor="contractProcessor" commit-interval="10" />
            </batch:tasklet>
            <batch:listeners>
                <batch:listener ref="stepListener" />
            </batch:listeners>
        </batch:step>
    </batch:job>

    <!-- XML file reader -->
    <bean id="xmlItemReader" class="org.springframework.batch.item.xml.StaxEventItemReader">
        <property name="resource" value="classpath:sample-input-report.xml" />
        <property name="unmarshaller" ref="reportMarshaller" />
        <property name="fragmentRootElementName" value="Contract" />
    </bean>

    <!-- JAXB marshaller for mapping XML element to java object -->
    <bean id="reportMarshaller" class="org.springframework.oxm.jaxb.Jaxb2Marshaller">
       <property name="classesToBeBound">
        <list>
            <value>com.learningsolo.springbatch.Contract</value>
        </list>
        </property>
    </bean>
    <!-- Custom writer -->
    <bean id="contractWriter" class="com.learningsolo.springbatch.CustomItemWriter"/>
    <!-- Step listner -->
    <bean id="stepListener" class="com.learningsolo.springbatch.listner.StepListner"></bean>

    <bean id="contractProcessor" class= "com.learningsolo.springbatch.CustomItemProcessor"/>

    <bean id="contractDao" class="com.learningsolo.springbatch.ContractDaoImpl"/>

</beans>


JAXB Marshaller is used to map XML root, element, and its properties to Java object.

4.3 Spring Batch Core Settings

The core setting XML file, context.xml, contains all job launcher and job repository settings, which holds the metadata information about the Spring batch job. We also have the data source configured here to connect our Oracle database.

resources/context.xml

<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:jdbc="http://www.springframework.org/schema/jdbc"
    xmlns:batch="http://www.springframework.org/schema/batch"
    xsi:schemaLocation="http://www.springframework.org/schema/jdbc
http://www.springframework.org/schema/jdbc/spring-jdbc-3.0.xsd
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd">


    <bean id="jobRepository" class="org.springframework.batch.core.repository.support.MapJobRepositoryFactoryBean" />

    <bean id="jobLauncher"
        class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
        <property name="jobRepository" ref="jobRepository" />
    </bean>

      <bean id="namedParamJdbcTemplate" class="org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate">
        <constructor-arg ref="dataSource" />
    </bean>
    <bean id="dataSource" class="org.springframework.jdbc.datasource.DriverManagerDataSource">
        <property name="driverClassName" value="oracle.jdbc.OracleDriver" />
        <property name="url" value="jdbc:oracle:thin:@localhost:1521/Z1UAZ" />
        <property name="username" value="TESTDB" />
        <property name="password" value="test$123" />
    </bean>

    <bean id="baseDAO" abstract="true">
        <description>common configuration for all DAO classes
        </description>
        <property name="dataSource" ref="dataSource" />        
    </bean>

</beans>


5. Java Source Classes:

Contract.java

import java.util.Date;

import javax.xml.bind.annotation.*;

@XmlRootElement(name="Contract")
public class Contract {

    private long contractId;
    private String contractName;
    private String carrier;
    private Date fileProcessingTime;

    @XmlAttribute(name="ContractId")
    public long getContractId() {
        return contractId;
    }
    public void setContractId(long contractId) {
        this.contractId = contractId;
    }
    @XmlElement(name="ContractName")
    public String getContractName() {
        return contractName;
    }
    public void setContractName(String contractName) {
        this.contractName = contractName;
    }
    @XmlElement(name="Carrier")
    public String getCarrier() {
        return carrier;
    }
    public void setCarrier(String carrier) {
        this.carrier = carrier;
    }
    @XmlElement(name="ProcessingTime")
    public Date getFileProcessingTime() {
        return fileProcessingTime;
    }
    public void setFileProcessingTime(Date fileProcessingTime) {
        this.fileProcessingTime = fileProcessingTime;
    }



}


ItemProcessor is used to process the data coming from the input source, If you want to perform some data manipulation before writing it to DB like date formatting, text formatting, etc., then in that case, ItemProcessor is the right place.

CustomItemProcessor.java


import org.springframework.batch.item.ItemProcessor;

public class CustomItemProcessor implements ItemProcessor<Contract, Contract>{

    public Contract process(Contract item) throws Exception {
        System.out.println("Processing..."+item.getContractId()+" - "+item.getContractName());
        return item;
    }

}


ItemReader reads each record from the input and sends it to ItemProcessor for further processing and finally, when the chunk size limit is reached, it is sent to ItemWriter for writing to output.

ItemWriter.java

import java.util.List;

import org.springframework.batch.item.ItemWriter;
import org.springframework.beans.factory.annotation.Autowired;

public class CustomItemWriter implements ItemWriter<Contract>{

    @Autowired
    ContractDaoImpl contractDao;

    public void write(List<? extends Contract> items) throws Exception {
        System.out.println("Writer...");

        List<Contract> contractList = (List<Contract>) items;
        contractDao.saveData(contractList);
    }
}


JDBCBaseDao.java

import javax.sql.DataSource;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcOperations;
import org.springframework.jdbc.core.namedparam.NamedParameterJdbcTemplate;

public abstract class JDBCBaseDao {
    private DataSource dataSource;
    private NamedParameterJdbcOperations namedJdbcTemplate;

    public JDBCBaseDao() {    
    }
    public void setDataSource(DataSource ds) {
        namedJdbcTemplate = new NamedParameterJdbcTemplate(ds);
        this.dataSource = ds;
    }

    public DataSource getDataSource() {
        return dataSource;
    }

    public NamedParameterJdbcOperations getNamedJdbcTemplate() {
        return namedJdbcTemplate;
    }

    @Autowired
    @Qualifier("namedParamJdbcTemplate")
    public void setNamedJdbcTemplate(NamedParameterJdbcOperations namedJdbcTemplate) {
        this.namedJdbcTemplate = namedJdbcTemplate;
    }

}


Listeners:

Listeners are like interceptors that help to intercept the execution of a Job or a Step and allow the user to perform some functionality. In listeners, we have 2 methods beforeStep() and afterStep(). beforeStep() is called before the job starts step execution and afterStep() is called once the step has finished its job.

StepListener.java

import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.core.StepExecution;
import org.springframework.batch.core.StepExecutionListener;

public class StepListner implements StepExecutionListener{

    public ExitStatus afterStep(StepExecution execution) {
        System.out.println("After listner");
        return execution.getExitStatus();
    }

    public void beforeStep(StepExecution arg0) {
        System.out.println("Before listner");

    }

}


SpringApp.java

import org.springframework.batch.core.Job;
import org.springframework.batch.core.JobExecution;
import org.springframework.batch.core.JobParameters;
import org.springframework.batch.core.launch.JobLauncher;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

public class SpringApp {

    public static void main(String[] args) {
        ApplicationContext applicationContext = null;
        try {
            applicationContext = new ClassPathXmlApplicationContext("./job-context.xml");
            JobLauncher jobLauncher = (JobLauncher) applicationContext.getBean("jobLauncher");
            Job job = (Job) applicationContext.getBean("gtnContractJob");
            JobExecution execution = jobLauncher.run(job, new JobParameters());
            System.out.println("Exit status:"+execution.getStatus());
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

}


Output:

You should see a record inserted in your Oracle DB table"CONTRACT".

Further Reading

Creating a Batch Service

Batch Processing Large Data Sets With Spring Boot and Spring Batch

Topics:
java ,spring ,spring batch ,xml ,database ,tutorial

Published at DZone with permission of

Opinions expressed by DZone contributors are their own.

{{ parent.title || parent.header.title}}

{{ parent.tldr }}

{{ parent.urlSource.name }}