{{announcement.body}}
{{announcement.title}}

Spring Batch — Read From XML and Write to Mongo

DZone 's Guide to

Spring Batch — Read From XML and Write to Mongo

In this post, see how to use Spring Batch to read an XML file with ItemReader using StaxEventItemReader and write its data to NoSQL.

· Database Zone ·
Free Resource

Pink flower

In this post, we will show you how to use Spring Batch to read an XML file with an ItemReader (StaxEventItemReader) and write its data to a NoSQL store using a custom ItemWriter backed by a MongoRepository. Here, we've used MongoDB.

A custom ItemReader or ItemWriter is a class in which we implement our own way of reading or writing data. In a custom reader, we are required to handle the chunking logic as well. This comes in handy if our reading logic is complex and cannot be handled by the default ItemReader implementations provided by Spring.

Tools and libraries used:

1. Maven 3.5+

2. Spring Batch Starter

3. Spring OXM

4. Spring Boot Starter Data MongoDB

5. xstream

You might also enjoy:  Converting XML to JSON, Raw Use in MongoDB, and Spring Batch

Maven dependencies — the pom.xml needed to configure the project.

XML
xxxxxxxxxx
1
77
 
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
4
    <modelVersion>4.0.0</modelVersion>
5
    <parent>
6
        <groupId>org.springframework.boot</groupId>
7
        <artifactId>spring-boot-starter-parent</artifactId>
8
        <version>2.2.2.RELEASE</version>
9
        <relativePath ></relativePath> <!-- lookup parent from repository -->
10
    </parent>
11
    <groupId>com.example</groupId>
12
    <artifactId>spring-batch-mongodb</artifactId>
13
    <version>0.0.1-SNAPSHOT</version>
14
    <name>spring-batch-mongodb</name>
15
    <description>Demo project for Spring Boot</description>
16
 
          
17
 
          
18
    <properties>
19
        <java.version>1.8</java.version>
20
        <maven-jar-plugin.version>3.1.1</maven-jar-plugin.version>
21
    </properties>
22
 
          
23
 
          
24
    <dependencies>
25
        <dependency>
26
            <groupId>org.springframework.boot</groupId>
27
            <artifactId>spring-boot-starter-batch</artifactId>
28
        </dependency>
29
        <dependency>
30
            <groupId>org.springframework</groupId>
31
            <artifactId>spring-oxm</artifactId>
32
        </dependency>
33
        <dependency>
34
            <groupId>org.springframework.boot</groupId>
35
            <artifactId>spring-boot-starter-data-mongodb</artifactId>
36
        </dependency>
37
        <dependency>
38
            <groupId>com.thoughtworks.xstream</groupId>
39
            <artifactId>xstream</artifactId>
40
            <version>1.4.7</version>
41
        </dependency>
42
        <dependency>
43
            <groupId>org.projectlombok</groupId>
44
            <artifactId>lombok</artifactId>
45
            <optional>true</optional>
46
        </dependency>
47
        <dependency>
48
            <groupId>org.springframework.boot</groupId>
49
            <artifactId>spring-boot-starter-test</artifactId>
50
            <scope>test</scope>
51
            <exclusions>
52
                <exclusion>
53
                    <groupId>org.junit.vintage</groupId>
54
                    <artifactId>junit-vintage-engine</artifactId>
55
                </exclusion>
56
            </exclusions>
57
        </dependency>
58
        <dependency>
59
            <groupId>org.springframework.batch</groupId>
60
            <artifactId>spring-batch-test</artifactId>
61
            <scope>test</scope>
62
        </dependency>
63
        <dependency>
64
            <groupId>com.h2database</groupId>
65
            <artifactId>h2</artifactId>
66
            <scope>runtime</scope>
67
        </dependency>
68
    </dependencies>
69
    <build>
70
        <plugins>
71
            <plugin>
72
                <groupId>org.springframework.boot</groupId>
73
                <artifactId>spring-boot-maven-plugin</artifactId>
74
            </plugin>
75
        </plugins>
76
    </build>
77
</project>


CustomerWriter — This is a custom writer we've created to write the customer data into MongoDB. Custom writer gives the capability to perform complex operations too.

Java
xxxxxxxxxx
1
19
 
1
package com.example.writer;
2
 
          
3
import java.util.List;
4
 
          
5
import org.springframework.batch.item.ItemWriter;
6
import org.springframework.beans.factory.annotation.Autowired;
7
import com.example.domain.Customer;
8
import com.example.repository.CustomerRepository;
9
 
          
10
 
          
11
public class CustomerWriter implements ItemWriter<Customer>{
12
    @Autowired
13
    private CustomerRepository customerRepository;
14
    
15
    @Override
16
    public void write(List<? extends Customer> customers) throws Exception {
17
        customerRepository.saveAll(customers);
18
    }
19
}


CustomerRepository — This is a Mongo repository that talks with the Mongo database and performs operations to get the data back.

Java
xxxxxxxxxx
1
 
1
package com.example.repository;
2
 
          
3
import org.springframework.data.mongodb.repository.MongoRepository;
4
import com.example.domain.Customer;
5
 
          
6
 
          
7
public interface CustomerRepository extends MongoRepository<Customer, String>{
8
}


Customer — This is a Mongo document class that holds business data.

Java
xxxxxxxxxx
1
30
 
1
package com.example.domain;
2
 
          
3
import java.time.LocalDate;
4
import javax.xml.bind.annotation.XmlRootElement;
5
import org.springframework.data.annotation.Id;
6
import org.springframework.data.mongodb.core.mapping.Document;
7
import org.springframework.data.mongodb.core.mapping.Field;
8
import lombok.AllArgsConstructor;
9
import lombok.Builder;
10
import lombok.Data;
11
import lombok.NoArgsConstructor;
12
 
          
13
 
          
14
 
          
15
@AllArgsConstructor
16
@NoArgsConstructor
17
@Builder
18
@Data
19
@XmlRootElement(name = "Customer")
20
@Document
21
public class Customer {
22
    @Id
23
    private Long id;
24
    @Field
25
    private String firstName;
26
    @Field
27
    private String lastName;
28
    @Field
29
    private LocalDate birthdate;
30
}


CustomerConverter — We've implemented XStream's Converter interface. Converter implementations are responsible for marshalling Java objects to/from textual data. If an exception occurs during processing, a ConversionException should be thrown. If working with the high-level com.thoughtworks.xstream.XStream facade, you can register new converters using the XStream.registerConverter() method.

Java
xxxxxxxxxx
1
47
 
1
package com.example.config;
2
 
          
3
import java.time.LocalDate;
4
import java.time.format.DateTimeFormatter;
5
import com.example.domain.Customer;
6
import com.thoughtworks.xstream.converters.Converter;
7
import com.thoughtworks.xstream.converters.MarshallingContext;
8
import com.thoughtworks.xstream.converters.UnmarshallingContext;
9
import com.thoughtworks.xstream.io.HierarchicalStreamReader;
10
import com.thoughtworks.xstream.io.HierarchicalStreamWriter;
11
 
          
12
 
          
13
 
          
14
public class CustomerConverter implements Converter {
15
    private static final DateTimeFormatter DT_FORMATTER = DateTimeFormatter.ofPattern("dd-MM-yyyy HH:mm:ss");
16
    
17
    @Override
18
    public boolean canConvert(Class type) {
19
        return type.equals(Customer.class);
20
    }
21
 
          
22
    @Override
23
    public void marshal(Object source, HierarchicalStreamWriter writer, MarshallingContext context) {
24
        // Don't do anything
25
    }
26
 
          
27
    @Override
28
    public Object unmarshal(HierarchicalStreamReader reader, UnmarshallingContext context) {
29
        reader.moveDown();
30
        Customer customer = new Customer();
31
        customer.setId(Long.valueOf(reader.getValue()));
32
        
33
        reader.moveUp();
34
        reader.moveDown();
35
        customer.setFirstName(reader.getValue());
36
        
37
        reader.moveUp();
38
        reader.moveDown();
39
        customer.setLastName(reader.getValue());
40
        
41
        reader.moveUp();
42
        reader.moveDown();
43
        customer.setBirthdate(LocalDate.parse(reader.getValue(), DT_FORMATTER));
44
        
45
        return customer;
46
    }
47
}


JobConfiguration — This is the main class responsible for performing the batch job. In this class, we used various Beans to perform the individual task.

StaxEventItemReader — Item reader for reading XML input based on StAX. It extracts fragments from the input XML document, which corresponds to records for processing. The fragments are wrapped with StartDocument and EndDocument events so that the fragments can be further processed like standalone XML documents. The implementation is not thread-safe.

CustomerWriter — This is a custom class that writes data to MongoDB.

step1 — This step configures the ItemReader and the ItemWriter. An ItemProcessor is optional, so we've skipped it.

Job — Batch domain object representing a job. Job is an explicit abstraction representing the configuration of a job specified by a developer. It should be noted that restart policy is applied to the job as a whole and not to a step.

Java
xxxxxxxxxx
1
67
 
1
package com.example.config;
2
 
          
3
import java.util.HashMap;
4
import java.util.Map;
5
import org.springframework.batch.core.Job;
6
import org.springframework.batch.core.Step;
7
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
8
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
9
import org.springframework.batch.item.xml.StaxEventItemReader;
10
import org.springframework.beans.factory.annotation.Autowired;
11
import org.springframework.context.annotation.Bean;
12
import org.springframework.context.annotation.Configuration;
13
import org.springframework.core.io.ClassPathResource;
14
import org.springframework.oxm.xstream.XStreamMarshaller;
15
import com.example.domain.Customer;
16
import com.example.writer.CustomerWriter;
17
 
          
18
 
          
19
 
          
20
@Configuration
21
public class JobConfiguration {
22
    @Autowired
23
    private JobBuilderFactory jobBuilderFactory;
24
    
25
    @Autowired
26
    private StepBuilderFactory stepBuilderFactory;
27
 
          
28
    @Bean
29
    public StaxEventItemReader<Customer> customerItemReader(){
30
        Map<String, Class> aliases = new HashMap<>();
31
        aliases.put("customer", Customer.class);
32
        
33
        CustomerConverter converter = new CustomerConverter();
34
 
          
35
        XStreamMarshaller ummarshaller = new XStreamMarshaller();
36
        ummarshaller.setAliases(aliases);
37
        ummarshaller.setConverters(converter);
38
        
39
        StaxEventItemReader<Customer> reader = new StaxEventItemReader<>();
40
        reader.setResource(new ClassPathResource("/data/customer.xml"));
41
        reader.setFragmentRootElementName("customer");
42
        reader.setUnmarshaller(ummarshaller);
43
        
44
        return reader;
45
    }
46
    
47
    @Bean
48
    public CustomerWriter customerWriter() {
49
        return new CustomerWriter();
50
    }
51
    
52
    @Bean
53
    public Step step1() throws Exception {
54
        return stepBuilderFactory.get("step1")
55
                .<Customer, Customer>chunk(200)
56
                .reader(customerItemReader())
57
                .writer(customerWriter())
58
                .build();
59
    }
60
    
61
    @Bean
62
    public Job job() throws Exception {
63
        return jobBuilderFactory.get("job")
64
                .start(step1())
65
                .build();
66
    }
67
}


application.properties

Java
xxxxxxxxxx
1
 
1
spring.data.mongodb.host=localhost
2
spring.data.mongodb.port=27017


Customer.xml — This is sample data to be read by Spring Batch.

XML
xxxxxxxxxx
1
75
 
1
<?xml version="1.0" encoding="UTF-8" ?>
2
<customers>
3
    <customer>
4
        <id>1</id>
5
        <firstName>John</firstName>
6
        <lastName>Doe</lastName>
7
        <birthdate>10-10-1988 19:43:23</birthdate>
8
    </customer>
9
    <customer>
10
        <id>2</id>
11
        <firstName>James</firstName>
12
        <lastName>Moss</lastName>
13
        <birthdate>01-04-1991 10:20:23</birthdate>
14
    </customer>
15
    <customer>
16
        <id>3</id>
17
        <firstName>Jonie</firstName>
18
        <lastName>Gamble</lastName>
19
        <birthdate>21-07-1982 11:12:13</birthdate>
20
    </customer>
21
    <customer>
22
        <id>4</id>
23
        <firstName>Mary</firstName>
24
        <lastName>Kline</lastName>
25
        <birthdate>07-08-1973 11:27:42</birthdate>
26
    </customer>
27
    <customer>
28
        <id>5</id>
29
        <firstName>William</firstName>
30
        <lastName>Lockhart</lastName>
31
        <birthdate>04-04-1994 04:15:11</birthdate>
32
    </customer>
33
    <customer>
34
        <id>6</id>
35
        <firstName>John</firstName>
36
        <lastName>Doe</lastName>
37
        <birthdate>10-10-1988 19:43:23</birthdate>
38
    </customer>
39
    <customer>
40
        <id>7</id>
41
        <firstName>Kristi</firstName>
42
        <lastName>Dukes</lastName>
43
        <birthdate>17-09-1983 21:22:23</birthdate>
44
    </customer>
45
    <customer>
46
        <id>8</id>
47
        <firstName>Angel</firstName>
48
        <lastName>Porter</lastName>
49
        <birthdate>15-12-1980 18:09:09</birthdate>
50
    </customer>
51
    <customer>
52
        <id>9</id>
53
        <firstName>Mary</firstName>
54
        <lastName>Johnston</lastName>
55
        <birthdate>07-07-1987 19:43:03</birthdate>
56
    </customer>
57
    <customer>
58
        <id>10</id>
59
        <firstName>Linda</firstName>
60
        <lastName>Rodriguez</lastName>
61
        <birthdate>16-09-1991 09:13:43</birthdate>
62
    </customer>
63
    <customer>
64
        <id>11</id>
65
        <firstName>Phillip</firstName>
66
        <lastName>Lopez</lastName>
67
        <birthdate>18-12-1965 11:10:09</birthdate>
68
    </customer>
69
    <customer>
70
        <id>12</id>
71
        <firstName>Peter</firstName>
72
        <lastName>Dixon</lastName>
73
        <birthdate>09-05-1996 19:09:23</birthdate>
74
    </customer>
75
</customers>


MainApp — SpringBatchMongodbApplication can be run as a Spring Boot project.

Java
xxxxxxxxxx
1
20
 
1
package com.example;
2
 
          
3
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
4
import org.springframework.boot.SpringApplication;
5
import org.springframework.boot.autoconfigure.SpringBootApplication;
6
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
7
import org.springframework.data.mongodb.repository.config.EnableMongoRepositories;
8
 
          
9
 
          
10
 
          
11
@SpringBootApplication(exclude = {DataSourceAutoConfiguration.class})
12
@EnableBatchProcessing
13
@EnableMongoRepositories(basePackages = "com.example.repository")
14
public class SpringBatchMongodbApplication {
15
 
          
16
 
          
17
    public static void main(String[] args) {
18
        SpringApplication.run(SpringBatchMongodbApplication.class, args);
19
    }
20
}


Output: We can conclude that Spring Batch has read the data and written it into MongoDB with the schema/document type that has been suggested.

Java
xxxxxxxxxx
1
121
 
1
db.getCollection('customer').find({})
2
 
          
3
 
          
4
/* 1 */
5
{
6
    "_id" : NumberLong(1),
7
    "firstName" : "John",
8
    "lastName" : "Doe",
9
    "birthdate" : ISODate("1988-10-09T18:30:00.000Z"),
10
    "_class" : "com.example.domain.Customer"
11
}
12
 
          
13
 
          
14
/* 2 */
15
{
16
    "_id" : NumberLong(2),
17
    "firstName" : "James",
18
    "lastName" : "Moss",
19
    "birthdate" : ISODate("1991-03-31T18:30:00.000Z"),
20
    "_class" : "com.example.domain.Customer"
21
}
22
 
          
23
 
          
24
/* 3 */
25
{
26
    "_id" : NumberLong(3),
27
    "firstName" : "Jonie",
28
    "lastName" : "Gamble",
29
    "birthdate" : ISODate("1982-07-20T18:30:00.000Z"),
30
    "_class" : "com.example.domain.Customer"
31
}
32
 
          
33
 
          
34
/* 4 */
35
{
36
    "_id" : NumberLong(4),
37
    "firstName" : "Mary",
38
    "lastName" : "Kline",
39
    "birthdate" : ISODate("1973-08-06T18:30:00.000Z"),
40
    "_class" : "com.example.domain.Customer"
41
}
42
 
          
43
 
          
44
/* 5 */
45
{
46
    "_id" : NumberLong(5),
47
    "firstName" : "William",
48
    "lastName" : "Lockhart",
49
    "birthdate" : ISODate("1994-04-03T18:30:00.000Z"),
50
    "_class" : "com.example.domain.Customer"
51
}
52
 
          
53
 
          
54
/* 6 */
55
{
56
    "_id" : NumberLong(6),
57
    "firstName" : "John",
58
    "lastName" : "Doe",
59
    "birthdate" : ISODate("1988-10-09T18:30:00.000Z"),
60
    "_class" : "com.example.domain.Customer"
61
}
62
 
          
63
 
          
64
/* 7 */
65
{
66
    "_id" : NumberLong(7),
67
    "firstName" : "Kristi",
68
    "lastName" : "Dukes",
69
    "birthdate" : ISODate("1983-09-16T18:30:00.000Z"),
70
    "_class" : "com.example.domain.Customer"
71
}
72
 
          
73
 
          
74
/* 8 */
75
{
76
    "_id" : NumberLong(8),
77
    "firstName" : "Angel",
78
    "lastName" : "Porter",
79
    "birthdate" : ISODate("1980-12-14T18:30:00.000Z"),
80
    "_class" : "com.example.domain.Customer"
81
}
82
 
          
83
 
          
84
/* 9 */
85
{
86
    "_id" : NumberLong(9),
87
    "firstName" : "Mary",
88
    "lastName" : "Johnston",
89
    "birthdate" : ISODate("1987-07-06T18:30:00.000Z"),
90
    "_class" : "com.example.domain.Customer"
91
}
92
 
          
93
 
          
94
/* 10 */
95
{
96
    "_id" : NumberLong(10),
97
    "firstName" : "Linda",
98
    "lastName" : "Rodriguez",
99
    "birthdate" : ISODate("1991-09-15T18:30:00.000Z"),
100
    "_class" : "com.example.domain.Customer"
101
}
102
 
          
103
 
          
104
/* 11 */
105
{
106
    "_id" : NumberLong(11),
107
    "firstName" : "Phillip",
108
    "lastName" : "Lopez",
109
    "birthdate" : ISODate("1965-12-17T18:30:00.000Z"),
110
    "_class" : "com.example.domain.Customer"
111
}
112
 
          
113
 
          
114
/* 12 */
115
{
116
    "_id" : NumberLong(12),
117
    "firstName" : "Peter",
118
    "lastName" : "Dixon",
119
    "birthdate" : ISODate("1996-05-08T18:30:00.000Z"),
120
    "_class" : "com.example.domain.Customer"
121
}


Thanks for reading!

Further Reading

Spring Batch Read an XML File and Write to Oracle Database

Spring Batch to Read From MongoDB and Generate XML Files

Topics:
spring batch ,database ,tutorial ,custom itemreader ,itemwriter ,maven ,spring batch starter ,mongodb ,nosql

Opinions expressed by DZone contributors are their own.

{{ parent.title || parent.header.title}}

{{ parent.tldr }}

{{ parent.urlSource.name }}