Over a million developers have joined DZone.
{{announcement.body}}
{{announcement.title}}

Getting Started with Avro: Part 2

DZone's Guide to

Getting Started with Avro: Part 2

· Big Data Zone
Free Resource

Need to build an application around your data? Learn more about dataflow programming for rapid development and greater creativity. 

In the previous post we used avro-tools commands to serialize and deserialize data. In this post we will use the Avro Java API to achieve the same result. We will use the same sample data and schema as in our previous post.

The Java code for serializing and deserializing data without generating code from the schema is given below:

package com.rishav.avro;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedHashMap;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonProcessingException;
import org.codehaus.jackson.map.ObjectMapper;
import org.json.simple.JSONObject;



/**
 * Serializes JSON records to an Avro data file and reads them back using
 * Avro's generic API ({@link GenericRecord}), i.e. without classes generated
 * from the schema.
 *
 * <p>All file locations are hard-coded relative to the working directory:
 * {@code resources/StudentActivity.avsc} (schema),
 * {@code resources/StudentActivity.json} (input) and
 * {@code resources/StudentActivity.avro} (output).
 */
public class AvroExampleWithoutCodeGeneration {

	/**
	 * Reads every JSON object from {@code resources/StudentActivity.json} and
	 * appends it as a generic Avro record to
	 * {@code resources/StudentActivity.avro}.
	 *
	 * @throws JsonParseException if the input is not well-formed JSON
	 * @throws JsonProcessingException if a JSON value cannot be mapped
	 * @throws IOException if the schema, input or output file cannot be read or written
	 */
	public void serialize() throws JsonParseException, JsonProcessingException, IOException {

		// Parse the schema before opening any stream so that a schema failure
		// cannot leave an open file handle behind.
		Schema schema = new Schema.Parser().parse(new File("resources/StudentActivity.avsc"));
		// record that holds one top-level JSON object
		GenericRecord avroRec = new GenericData.Record(schema);
		// record that holds the nested course_details object
		GenericRecord courseRec = new GenericData.Record(schema.getField("course_details").schema());
		// this file will hold the Avro output data
		File avroFile = new File("resources/StudentActivity.avro");
		DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);

		InputStream in = new FileInputStream("resources/StudentActivity.json");
		try {
			DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
			try {
				dataFileWriter.create(schema, avroFile);

				// iterate over the JSON objects in the input file and write each to the Avro file
				Iterator<JSONObject> it = new ObjectMapper().readValues(
						new JsonFactory().createJsonParser(in), JSONObject.class);
				while (it.hasNext()) {
					JSONObject jsonRec = it.next();
					avroRec.put("id", jsonRec.get("id"));
					avroRec.put("student_id", jsonRec.get("student_id"));
					avroRec.put("university_id", jsonRec.get("university_id"));

					// the original code relies on the nested object arriving as a LinkedHashMap
					LinkedHashMap<?, ?> courseDetails = (LinkedHashMap<?, ?>) jsonRec.get("course_details");
					courseRec.put("course_id", courseDetails.get("course_id"));
					courseRec.put("enroll_date", courseDetails.get("enroll_date"));
					courseRec.put("verb", courseDetails.get("verb"));
					courseRec.put("result_score", courseDetails.get("result_score"));

					avroRec.put("course_details", courseRec);

					// The two record objects are reused for every input object;
					// each pass overwrites all fields before appending.
					dataFileWriter.append(avroRec);
				}
			} finally {
				// close even on failure so the output file handle is released
				dataFileWriter.close();
			}
		} finally {
			in.close();
		}
	}

	/**
	 * Reads {@code resources/StudentActivity.avro} and prints every record to
	 * standard output.
	 *
	 * @throws IOException if the schema or Avro file cannot be read
	 */
	public void deserialize() throws IOException {
		Schema schema = new Schema.Parser().parse(new File("resources/StudentActivity.avsc"));
		// record instance handed back to the reader on every call to next()
		GenericRecord avroRec = new GenericData.Record(schema);
		File avroFile = new File("resources/StudentActivity.avro");
		DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
		DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);
		try {
			System.out.println("Deserialized data is :");
			while (dataFileReader.hasNext()) {
				avroRec = dataFileReader.next(avroRec);
				System.out.println(avroRec);
			}
		} finally {
			// the original never closed the reader, leaking the file handle
			dataFileReader.close();
		}
	}

	/**
	 * Runs {@link #serialize()} followed by {@link #deserialize()}.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) throws JsonParseException, JsonProcessingException, IOException {
		AvroExampleWithoutCodeGeneration avroEx = new AvroExampleWithoutCodeGeneration();
		avroEx.serialize();
		avroEx.deserialize();
	}
}

To generate Java classes from an Avro JSON schema we can use the avro-tools jar. The command is given below:

java -jar avro-tools-1.7.5.jar compile schema StudentActivity.avsc <output_path>
The output path can be the source folder of the project, or we can add the generated Java class files to the Eclipse IDE manually.

The Java code for serializing and deserializing data using the generated classes is similar to the code above. The difference is that in the previous code we were assigning values to a GenericRecord, whereas here we assign values to the generated Avro objects:

package com.rishav.avro;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedHashMap;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonParseException;
import org.codehaus.jackson.JsonProcessingException;
import org.codehaus.jackson.map.ObjectMapper;
import org.json.simple.JSONObject;

/**
 * Serializes JSON records to an Avro data file by populating
 * {@code StudentActivity} and {@code Activity} objects, then reads the file
 * back using the generic API.
 *
 * <p>{@code StudentActivity} and {@code Activity} are not defined in this
 * file; presumably they are the classes generated by avro-tools from
 * {@code StudentActivity.avsc} - confirm against the generated sources.
 *
 * <p>All file locations are hard-coded relative to the working directory:
 * {@code resources/StudentActivity.avsc} (schema),
 * {@code resources/StudentActivity.json} (input) and
 * {@code resources/StudentActivity.avro} (output).
 */
public class AvroExampleWithCodeGeneration {

	/**
	 * Reads every JSON object from {@code resources/StudentActivity.json},
	 * copies its fields into a {@code StudentActivity} object and appends it
	 * to {@code resources/StudentActivity.avro}.
	 *
	 * @throws JsonParseException if the input is not well-formed JSON
	 * @throws JsonProcessingException if a JSON value cannot be mapped
	 * @throws IOException if the schema, input or output file cannot be read or written
	 * @throws ClassCastException if a JSON value does not have the Java type
	 *         it is cast to below
	 */
	public void serialize() throws JsonParseException, JsonProcessingException, IOException {

		// Parse the schema before opening any stream so that a schema failure
		// cannot leave an open file handle behind.
		Schema schema = new Schema.Parser().parse(new File("resources/StudentActivity.avsc"));
		// object that holds one top-level JSON record
		StudentActivity studentActivity = new StudentActivity();
		// object that holds the nested course_details
		Activity activity = new Activity();

		// this file will hold the Avro output data
		File avroFile = new File("resources/StudentActivity.avro");
		DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);

		InputStream in = new FileInputStream("resources/StudentActivity.json");
		try {
			DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
			try {
				dataFileWriter.create(schema, avroFile);

				// iterate over the JSON objects in the input file and write each to the Avro file
				Iterator<JSONObject> it = new ObjectMapper().readValues(
						new JsonFactory().createJsonParser(in), JSONObject.class);
				while (it.hasNext()) {
					JSONObject jsonRec = it.next();
					studentActivity.setId((CharSequence) jsonRec.get("id"));
					studentActivity.setStudentId((Integer) jsonRec.get("student_id"));
					studentActivity.setUniversityId((Integer) jsonRec.get("university_id"));

					// the original code relies on the nested object arriving as a LinkedHashMap
					LinkedHashMap<?, ?> courseDetails = (LinkedHashMap<?, ?>) jsonRec.get("course_details");
					activity.setCourseId((Integer) courseDetails.get("course_id"));
					activity.setEnrollDate((CharSequence) courseDetails.get("enroll_date"));
					activity.setVerb((CharSequence) courseDetails.get("verb"));
					activity.setResultScore((Double) courseDetails.get("result_score"));

					studentActivity.setCourseDetails(activity);

					// The two objects are reused for every input object;
					// each pass overwrites all fields before appending.
					dataFileWriter.append(studentActivity);
				}
			} finally {
				// close even on failure so the output file handle is released
				dataFileWriter.close();
			}
		} finally {
			in.close();
		}
	}

	/**
	 * Reads {@code resources/StudentActivity.avro} as generic records and
	 * prints every record to standard output.
	 *
	 * @throws IOException if the schema or Avro file cannot be read
	 */
	public void deserialize() throws IOException {
		Schema schema = new Schema.Parser().parse(new File("resources/StudentActivity.avsc"));
		// record instance handed back to the reader on every call to next()
		GenericRecord avroRec = new GenericData.Record(schema);
		File avroFile = new File("resources/StudentActivity.avro");
		DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
		DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(avroFile, datumReader);
		try {
			System.out.println("Deserialized data is :");
			while (dataFileReader.hasNext()) {
				avroRec = dataFileReader.next(avroRec);
				System.out.println(avroRec);
			}
		} finally {
			// the original never closed the reader, leaking the file handle
			dataFileReader.close();
		}
	}

	/**
	 * Runs {@link #serialize()} followed by {@link #deserialize()} on this
	 * class. The original instantiated AvroExampleWithoutCodeGeneration here,
	 * so the code-generation path was never executed.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) throws JsonParseException, JsonProcessingException, IOException {
		AvroExampleWithCodeGeneration avroEx = new AvroExampleWithCodeGeneration();
		avroEx.serialize();
		avroEx.deserialize();
	}

}

In the next post we will see how Avro deals with schema evolution.

Check out the Exaptive data application Studio. Technology agnostic. No glue code. Use what you know and rely on the community for what you don't. Try the community version.

Topics:

Published at DZone with permission of Rishav Rohit, DZone MVB. See the original article here.

Opinions expressed by DZone contributors are their own.

THE DZONE NEWSLETTER

Dev Resources & Solutions Straight to Your Inbox

Thanks for subscribing!

Awesome! Check your inbox to verify your email so you can start receiving the latest in tech news and resources.

X

{{ parent.title || parent.header.title}}

{{ parent.tldr }}

{{ parent.urlSource.name }}