Relational to JSON With Node.js
It's not exactly fair to compare a solution crafted in Node.js to the PL/SQL solutions... but whatever, let's compare them anyway!
Node.js is a platform for building JavaScript-based applications that has become very popular over the last few years. To best support this rapidly growing community, Oracle developed a driver for Node.js. Written in C and utilizing the Oracle Instant Client libraries, the Node.js driver is both performant and feature-rich.
When it comes to generating JSON, Node.js seems like a natural choice, as JSON is based on JavaScript objects. However, it's not exactly fair to compare a solution crafted in Node.js to the PL/SQL solutions, as Node.js has a clear disadvantage: it runs on a separate server. That means we have to bring the data across the network before we can transform it to the desired output.
Whatever, let's compare them anyway!
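If you haven't seen the driver before, its callback-style API looks roughly like this. This is just a minimal sketch using the same HR credentials that appear in the solutions below; the query is only an illustration, not part of the solutions themselves:
var oracledb = require('oracledb');

// Minimal sketch: connect, run a query, and serialize the rows as JSON.
// The credentials and connect string match the examples below.
oracledb.getConnection(
  {
    user: 'hr',
    password: 'welcome',
    connectString: 'server/XE'
  },
  function(err, connection) {
    if (err) throw err;

    connection.execute(
      'select department_id, department_name from departments',
      function(err, results) {
        if (err) throw err;

        // results.rows is an array of arrays by default
        console.log(JSON.stringify(results.rows));

        connection.release(function(err) {
          if (err) console.error(err);
        });
      }
    );
  }
);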
Solution 1: Emulating the PL/SQL Solutions
This first solution follows the same flow as the PL/SQL-based solutions. We start off by making a connection to the database and then begin creating the object we need by fetching data from the Departments table. Next, we go to the Locations table, then the Countries table, and on and on until finally, we have the object we want. We then use JSON.stringify to serialize the object into the JSON result we are after.
var oracledb = require('oracledb');
var async = require('async');

var dateFormat = 'DD-MON-YYYY';

function getDepartment(departmentId, callback) {
  oracledb.getConnection({
      user: 'hr',
      password: 'welcome',
      connectString: 'server/XE'
    },
    function(err, connection) {
      if (err) throw err;

      connection.execute(
        'select department_id, \n' +
        ' department_name, \n' +
        ' manager_id, \n' +
        ' location_id \n' +
        'from departments \n' +
        'where department_id = :department_id',
        {
          department_id: departmentId
        },
        function(err, results) {
          var department = {};

          if (err) {
            throw err;
          }

          department.id = results.rows[0][0];
          department.name = results.rows[0][1];
          department.managerId = results.rows[0][2];

          getLocationDetails(results.rows[0][3], department, connection, callback);
        }
      );
    }
  );
}

module.exports.getDepartment = getDepartment;

function getLocationDetails(locationId, department, connection, callback) {
  connection.execute(
    'select location_id, \n' +
    ' street_address, \n' +
    ' postal_code, \n' +
    ' country_id \n' +
    'from locations \n' +
    'where location_id = :location_id',
    {
      location_id: locationId
    },
    function(err, results) {
      if (err) throw err;

      department.location = {};
      department.location.id = results.rows[0][0];
      department.location.streetAddress = results.rows[0][1];
      department.location.postalCode = results.rows[0][2];

      getCountryDetails(results.rows[0][3], department, connection, callback);
    }
  );
}

function getCountryDetails(countryId, department, connection, callback) {
  connection.execute(
    'select country_id, \n' +
    ' country_name, \n' +
    ' region_id \n' +
    'from countries \n' +
    'where country_id = :country_id',
    {
      country_id: countryId
    },
    function(err, results) {
      if (err) throw err;

      department.location.country = {};
      department.location.country.id = results.rows[0][0];
      department.location.country.name = results.rows[0][1];
      department.location.country.regionId = results.rows[0][2];

      getManagerDetails(department, connection, callback);
    }
  );
}

function getManagerDetails(department, connection, callback) {
  connection.execute(
    'select employee_id, \n' +
    ' first_name || \' \' || last_name, \n' +
    ' salary, \n' +
    ' job_id \n' +
    'from employees \n' +
    'where employee_id = :manager_id',
    {
      manager_id: department.managerId
    },
    function(err, results) {
      if (err) throw err;

      delete department.managerId;

      department.manager = {};

      if (results.rows.length) {
        department.manager.id = results.rows[0][0];
        department.manager.name = results.rows[0][1];
        department.manager.salary = results.rows[0][2];
        department.manager.jobId = results.rows[0][3];
      }

      getManagerJobDetails(department, connection, callback);
    }
  );
}

function getManagerJobDetails(department, connection, callback) {
  if (department.manager.id) {
    connection.execute(
      'select job_id, \n' +
      ' job_title, \n' +
      ' min_salary, \n' +
      ' max_salary \n' +
      'from jobs \n' +
      'where job_id = :job_id',
      {
        job_id: department.manager.jobId
      },
      function(err, results) {
        if (err) throw err;

        delete department.manager.jobId;

        department.manager.job = {};
        department.manager.job.id = results.rows[0][0];
        department.manager.job.title = results.rows[0][1];
        department.manager.job.minSalary = results.rows[0][2];
        department.manager.job.maxSalary = results.rows[0][3];

        getEmployees(department, connection, callback);
      }
    );
  } else {
    getEmployees(department, connection, callback);
  }
}

function getEmployees(department, connection, callback) {
  connection.execute(
    'select employee_id, \n' +
    ' first_name || \' \' || last_name, \n' +
    ' case when hire_date < to_date(\'01-01-2005\', \'DD-MM-YYYY\') then 1 else 0 end is_senior, ' +
    ' to_char(hire_date, \'' + dateFormat + '\'), \n' +
    ' commission_pct \n' +
    'from employees \n' +
    'where department_id = :department_id',
    {
      department_id: department.id
    },
    function(err, results) {
      if (err) throw err;

      department.employees = [];

      results.rows.forEach(function(row) {
        var emp = {};

        emp.id = row[0];
        emp.name = row[1];
        emp.isSenior = row[2] === 1;
        emp.hireDate = row[3];
        emp.commissionPct = row[4];

        department.employees.push(emp);
      });

      async.eachSeries(
        department.employees,
        function(emp, cb) {
          connection.execute(
            'select job_id, \n' +
            ' department_id, \n' +
            ' to_char(start_date, \'' + dateFormat + '\'), \n' +
            ' to_char(end_date, \'' + dateFormat + '\') \n' +
            'from job_history \n' +
            'where employee_id = :employee_id',
            {
              employee_id: emp.id
            },
            function(err, results) {
              if (err) {
                cb(err);
                return;
              }

              emp.jobHistory = [];

              results.rows.forEach(function(row) {
                var job = {};

                job.id = row[0];
                job.departmentId = row[1];
                job.startDate = row[2];
                job.endDate = row[3];

                emp.jobHistory.push(job);
              });

              cb();
            }
          );
        },
        function(err) {
          if (err) throw err;

          callback(null, JSON.stringify(department));

          connection.release(function(err) {
            if (err) {
              console.error(err);
            }
          });
        }
      );
    }
  );
}
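To call the module, a consumer would do something like the following. This is a hypothetical snippet; the file name department.js is my assumption and isn't specified in the original code:
// Hypothetical usage of the module above, assumed to be saved as department.js
var department = require('./department');

department.getDepartment(10, function(err, json) {
  if (err) throw err;

  console.log(json); // the serialized JSON for department 10
});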
Output
When called with the department ID set to 10, the function returns the serialized JSON that matches the goal 100%.
{
  "id": 10,
  "name": "Administration",
  "location": {
    "id": 1700,
    "streetAddress": "2004 Charade Rd",
    "postalCode": "98199",
    "country": {
      "id": "US",
      "name": "United States of America",
      "regionId": 2
    }
  },
  "manager": {
    "id": 200,
    "name": "Jennifer Whalen",
    "salary": 4400,
    "job": {
      "id": "AD_ASST",
      "title": "Administration Assistant",
      "minSalary": 3000,
      "maxSalary": 6000
    }
  },
  "employees": [
    {
      "id": 200,
      "name": "Jennifer Whalen",
      "isSenior": true,
      "commissionPct": null,
      "jobHistory": [
        {
          "id": "AD_ASST",
          "departmentId": 90,
          "startDate": "17-SEP-1995",
          "endDate": "17-JUN-2001"
        },
        {
          "id": "AC_ACCOUNT",
          "departmentId": 90,
          "startDate": "01-JUL-2002",
          "endDate": "31-DEC-2006"
        }
      ]
    }
  ]
}
So we got the output we were after, but what about performance? Let's explore the test results...
Test Results
When I finished the solutions for PL/JSON and APEX_JSON, I wanted to see if one was faster than the other. I ran a very simple test: I generated the JSON for all 27 departments in the HR schema 100 times in a loop (2,700 invocations of the solution). APEX_JSON finished in around 3.5 seconds while PL/JSON took 17 seconds. How long did it take the Node.js solution from above to do this? 136 seconds! What??? Node.js must be slow, right? Well, not exactly...
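The test harness itself isn't shown in this post, but a loop like the one described might look roughly like this. It's a hypothetical sketch: the department IDs (10 through 270 in steps of 10, per the standard HR schema) and the department.js file name are my assumptions:
// Hypothetical benchmark sketch: 100 passes over the 27 HR departments
// = 2,700 invocations of getDepartment.
var async = require('async');
var department = require('./department'); // the solution module shown above

var departmentIds = [];
var startTime;

for (var id = 10; id <= 270; id += 10) {
  departmentIds.push(id);
}

startTime = Date.now();

async.timesSeries(
  100,
  function(iteration, nextIteration) {
    async.eachSeries(
      departmentIds,
      function(departmentId, nextDepartment) {
        department.getDepartment(departmentId, function(err) {
          nextDepartment(err);
        });
      },
      nextIteration
    );
  },
  function(err) {
    if (err) throw err;

    console.log('Total time:', (Date.now() - startTime) / 1000, 'seconds');
  }
);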
You may have noticed that in getDepartment, I used the base driver's getConnection method to connect to the database, which establishes a brand new connection on every call. If I were just doing this once, the code would be fine as is, but 2,700 times? That's not good. In fact, that's really bad! But what's a Node.js developer to do?
Using a Connection Pool
The Node.js driver has supported connection pools from the beginning. The idea is that, rather than incur the cost of making a connection to the database each time we need one, we'll just grab a connection from a pool of connections that have already been established and release it back to the pool when we're done with it. Best of all, this is really simple!
Here's a new module that I'll use to create, store, and fetch the connection pool:
var oracledb = require('oracledb');

var connectionPool;

//createPool is invoked in a separate test file (not shown)
function createPool(callback) {
  oracledb.createPool(
    {
      user: 'hr',
      password: 'welcome',
      connectString: 'server/XE',
      poolMin: 2,
      poolMax: 20,
      poolIncrement: 2,
      poolTimeout: 120
    },
    function(err, pool) {
      if (err) throw err;

      connectionPool = pool;

      callback();
    }
  );
}

module.exports.createPool = createPool;

function getPool() {
  return connectionPool;
}

module.exports.getPool = getPool;
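As the comment above notes, createPool is invoked from a separate test file that isn't shown. The wiring might look something like this hypothetical sketch, with runTest standing in for the benchmark loop:
// Hypothetical wiring in the test file: open the pool once, then run the test.
var pool = require('./pool');

function runTest() {
  // placeholder for the 2,700-invocation benchmark loop
  console.log('pool is open; run the benchmark here');
}

pool.createPool(function() {
  runTest();
});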
With that in place, I can update the test code to open the connection pool prior to starting the test, as sketched above. Then I just need to update the solution to use the connection pool:
var pool = require(__dirname + '/pool');
var async = require('async');

var dateFormat = 'DD-MON-YYYY';

function getDepartment(departmentId, callback) {
  pool.getPool().getConnection(
    function(err, connection) {
      if (err) throw err;

      connection.execute(
        'select department_id, \n' +
        ...
That's it! I just swapped out the driver module for the pool module and used it to get a connection instead. How much did using the connection pool help? With that one simple change, the test completed in 21.5 seconds! Wow, that's just a little longer than it took PL/JSON. So Node.js is still slow, right? Well, not exactly...
Solution 2: Optimizing for Node.js
Remember when I said that the solution mimicked the PL/SQL code? There are two major problems with doing this in Node.js:
- Excessive round trips: Each query we execute is a round trip to the database. In PL/SQL, this is just a context switch from the PL/SQL engine to the SQL engine and back. I'm not saying we shouldn't minimize context switches in PL/SQL (we should), but in Node.js each of these is a round trip across the network! By using the database to do some joins, we can reduce the number of queries from 7 to 3.
- Sequential execution: The way the PL/SQL solutions were written, a query was executed, the results were processed, and then the next query was executed. This sequence was repeated until all the work was complete. It would have been nice to be able to do some of this work in parallel. While Oracle does have some options for doing work in parallel, such as Parallel Execution and DBMS_PARALLEL_EXECUTE, PL/SQL is, for the most part, a single-threaded environment. I could probably have worked some magic to execute the queries and process the results in parallel, perhaps via scheduled jobs or Advanced Queues, but it would have been difficult using PL/SQL.
However, this is an area where Node.js shines! Doing work in parallel is quite easy with the async module; a minimal sketch of the pattern appears below. In the following solution, I use async's parallel method to fire off three functions at the same time: the first builds the basic department object, the second builds the employees array, and the last builds the jobHistory array. The final function, which is fired when all the others have completed, puts the results of the first three functions together into a single object before returning the JSON.
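Before diving into the full solution, here's a minimal sketch of the async.parallel pattern it relies on. The three task functions are placeholders, not the real queries:
var async = require('async');

// Minimal sketch of async.parallel: three placeholder tasks run concurrently;
// the final callback fires once all of them have called back.
async.parallel(
  [
    function(callback) {
      callback(null, 'department and manager details');
    },
    function(callback) {
      callback(null, 'employees');
    },
    function(callback) {
      callback(null, 'job history');
    }
  ],
  function(err, results) {
    if (err) throw err;

    // results is an array with one entry per task, in the order declared
    console.log(results);
  }
);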
Here's the solution optimized for Node.js:
var pool = require(__dirname + '/pool');
var async = require('async');

var dateFormat = 'DD-MON-YYYY';

function getDepartment(departmentId, callback) {
  var department = {};
  var employees = [];
  var empMap = {};
  var jobHistory = [];

  pool.getPool().getConnection(
    function(err, connection) {
      if (err) {
        throw err;
      }

      async.parallel(
        [
          function(callback) {
            connection.execute(
              'select dept.department_id, \n' +
              ' dept.department_name, \n' +
              ' loc.location_id, \n' +
              ' loc.street_address, \n' +
              ' loc.postal_code, \n' +
              ' ctry.country_id, \n' +
              ' ctry.country_name, \n' +
              ' ctry.region_id, \n' +
              ' mgr.employee_id, \n' +
              ' mgr.first_name || \' \' || mgr.last_name, \n' +
              ' mgr.salary, \n' +
              ' mgr_job.job_id, \n' +
              ' mgr_job.job_title, \n' +
              ' mgr_job.min_salary, \n' +
              ' mgr_job.max_salary \n' +
              'from departments dept \n' +
              'join locations loc \n' +
              ' on dept.location_id = loc.location_id \n' +
              'join countries ctry \n' +
              ' on loc.country_id = ctry.country_id \n' +
              'left join employees mgr \n' +
              ' on dept.manager_id = mgr.employee_id \n' +
              'left join jobs mgr_job \n ' +
              ' on mgr.job_id = mgr_job.job_id \n' +
              'where dept.department_id = :department_id',
              {
                department_id: departmentId
              },
              function(err, results) {
                var deptRow;

                if (err) {
                  callback(err);
                  return;
                }

                deptRow = results.rows[0];

                department.id = deptRow[0];
                department.name = deptRow[1];
                department.location = {};
                department.location.id = deptRow[2];
                department.location.streetAddress = deptRow[3];
                department.location.postalCode = deptRow[4];
                department.location.country = {};
                department.location.country.id = deptRow[5];
                department.location.country.name = deptRow[6];
                department.location.country.regionId = deptRow[7];

                department.manager = {};

                if (deptRow[8]) {
                  department.manager.id = deptRow[8];
                  department.manager.name = deptRow[9];
                  department.manager.salary = deptRow[10];
                  department.manager.job = {};
                  department.manager.job.id = deptRow[11];
                  department.manager.job.title = deptRow[12];
                  department.manager.job.minSalary = deptRow[13];
                  department.manager.job.maxSalary = deptRow[14];
                }

                callback(null);
              }
            );
          },
          function(callback) {
            connection.execute(
              'select employee_id, \n' +
              ' first_name || \' \' || last_name, \n' +
              ' case when hire_date < to_date(\'01-01-2005\', \'DD-MM-YYYY\') then 1 else 0 end is_senior, ' +
              ' commission_pct \n' +
              'from employees \n' +
              'where department_id = :department_id',
              {
                department_id: departmentId
              },
              function(err, results) {
                var empRows;

                if (err) {
                  callback(err);
                  return;
                }

                empRows = results.rows;

                empRows.forEach(function(empRow) {
                  var emp = {};

                  emp.id = empRow[0];
                  emp.name = empRow[1];
                  emp.isSenior = empRow[2] === 1; //conversion of 1 or 0 to Boolean
                  emp.commissionPct = empRow[3];
                  emp.jobHistory = [];

                  employees.push(emp);

                  empMap[emp.id] = emp;
                });

                callback(null);
              }
            );
          },
          function(callback) {
            connection.execute(
              'select employee_id, \n' +
              ' job_id, \n' +
              ' department_id, \n' +
              ' to_char(start_date, \'' + dateFormat + '\'), \n' +
              ' to_char(end_date, \'' + dateFormat + '\') \n' +
              'from job_history \n' +
              'where employee_id in ( \n' +
              ' select employee_id \n ' +
              ' from employees \n' +
              ' where department_id = :department_id \n' +
              ')',
              {
                department_id: departmentId
              },
              function(err, results) {
                var jobRows;

                if (err) {
                  callback(err);
                  return;
                }

                jobRows = results.rows;

                jobRows.forEach(function(jobRow) {
                  var job = {};

                  job.employeeId = jobRow[0];
                  job.id = jobRow[1];
                  job.departmentId = jobRow[2];
                  job.startDate = jobRow[3];
                  job.endDate = jobRow[4];

                  jobHistory.push(job);
                });

                callback(null);
              }
            );
          }
        ],
        function(err, results) {
          if (err) throw err;

          department.employees = employees;

          jobHistory.forEach(function(job) {
            empMap[job.employeeId].jobHistory.push(job);
            delete job.employeeId;
          });

          connection.release(function(err) {
            if (err) {
              console.error(err);
            }

            callback(null, department);
          });
        }
      );
    }
  );
}

module.exports.getDepartment = getDepartment;
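Note that this version passes the department object itself to the callback rather than a pre-serialized string, so a caller (hypothetical snippet, mirroring the earlier usage example) would stringify it:
// Hypothetical usage: this version hands back the object, so stringify it here.
var department = require('./department');

department.getDepartment(10, function(err, dept) {
  if (err) throw err;

  console.log(JSON.stringify(dept));
});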
How fast did that solution finish the test? 7.8 seconds! Not too shabby! That's faster than the PL/JSON solution but not quite as fast as APEX_JSON. But could we optimize even further?
Client Result Caching
Because I was running my tests on Oracle XE, I wasn't able to do a final optimization that would have been possible with the Enterprise Edition of the database: Client Result Caching. With Client Result Caching, Oracle can automatically maintain a cache of query results on the client tier, in this case the server where Node.js is running. This could have eliminated some round trips and reduced the amount of data moving across the network. I'll revisit this feature in a future post where we can explore it in detail.
Published at DZone with permission of Dan McGhan, DZone MVB.