/*
 * (C) Copyright 2016 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Florent Guillaume
 */
package org.nuxeo.ecm.directory;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.schema.types.Field;
import org.nuxeo.ecm.core.schema.types.Schema;
import org.nuxeo.ecm.core.schema.types.Type;
import org.nuxeo.ecm.core.schema.types.primitives.DateType;
import org.nuxeo.runtime.api.Framework;

/**
 * Helper to load data from a CSV file.
 * <p>
 * The actual consumer of rows is a parameter passed by the caller.
 *
 * @since 8.4
 */
public class DirectoryCSVLoader {

    private static final Log log = LogFactory.getLog(DirectoryCSVLoader.class);

    /**
     * The special CSV value ({@value}) used to denote that a {@code null} should be used for a value.
     */
    public static final String CSV_NULL_MARKER = "__NULL__";

    private DirectoryCSVLoader() {
        // utility class, not instantiable
    }

    /**
     * Loads the CSV data file based on the provided schema, and creates the corresponding entries using the provided
     * loader.
     * <p>
     * The first line of the file is the header; each header column (after trimming) must name a field of the schema.
     * Every following record is turned into a map keyed by the prefixed field name and handed to the loader. A value
     * equal to {@link #CSV_NULL_MARKER} is passed as {@code null}. Empty lines are skipped, and records whose column
     * count does not match the header are logged as errors and skipped.
     *
     * @param dataFileName the file name containing CSV data
     * @param delimiter the CSV column separator
     * @param schema the data schema
     * @param loader the actual consumer of loaded rows
     * @throws DirectoryException if the data file cannot be found or read, or if a header column does not match any
     *             field of the schema
     * @since 8.4
     */
    public static void loadData(String dataFileName, char delimiter, Schema schema,
            Consumer<Map<String, Object>> loader) throws DirectoryException {
        try (InputStream in = getResource(dataFileName);
                CSVParser csvParser = new CSVParser(new InputStreamReader(in, StandardCharsets.UTF_8),
                        CSVFormat.DEFAULT.withDelimiter(delimiter).withHeader())) {
            Map<String, Integer> header = csvParser.getHeaderMap();

            // Resolve each header column to its schema field, keyed by the column's index in a record, so that
            // values are later fetched by that index instead of by position in the header map's iteration order.
            Map<Integer, Field> fieldsByColumnIndex = new LinkedHashMap<>();
            for (Map.Entry<String, Integer> column : header.entrySet()) {
                String columnName = column.getKey();
                Field field = schema.getField(columnName.trim());
                if (field == null) {
                    throw new DirectoryException("Column not found: " + columnName + " in schema: " + schema.getName());
                }
                fieldsByColumnIndex.put(column.getValue(), field);
            }

            // NOTE(review): lineno is incremented once per parsed record; if the parser silently drops blank lines
            // or a quoted value spans several lines, the number reported below may drift from the physical line.
            int lineno = 1; // header was first line
            for (CSVRecord record : csvParser) {
                lineno++;
                if (record.size() == 0 || (record.size() == 1 && StringUtils.isBlank(record.get(0)))) {
                    // NXP-2538: allow columns with only one value but skip empty lines
                    continue;
                }
                if (!record.isConsistent()) {
                    log.error("Invalid column count while reading CSV file: " + dataFileName + ", line: " + lineno
                            + ", values: " + record);
                    continue;
                }

                Map<String, Object> map = new HashMap<>();
                for (Map.Entry<Integer, Field> column : fieldsByColumnIndex.entrySet()) {
                    int index = column.getKey();
                    Field field = column.getValue();
                    String value = record.get(index);
                    Object v = CSV_NULL_MARKER.equals(value) ? null : decode(field, value);
                    map.put(field.getName().getPrefixedName(), v);
                }
                loader.accept(map);
            }
        } catch (IOException e) {
            throw new DirectoryException("Read error while reading data file: " + dataFileName, e);
        }
    }

    /**
     * Converts a raw CSV string into the Java value expected for the given field.
     * <p>
     * Date fields are parsed with {@link Timestamp#valueOf(String)} and returned as a {@link Calendar}; all other
     * fields are decoded by their own {@link Type}.
     *
     * @param field the schema field the value belongs to
     * @param value the raw CSV value (the caller has already handled {@link #CSV_NULL_MARKER})
     * @return the decoded value
     * @throws IllegalArgumentException if the field is a date and the value is not in the JDBC timestamp escape
     *             format accepted by {@link Timestamp#valueOf(String)}
     */
    protected static Object decode(Field field, String value) {
        Type type = field.getType();
        if (type instanceof DateType) {
            // compat with earlier code, interpret in the local timezone and not UTC
            Calendar cal = new GregorianCalendar();
            cal.setTime(Timestamp.valueOf(value));
            return cal;
        } else {
            return type.decode(value);
        }
    }

    /**
     * Opens the named resource, looking first in this class's class loader and then in the framework resource
     * loader.
     * <p>
     * The returned stream is owned by the caller, who is responsible for closing it.
     *
     * @param name the resource name
     * @return an open stream on the resource, never {@code null}
     * @throws DirectoryException if the resource is found by neither loader
     */
    @SuppressWarnings("resource")
    protected static InputStream getResource(String name) {
        InputStream in = DirectoryCSVLoader.class.getClassLoader().getResourceAsStream(name);
        if (in == null) {
            in = Framework.getResourceLoader().getResourceAsStream(name);
            if (in == null) {
                throw new DirectoryException("Data file not found: " + name);
            }
        }
        return in;
    }

}