/*
 * (C) Copyright 2016-2020 Nuxeo (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Florent Guillaume
 */
package org.nuxeo.ecm.directory;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.Consumer;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.schema.types.Field;
import org.nuxeo.ecm.core.schema.types.Schema;
import org.nuxeo.ecm.core.schema.types.Type;
import org.nuxeo.ecm.core.schema.types.primitives.DateType;
import org.nuxeo.runtime.api.Framework;

/**
 * Helper to load data from a CSV file.
 * <p>
 * The actual consumer of rows is a parameter passed by the caller.
 *
 * @since 8.4
 */
public class DirectoryCSVLoader {

    private static final Logger log = LogManager.getLogger(DirectoryCSVLoader.class);

    /**
     * The special CSV value ({@value}) used to denote that a {@code null} should be used for a value.
     */
    public static final String CSV_NULL_MARKER = "__NULL__";

    // utility class, not meant to be instantiated
    private DirectoryCSVLoader() {
    }

    /**
     * Loads the CSV data file based on the provided schema, and creates the corresponding entries using the provided
     * loader.
     *
     * @param dataFileName the file name containing CSV data
     * @param delimiter the CSV column separator
     * @param schema the data schema
     * @param loader the actual consumer of loaded rows
     * @since 8.4
     * @see #loadData(Blob, char, Schema, Consumer)
     */
    public static void loadData(String dataFileName, char delimiter, Schema schema,
            Consumer<Map<String, Object>> loader) {
        Blob blob = createBlob(dataFileName);
        loadData(blob, delimiter, schema, loader);
    }

    /**
     * Creates a CSV blob from a classpath or runtime resource.
     *
     * @param dataFileName the resource name containing CSV data
     * @return the blob, with its filename set to {@code dataFileName}
     * @throws DirectoryException if the resource cannot be found or read
     */
    protected static Blob createBlob(String dataFileName) {
        try (InputStream in = getResource(dataFileName)) {
            Blob blob = Blobs.createBlob(in, "text/csv");
            // propagate the name so that error messages downstream, which rely on
            // Blob#getFilename, mention the actual data file instead of "null"
            blob.setFilename(dataFileName);
            return blob;
        } catch (IOException e) {
            throw new DirectoryException("Read error while creating blob from data file: " + dataFileName, e);
        }
    }

    /**
     * Loads the CSV data file based on the provided schema, and creates the corresponding entries using the provided
     * loader.
     * <p>
     * The first CSV line is expected to be a header whose column names match fields of the schema. Empty lines are
     * skipped, lines with an unexpected column count are logged and skipped, and the {@link #CSV_NULL_MARKER} value is
     * turned into {@code null}.
     *
     * @param dataBlob the blob containing CSV data
     * @param delimiter the CSV column separator
     * @param schema the data schema
     * @param loader the actual consumer of loaded rows
     * @throws DirectoryException if a header column has no matching schema field, or on read error
     * @since 11.1
     */
    public static void loadData(Blob dataBlob, char delimiter, Schema schema, Consumer<Map<String, Object>> loader) {
        BiConsumer<Map<String, Object>, Integer> loaderWithLineno = toLoaderEnrichedOnError(loader);
        String dataFileName = dataBlob.getFilename();
        try (InputStream in = dataBlob.getStream();
                CSVParser csvParser = new CSVParser(new InputStreamReader(in, UTF_8),
                        CSVFormat.DEFAULT.withDelimiter(delimiter).withHeader())) {

            Map<String, Integer> header = csvParser.getHeaderMap();

            // resolve each header column to a schema field, in column order
            List<Field> fields = new ArrayList<>();
            for (String columnName : header.keySet()) {
                Field field = schema.getField(columnName.trim());
                if (field == null) {
                    throw new DirectoryException("Column not found: " + columnName + " in schema: " + schema.getName());
                }
                fields.add(field);
            }

            int lineno = 1; // header was first line
            for (CSVRecord record : csvParser) {
                lineno++;
                if (record.size() == 0 || record.size() == 1 && StringUtils.isBlank(record.get(0))) {
                    // NXP-2538: allow columns with only one value but skip empty lines
                    continue;
                }
                if (!record.isConsistent()) {
                    log.error("Invalid column count while reading CSV file: {}, line: {}", dataFileName, lineno);
                    continue;
                }

                Map<String, Object> map = new HashMap<>();
                for (int i = 0; i < header.size(); i++) {
                    Field field = fields.get(i);
                    String value = record.get(i);
                    Object v = CSV_NULL_MARKER.equals(value) ? null : decode(field, value);
                    map.put(field.getName().getPrefixedName(), v);
                }
                loaderWithLineno.accept(map, lineno);
            }
        } catch (IOException e) {
            throw new DirectoryException("Read error while reading data file: " + dataFileName, e);
        }
    }

    /**
     * Wraps the loader so that any {@link DirectoryException} it throws is enriched with the CSV line number before
     * being rethrown.
     */
    protected static BiConsumer<Map<String, Object>, Integer> toLoaderEnrichedOnError(
            Consumer<Map<String, Object>> loader) {
        return (map, lineno) -> {
            try {
                loader.accept(map);
            } catch (DirectoryException e) {
                e.addInfo("At line: " + lineno);
                throw e;
            }
        };
    }

    /**
     * Decodes a CSV string value according to the field type.
     * <p>
     * Date fields must use the JDBC timestamp escape format ({@code yyyy-[m]m-[d]d hh:mm:ss[.f...]}) expected by
     * {@link Timestamp#valueOf}.
     *
     * @param field the schema field
     * @param value the raw CSV value
     * @return the decoded value
     */
    protected static Object decode(Field field, String value) {
        Type type = field.getType();
        if (type instanceof DateType) {
            // compat with earlier code, interpret in the local timezone and not UTC
            Calendar cal = new GregorianCalendar();
            cal.setTime(Timestamp.valueOf(value));
            return cal;
        } else {
            return type.decode(value);
        }
    }

    /**
     * Opens a resource by name, looking first in this class's classloader, then in the runtime resource loader.
     *
     * @param name the resource name
     * @return the open stream, never {@code null}
     * @throws DirectoryException if the resource cannot be found
     */
    protected static InputStream getResource(String name) {
        InputStream in = open(DirectoryCSVLoader.class.getClassLoader().getResource(name));
        if (in == null) {
            in = open(Framework.getResourceLoader().getResource(name));
            if (in == null) {
                throw new DirectoryException("Data file not found: " + name);
            }
        }
        return in;
    }

    /**
     * Gets the {@link InputStream} from a {@link URL}, avoiding JAR caches.
     *
     * @return the stream, or {@code null} if the URL is {@code null} or cannot be opened
     * @since 11.1
     */
    protected static InputStream open(URL url) {
        if (url == null) {
            return null;
        }
        URLConnection con;
        try {
            con = url.openConnection();
        } catch (IOException e) {
            return null;
        }
        // avoid using caches, as hot-reload may change underlying JARs
        con.setUseCaches(false);
        try {
            return con.getInputStream();
        } catch (IOException e) {
            return null;
        }
    }

}