/*
 * (C) Copyright 2016 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Florent Guillaume
 */
package org.nuxeo.ecm.directory;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.schema.types.Field;
import org.nuxeo.ecm.core.schema.types.Schema;
import org.nuxeo.ecm.core.schema.types.Type;
import org.nuxeo.ecm.core.schema.types.primitives.DateType;
import org.nuxeo.runtime.api.Framework;

/**
 * Helper to load data from a CSV file.
 * <p>
 * The actual consumer of rows is a parameter passed by the caller.
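 * <p>
 * For example, a caller could collect the parsed rows into a list (the data file name below is illustrative, and
 * {@code schema} would typically be obtained from the schema service):
 *
 * <pre>{@code
 * List<Map<String, Object>> rows = new ArrayList<>();
 * DirectoryCSVLoader.loadData("mydirectory.csv", ',', schema, rows::add);
 * }</pre>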
 *
 * @since 8.4
 */
public class DirectoryCSVLoader {

    private static final Log log = LogFactory.getLog(DirectoryCSVLoader.class);

    /**
     * The special CSV value ({@value}) used to denote that {@code null} should be used as the value.
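     * <p>
     * For instance, with a comma delimiter, a line such as {@code user1,John,__NULL__} loads the third column as
     * {@code null}.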
     */
    public static final String CSV_NULL_MARKER = "__NULL__";

    private DirectoryCSVLoader() {
    }

    /**
     * Loads the CSV data file based on the provided schema, and creates the corresponding entries using the provided
     * loader.
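     * <p>
     * The first line of the data file must be a header whose column names match fields of the schema, for instance
     * (illustrative content for a vocabulary-like schema):
     *
     * <pre>
     * id,label,obsolete
     * europe,Europe,0
     * </pre>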
     *
     * @param dataFileName the file name containing CSV data
     * @param delimiter the CSV column separator
     * @param schema the data schema
     * @param loader the actual consumer of loaded rows
     * @since 8.4
     */
    public static void loadData(String dataFileName, char delimiter, Schema schema,
            Consumer<Map<String, Object>> loader) throws DirectoryException {
        try (InputStream in = getResource(dataFileName); //
                CSVParser csvParser = new CSVParser(new InputStreamReader(in, "UTF-8"),
                        CSVFormat.DEFAULT.withDelimiter(delimiter).withHeader())) {
            Map<String, Integer> header = csvParser.getHeaderMap();

            List<Field> fields = new ArrayList<>();
            for (String columnName : header.keySet()) {
                Field field = schema.getField(columnName.trim());
                if (field == null) {
                    throw new DirectoryException("Column not found: " + columnName + " in schema: " + schema.getName());
                }
                fields.add(field);
            }

            int lineno = 1; // header was first line
            for (CSVRecord record : csvParser) {
                lineno++;
                if (record.size() == 0 || (record.size() == 1 && StringUtils.isBlank(record.get(0)))) {
                    // NXP-2538: allow columns with only one value but skip empty lines
                    continue;
                }
                if (!record.isConsistent()) {
                    log.error("Invalid column count while reading CSV file: " + dataFileName + ", line: " + lineno
                            + ", values: " + record);
                    continue;
                }

                Map<String, Object> map = new HashMap<>();
                for (int i = 0; i < header.size(); i++) {
                    Field field = fields.get(i);
                    String value = record.get(i);
                    Object v = CSV_NULL_MARKER.equals(value) ? null : decode(field, value);
                    map.put(field.getName().getPrefixedName(), v);
                }
                loader.accept(map);
            }
        } catch (IOException e) {
            throw new DirectoryException("Read error while reading data file: " + dataFileName, e);
        }
    }

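    /**
     * Decodes a CSV string value into a Java object according to the type of the given field.
     * <p>
     * Date fields are parsed with {@link Timestamp#valueOf(String)} and returned as a {@link Calendar} in the local
     * timezone; other fields are decoded through {@link Type#decode}.
     */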
    protected static Object decode(Field field, String value) {
        Type type = field.getType();
        if (type instanceof DateType) {
            // compat with earlier code, interpret in the local timezone and not UTC
            Calendar cal = new GregorianCalendar();
            cal.setTime(Timestamp.valueOf(value));
            return cal;
        } else {
            return type.decode(value);
        }
    }

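    /**
     * Locates the data file, first as a resource of this class's class loader, then through the runtime
     * {@link Framework} resource loader.
     *
     * @throws DirectoryException if the resource cannot be found
     */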
    @SuppressWarnings("resource")
    protected static InputStream getResource(String name) {
        InputStream in = DirectoryCSVLoader.class.getClassLoader().getResourceAsStream(name);
        if (in == null) {
            in = Framework.getResourceLoader().getResourceAsStream(name);
            if (in == null) {
                throw new DirectoryException("Data file not found: " + name);
            }
        }
        return in;
    }

}