001/*
002 * (C) Copyright 2006-2014 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 *
019 */
020
021package org.nuxeo.ecm.platform.filemanager.service.extension;
022
023import java.io.File;
024import java.io.IOException;
025import java.io.InputStreamReader;
026import java.io.Reader;
027import java.io.Serializable;
028import java.text.ParseException;
029import java.text.SimpleDateFormat;
030import java.util.Date;
031import java.util.HashMap;
032import java.util.Map;
033import java.util.zip.ZipEntry;
034import java.util.zip.ZipException;
035import java.util.zip.ZipFile;
036
037import org.apache.commons.csv.CSVFormat;
038import org.apache.commons.csv.CSVParser;
039import org.apache.commons.csv.CSVRecord;
040import org.apache.commons.logging.Log;
041import org.apache.commons.logging.LogFactory;
042import org.nuxeo.common.utils.IdUtils;
043import org.nuxeo.common.utils.Path;
044import org.nuxeo.ecm.core.api.Blob;
045import org.nuxeo.ecm.core.api.Blobs;
046import org.nuxeo.ecm.core.api.CloseableFile;
047import org.nuxeo.ecm.core.api.CoreSession;
048import org.nuxeo.ecm.core.api.DocumentModel;
049import org.nuxeo.ecm.core.api.NuxeoException;
050import org.nuxeo.ecm.core.api.PathRef;
051import org.nuxeo.ecm.core.schema.DocumentType;
052import org.nuxeo.ecm.core.schema.TypeConstants;
053import org.nuxeo.ecm.core.schema.types.Field;
054import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl;
055import org.nuxeo.ecm.core.schema.types.Type;
056import org.nuxeo.ecm.core.schema.types.primitives.DateType;
057import org.nuxeo.ecm.core.schema.types.primitives.IntegerType;
058import org.nuxeo.ecm.core.schema.types.primitives.LongType;
059import org.nuxeo.ecm.core.schema.types.primitives.StringType;
060import org.nuxeo.ecm.platform.filemanager.api.FileImporterContext;
061
062public class CSVZipImporter extends AbstractFileImporter {
063
064    private static final long serialVersionUID = 1L;
065
066    private static final String MARKER = "meta-data.csv";
067
068    private static final Log log = LogFactory.getLog(CSVZipImporter.class);
069
070    public static ZipFile getArchiveFileIfValid(File file) throws IOException {
071        ZipFile zip;
072
073        try {
074            zip = new ZipFile(file);
075        } catch (ZipException e) {
076            log.debug("file is not a zipfile ! ", e);
077            return null;
078        } catch (IOException e) {
079            log.debug("can not open zipfile ! ", e);
080            return null;
081        }
082
083        ZipEntry marker = zip.getEntry(MARKER);
084
085        if (marker == null) {
086            zip.close();
087            return null;
088        } else {
089            return zip;
090        }
091    }
092
093    @Override
094    public boolean isOneToMany() {
095        return true;
096    }
097
098    @Override
099    public DocumentModel createOrUpdate(FileImporterContext context) throws IOException {
100        CoreSession session = context.getSession();
101        Blob blob = context.getBlob();
102        ZipFile zip;
103        try (CloseableFile source = blob.getCloseableFile()) {
104            zip = getArchiveFileIfValid(source.getFile());
105            if (zip == null) {
106                return null;
107            }
108
109            String parentPath = context.getParentPath();
110            DocumentModel container = session.getDocument(new PathRef(parentPath));
111
112            ZipEntry index = zip.getEntry(MARKER);
113            try (Reader reader = new InputStreamReader(zip.getInputStream(index));
114                 CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT.withHeader());) {
115
116                Map<String, Integer> header = csvParser.getHeaderMap();
117                for (CSVRecord csvRecord : csvParser) {
118                    String type = null;
119                    String id = null;
120                    Map<String, String> stringValues = new HashMap<>();
121                    for (String headerValue : header.keySet()) {
122                        String lineValue = csvRecord.get(headerValue);
123                        if ("type".equalsIgnoreCase(headerValue)) {
124                            type = lineValue;
125                        } else if ("id".equalsIgnoreCase(headerValue)) {
126                            id = lineValue;
127                        } else {
128                            stringValues.put(headerValue, lineValue);
129                        }
130                    }
131
132                    boolean updateDoc = false;
133                    // get doc for update
134                    DocumentModel targetDoc = null;
135                    if (id != null) {
136                        // update ?
137                        String targetPath = new Path(parentPath).append(id).toString();
138                        if (session.exists(new PathRef(targetPath))) {
139                            targetDoc = session.getDocument(new PathRef(targetPath));
140                            updateDoc = true;
141                        }
142                    }
143
144                    // create doc if needed
145                    if (targetDoc == null) {
146                        if (type == null) {
147                            log.error("Can not create doc without a type, skipping line");
148                            continue;
149                        }
150
151                        if (id == null) {
152                            id = IdUtils.generateStringId();
153                        }
154                        targetDoc = session.createDocumentModel(parentPath, id, type);
155                    }
156
157                    // update doc properties
158                    DocumentType targetDocType = targetDoc.getDocumentType();
159                    for (Map.Entry<String, String> entry : stringValues.entrySet()) {
160                        String fname = entry.getKey();
161                        String stringValue = entry.getValue();
162                        Field field = null;
163                        boolean usePrefix = false;
164                        String schemaName = null;
165                        String fieldName = null;
166
167                        if (fname.contains(":")) {
168                            if (targetDocType.hasField(fname)) {
169                                field = targetDocType.getField(fname);
170                                usePrefix = true;
171                            }
172                        } else if (fname.contains(".")) {
173                            String[] parts = fname.split("\\.");
174                            schemaName = parts[0];
175                            fieldName = parts[1];
176                            if (targetDocType.hasSchema(schemaName)) {
177                                field = targetDocType.getField(fieldName);
178                                usePrefix = false;
179                            }
180                        } else {
181                            if (targetDocType.hasField(fname)) {
182                                field = targetDocType.getField(fname);
183                                usePrefix = false;
184                                schemaName = field.getDeclaringType().getSchemaName();
185                            }
186                        }
187
188                        if (field != null) {
189                            Serializable fieldValue = getFieldValue(field, stringValue, zip);
190
191                            if (fieldValue != null) {
192                                if (usePrefix) {
193                                    targetDoc.setPropertyValue(fname, fieldValue);
194                                } else {
195                                    targetDoc.setProperty(schemaName, fieldName, fieldValue);
196                                }
197                            }
198                        }
199                    }
200                    if (updateDoc) {
201                        session.saveDocument(targetDoc);
202                    } else {
203                        session.createDocument(targetDoc);
204                    }
205                }
206            }
207            return container;
208        }
209    }
210
211    protected Serializable getFieldValue(Field field, String stringValue, ZipFile zip) {
212        Serializable fieldValue = null;
213        Type type = field.getType();
214        if (type.isSimpleType()) {
215            if (type instanceof SimpleTypeImpl) {
216                // consider super type instead
217                type = type.getSuperType();
218            }
219            if (type instanceof StringType) {
220                fieldValue = stringValue;
221            } else if (type instanceof IntegerType) {
222                fieldValue = Integer.parseInt(stringValue);
223            } else if (type instanceof LongType) {
224                fieldValue = Long.parseLong(stringValue);
225            } else if (type instanceof DateType) {
226                try {
227                    Date date;
228                    if (stringValue.length() == 10) {
229                        date = new SimpleDateFormat("dd/MM/yyyy").parse(stringValue);
230                    } else if (stringValue.length() == 8) {
231                        date = new SimpleDateFormat("dd/MM/yy").parse(stringValue);
232                    } else {
233                        log.warn("Unknown date format :" + stringValue);
234                        return null;
235                    }
236                    fieldValue = date;
237                } catch (ParseException e) {
238                    log.error("Error during date parsing", e);
239                }
240            } else {
241                log.warn(String.format("Unsupported field type '%s'", type));
242                return null;
243            }
244        } else if (type.isComplexType() && TypeConstants.CONTENT.equals(field.getName().getLocalName())) {
245            ZipEntry blobIndex = zip.getEntry(stringValue);
246            if (blobIndex != null) {
247                Blob blob;
248                try {
249                    blob = Blobs.createBlob(zip.getInputStream(blobIndex));
250                } catch (IOException e) {
251                    throw new NuxeoException(e);
252                }
253                blob.setFilename(stringValue);
254                fieldValue = (Serializable) blob;
255            }
256        }
257
258        return fieldValue;
259    }
260
261}