/*
 * (C) Copyright 2006-2014 Nuxeo SA (http://nuxeo.com/) and contributors.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the GNU Lesser General Public License
 * (LGPL) version 2.1 which accompanies this distribution, and is available at
 * http://www.gnu.org/licenses/lgpl-2.1.html
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 *
 */

package org.nuxeo.ecm.platform.filemanager.service.extension;

import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.common.utils.IdUtils;
import org.nuxeo.common.utils.Path;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.CloseableFile;
import org.nuxeo.ecm.core.api.CoreSession;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.PathRef;
import org.nuxeo.ecm.core.schema.DocumentType;
import org.nuxeo.ecm.core.schema.TypeConstants;
import org.nuxeo.ecm.core.schema.types.Field;
import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl;
import org.nuxeo.ecm.core.schema.types.Type;
import org.nuxeo.ecm.core.schema.types.primitives.DateType;
import org.nuxeo.ecm.core.schema.types.primitives.IntegerType;
import org.nuxeo.ecm.core.schema.types.primitives.LongType;
import org.nuxeo.ecm.core.schema.types.primitives.StringType;
import org.nuxeo.ecm.platform.types.TypeManager;

/**
 * File importer that handles zip archives containing a {@code meta-data.csv} index.
 * <p>
 * Each CSV record describes one document to create (or update, when a document with the given id already exists
 * under the import path). The {@code type} and {@code id} columns (matched case-insensitively) select the document
 * type and name; every other column is treated as a property name, written either as a prefixed xpath
 * ({@code prefix:field}), as {@code schema.field}, or as a bare field name.
 */
public class CSVZipImporter extends AbstractFileImporter {

    private static final long serialVersionUID = 1L;

    /** Name of the zip entry that marks an archive as importable and holds the CSV index. */
    private static final String MARKER = "meta-data.csv";

    private static final Log log = LogFactory.getLog(CSVZipImporter.class);

    /**
     * Opens the given file as a zip archive and checks that it contains the {@code meta-data.csv} marker entry.
     *
     * @param file the candidate archive
     * @return the opened {@link ZipFile} (the caller must close it), or {@code null} if the file cannot be opened
     *         as a zip or does not contain the marker entry
     * @throws IOException if closing an archive without marker fails
     */
    public static ZipFile getArchiveFileIfValid(File file) throws IOException {
        ZipFile zip;
        try {
            zip = new ZipFile(file);
        } catch (ZipException e) {
            log.debug("file is not a zipfile ! ", e);
            return null;
        } catch (IOException e) {
            log.debug("can not open zipfile ! ", e);
            return null;
        }

        if (zip.getEntry(MARKER) == null) {
            // not one of ours: release the handle before giving up
            zip.close();
            return null;
        }
        return zip;
    }

    /**
     * Imports the documents described by the CSV index of the given zip blob under {@code path}.
     * <p>
     * The {@code overwrite}, {@code filename} and {@code typeService} parameters are not used by this
     * implementation.
     *
     * @param documentManager the session used to read, create and save documents
     * @param content the blob holding the zip archive
     * @param path the path of the container document
     * @return the container document found at {@code path}, or {@code null} if the blob is not a zip archive
     *         containing the marker entry
     * @throws IOException if the archive or its CSV index cannot be read
     */
    @Override
    public DocumentModel create(CoreSession documentManager, Blob content, String path, boolean overwrite,
            String filename, TypeManager typeService) throws IOException {
        try (CloseableFile source = content.getCloseableFile()) {
            ZipFile zip = getArchiveFileIfValid(source.getFile());
            if (zip == null) {
                return null;
            }
            // The zip is closed in this inner finally so that it is released BEFORE the backing
            // CloseableFile is closed by the enclosing try-with-resources.
            try {
                DocumentModel container = documentManager.getDocument(new PathRef(path));

                // non-null: getArchiveFileIfValid only returns archives that contain the marker
                ZipEntry index = zip.getEntry(MARKER);
                try (Reader reader = new InputStreamReader(zip.getInputStream(index));
                        CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT.withHeader())) {
                    Map<String, Integer> header = csvParser.getHeaderMap();
                    for (CSVRecord csvRecord : csvParser) {
                        importRecord(documentManager, path, header, csvRecord, zip);
                    }
                }
                return container;
            } finally {
                IOUtils.closeQuietly(zip);
            }
        }
    }

    /**
     * Creates or updates the single document described by one CSV record.
     */
    private void importRecord(CoreSession documentManager, String path, Map<String, Integer> header,
            CSVRecord csvRecord, ZipFile zip) {
        String type = null;
        String id = null;
        Map<String, String> stringValues = new HashMap<>();
        for (String headerValue : header.keySet()) {
            String lineValue = csvRecord.get(headerValue);
            if ("type".equalsIgnoreCase(headerValue)) {
                type = lineValue;
            } else if ("id".equalsIgnoreCase(headerValue)) {
                id = lineValue;
            } else {
                stringValues.put(headerValue, lineValue);
            }
        }
        // A blank id would resolve to the container path itself and make the record update the
        // container; treat blank cells as "not provided".
        if (id != null && id.trim().isEmpty()) {
            id = null;
        }
        if (type != null && type.trim().isEmpty()) {
            type = null;
        }

        // look for an existing document to update
        DocumentModel targetDoc = null;
        boolean updateDoc = false;
        if (id != null) {
            PathRef targetRef = new PathRef(new Path(path).append(id).toString());
            if (documentManager.exists(targetRef)) {
                targetDoc = documentManager.getDocument(targetRef);
                updateDoc = true;
            }
        }

        // create doc if needed
        if (targetDoc == null) {
            if (type == null) {
                log.error("Can not create doc without a type, skipping line");
                return;
            }
            if (id == null) {
                id = IdUtils.generateStringId();
            }
            targetDoc = documentManager.createDocumentModel(path, id, type);
        }

        // update doc properties
        DocumentType targetDocType = targetDoc.getDocumentType();
        for (Map.Entry<String, String> entry : stringValues.entrySet()) {
            applyProperty(targetDoc, targetDocType, entry.getKey(), entry.getValue(), zip);
        }

        if (updateDoc) {
            documentManager.saveDocument(targetDoc);
        } else {
            documentManager.createDocument(targetDoc);
        }
    }

    /**
     * Resolves the field named by a CSV column and, if it exists on the document type and the cell converts to a
     * non-null value, sets it on the document.
     */
    private void applyProperty(DocumentModel targetDoc, DocumentType targetDocType, String fname,
            String stringValue, ZipFile zip) {
        Field field = null;
        boolean usePrefix = false;
        String schemaName = null;
        String fieldName = null;

        if (fname.contains(":")) {
            // prefixed xpath, e.g. "dc:title"
            if (targetDocType.hasField(fname)) {
                field = targetDocType.getField(fname);
                usePrefix = true;
            }
        } else if (fname.contains(".")) {
            // "schema.field" notation; split() drops trailing empty strings, so "schema." yields one part
            String[] parts = fname.split("\\.");
            if (parts.length >= 2) {
                schemaName = parts[0];
                fieldName = parts[1];
                if (targetDocType.hasSchema(schemaName)) {
                    field = targetDocType.getField(fieldName);
                }
            }
        } else if (targetDocType.hasField(fname)) {
            // bare field name: the schema is taken from the field's declaring type
            field = targetDocType.getField(fname);
            schemaName = field.getDeclaringType().getSchemaName();
            fieldName = fname;
        }

        if (field == null) {
            return;
        }
        Serializable fieldValue = getFieldValue(field, stringValue, zip);
        if (fieldValue == null) {
            return;
        }
        if (usePrefix) {
            targetDoc.setPropertyValue(fname, fieldValue);
        } else {
            targetDoc.setProperty(schemaName, fieldName, fieldValue);
        }
    }

    /**
     * Converts a CSV cell to a value suitable for the given field.
     * <p>
     * Supported simple types are string, integer, long and date ({@code dd/MM/yyyy} for 10-character values,
     * {@code dd/MM/yy} for 8-character values). For a complex field whose local name is
     * {@link TypeConstants#CONTENT}, the cell is interpreted as the name of a zip entry and a blob is created from
     * that entry.
     *
     * @param field the target field
     * @param stringValue the raw CSV cell
     * @param zip the archive being imported, used to resolve blob entries
     * @return the converted value, or {@code null} if the value is absent, cannot be parsed, the type is
     *         unsupported, or the referenced zip entry does not exist
     * @throws RuntimeException if the referenced zip entry cannot be read
     */
    protected Serializable getFieldValue(Field field, String stringValue, ZipFile zip) {
        if (stringValue == null) {
            return null;
        }
        Serializable fieldValue = null;
        Type type = field.getType();
        if (type.isSimpleType()) {
            if (type instanceof SimpleTypeImpl) {
                // consider super type instead
                type = type.getSuperType();
            }
            if (type instanceof StringType) {
                fieldValue = stringValue;
            } else if (type instanceof IntegerType) {
                try {
                    fieldValue = Integer.valueOf(stringValue);
                } catch (NumberFormatException e) {
                    // same policy as for dates: log and skip the field instead of aborting the import
                    log.error("Error during number parsing: " + stringValue, e);
                }
            } else if (type instanceof LongType) {
                try {
                    fieldValue = Long.valueOf(stringValue);
                } catch (NumberFormatException e) {
                    log.error("Error during number parsing: " + stringValue, e);
                }
            } else if (type instanceof DateType) {
                try {
                    Date date;
                    if (stringValue.length() == 10) {
                        date = new SimpleDateFormat("dd/MM/yyyy").parse(stringValue);
                    } else if (stringValue.length() == 8) {
                        date = new SimpleDateFormat("dd/MM/yy").parse(stringValue);
                    } else {
                        log.warn("Unknown date format :" + stringValue);
                        return null;
                    }
                    fieldValue = date;
                } catch (ParseException e) {
                    log.error("Error during date parsing", e);
                }
            } else {
                log.warn(String.format("Unsupported field type '%s'", type));
                return null;
            }
        } else if (type.isComplexType()) {
            if (TypeConstants.CONTENT.equals(field.getName().getLocalName())) {
                ZipEntry blobIndex = zip.getEntry(stringValue);
                if (blobIndex != null) {
                    Blob blob;
                    try {
                        // NOTE(review): the entry stream is not closed here; it is released when the
                        // ZipFile itself is closed by the caller.
                        blob = Blobs.createBlob(zip.getInputStream(blobIndex));
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                    blob.setFilename(stringValue);
                    fieldValue = (Serializable) blob;
                }
            }
        }

        return fieldValue;
    }

}