/*
 * (C) Copyright 2006-2014 Nuxeo SA (http://nuxeo.com/) and others.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Contributors:
 *     Nuxeo - initial API and implementation
 *
 */

package org.nuxeo.ecm.platform.filemanager.service.extension;

import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.common.utils.IdUtils;
import org.nuxeo.common.utils.Path;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.CloseableFile;
import org.nuxeo.ecm.core.api.CoreSession;
import org.nuxeo.ecm.core.api.DocumentModel;
import org.nuxeo.ecm.core.api.NuxeoException;
import org.nuxeo.ecm.core.api.PathRef;
import org.nuxeo.ecm.core.schema.DocumentType;
import org.nuxeo.ecm.core.schema.TypeConstants;
import org.nuxeo.ecm.core.schema.types.Field;
import org.nuxeo.ecm.core.schema.types.SimpleTypeImpl;
import org.nuxeo.ecm.core.schema.types.Type;
import org.nuxeo.ecm.core.schema.types.primitives.DateType;
import org.nuxeo.ecm.core.schema.types.primitives.IntegerType;
import org.nuxeo.ecm.core.schema.types.primitives.LongType;
import org.nuxeo.ecm.core.schema.types.primitives.StringType;
import org.nuxeo.ecm.platform.filemanager.api.FileImporterContext;

/**
 * File importer that expands a ZIP archive containing a {@code meta-data.csv}
 * manifest: each CSV line describes one document to create or update under the
 * import's parent folder. Non-{@code type}/{@code id} columns are mapped onto
 * document properties, and content properties may reference other entries of
 * the same archive by name.
 */
public class CSVZipImporter extends AbstractFileImporter {

    private static final long serialVersionUID = 1L;

    /** Name of the CSV manifest entry that marks an archive as importable. */
    private static final String MARKER = "meta-data.csv";

    private static final Log log = LogFactory.getLog(CSVZipImporter.class);

    /**
     * Opens {@code file} as a ZIP archive and checks it contains the
     * {@value #MARKER} manifest.
     *
     * @return an OPEN {@link ZipFile} the caller must close, or {@code null}
     *         when the file is not a ZIP or has no manifest entry
     * @throws IOException if closing a manifest-less archive fails
     */
    public static ZipFile getArchiveFileIfValid(File file) throws IOException {
        ZipFile zip;

        try {
            zip = new ZipFile(file);
        } catch (ZipException e) {
            log.debug("file is not a zipfile ! ", e);
            return null;
        } catch (IOException e) {
            log.debug("can not open zipfile ! ", e);
            return null;
        }

        ZipEntry marker = zip.getEntry(MARKER);

        if (marker == null) {
            // not a CSV import archive: release the handle before bailing out
            zip.close();
            return null;
        } else {
            return zip;
        }
    }

    @Override
    public boolean isOneToMany() {
        // one archive fans out into many documents
        return true;
    }

    /**
     * Imports every line of the archive's CSV manifest as a document under the
     * context's parent path.
     *
     * @return the parent container document, or {@code null} when the blob is
     *         not a valid CSV/ZIP archive
     * @throws IOException on archive or CSV read errors
     */
    @Override
    public DocumentModel createOrUpdate(FileImporterContext context) throws IOException {
        CoreSession session = context.getSession();
        Blob blob = context.getBlob();
        // try-with-resources also owns the ZipFile, which was previously leaked
        // on the success path; a null resource is legal and simply not closed
        try (CloseableFile source = blob.getCloseableFile();
                ZipFile zip = getArchiveFileIfValid(source.getFile())) {
            if (zip == null) {
                return null;
            }

            String parentPath = context.getParentPath();
            DocumentModel container = session.getDocument(new PathRef(parentPath));

            ZipEntry index = zip.getEntry(MARKER);
            // NOTE(review): no explicit charset, so the platform default is used
            // to decode the CSV — confirm whether UTF-8 should be forced here
            try (Reader reader = new InputStreamReader(zip.getInputStream(index));
                    CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT.withHeader())) {
                Map<String, Integer> header = csvParser.getHeaderMap();
                for (CSVRecord csvRecord : csvParser) {
                    importRecord(session, parentPath, header, csvRecord, zip);
                }
            }
            return container;
        }
    }

    /**
     * Creates or updates the single document described by one CSV record.
     * Lines without an existing target and without a {@code type} column are
     * skipped with an error log.
     */
    private void importRecord(CoreSession session, String parentPath, Map<String, Integer> header,
            CSVRecord csvRecord, ZipFile zip) {
        String type = null;
        String id = null;
        Map<String, String> stringValues = new HashMap<>();
        // split the record into the reserved type/id columns and plain properties
        for (String headerValue : header.keySet()) {
            String lineValue = csvRecord.get(headerValue);
            if ("type".equalsIgnoreCase(headerValue)) {
                type = lineValue;
            } else if ("id".equalsIgnoreCase(headerValue)) {
                id = lineValue;
            } else {
                stringValues.put(headerValue, lineValue);
            }
        }

        boolean updateDoc = false;
        // reuse an existing child with the same id, if any
        DocumentModel targetDoc = null;
        if (id != null) {
            String targetPath = new Path(parentPath).append(id).toString();
            PathRef targetRef = new PathRef(targetPath);
            if (session.exists(targetRef)) {
                targetDoc = session.getDocument(targetRef);
                updateDoc = true;
            }
        }

        // create the document if no existing one matched
        if (targetDoc == null) {
            if (type == null) {
                log.error("Can not create doc without a type, skipping line");
                return;
            }

            if (id == null) {
                id = IdUtils.generateStringId();
            }
            targetDoc = session.createDocumentModel(parentPath, id, type);
        }

        applyProperties(targetDoc, stringValues, zip);

        if (updateDoc) {
            session.saveDocument(targetDoc);
        } else {
            session.createDocument(targetDoc);
        }
    }

    /**
     * Maps CSV column values onto document properties, resolving each column
     * name as a prefixed field ({@code prefix:name}), a dotted field
     * ({@code schema.name}) or a bare field name.
     */
    private void applyProperties(DocumentModel targetDoc, Map<String, String> stringValues, ZipFile zip) {
        DocumentType targetDocType = targetDoc.getDocumentType();
        for (Map.Entry<String, String> entry : stringValues.entrySet()) {
            String fname = entry.getKey();
            String stringValue = entry.getValue();
            Field field = null;
            boolean usePrefix = false;
            String schemaName = null;
            String fieldName = null;

            if (fname.contains(":")) {
                // prefixed xpath form, e.g. dc:title
                if (targetDocType.hasField(fname)) {
                    field = targetDocType.getField(fname);
                    usePrefix = true;
                }
            } else if (fname.contains(".")) {
                // dotted form, e.g. dublincore.title
                String[] parts = fname.split("\\.");
                schemaName = parts[0];
                fieldName = parts[1];
                if (targetDocType.hasSchema(schemaName)) {
                    field = targetDocType.getField(fieldName);
                    usePrefix = false;
                }
            } else {
                // bare field name: resolve its declaring schema
                if (targetDocType.hasField(fname)) {
                    field = targetDocType.getField(fname);
                    usePrefix = false;
                    schemaName = field.getDeclaringType().getSchemaName();
                }
            }

            if (field != null) {
                Serializable fieldValue = getFieldValue(field, stringValue, zip);

                if (fieldValue != null) {
                    if (usePrefix) {
                        targetDoc.setPropertyValue(fname, fieldValue);
                    } else {
                        targetDoc.setProperty(schemaName, fieldName, fieldValue);
                    }
                }
            }
        }
    }

    /**
     * Converts a CSV cell to a property value according to the field type:
     * strings, integers, longs and {@code dd/MM/yyyy} / {@code dd/MM/yy} dates
     * for simple types; for a content field, the cell names a ZIP entry whose
     * bytes become the blob.
     *
     * @return the converted value, or {@code null} when the type is
     *         unsupported, the date is malformed or the ZIP entry is missing
     * @throws NuxeoException if reading the referenced ZIP entry fails
     */
    protected Serializable getFieldValue(Field field, String stringValue, ZipFile zip) {
        Serializable fieldValue = null;
        Type type = field.getType();
        if (type.isSimpleType()) {
            if (type instanceof SimpleTypeImpl) {
                // consider super type instead
                type = type.getSuperType();
            }
            if (type instanceof StringType) {
                fieldValue = stringValue;
            } else if (type instanceof IntegerType) {
                fieldValue = Integer.parseInt(stringValue);
            } else if (type instanceof LongType) {
                fieldValue = Long.parseLong(stringValue);
            } else if (type instanceof DateType) {
                try {
                    Date date;
                    // format is guessed from the raw length: 10 chars for a
                    // 4-digit year, 8 chars for a 2-digit year
                    if (stringValue.length() == 10) {
                        date = new SimpleDateFormat("dd/MM/yyyy").parse(stringValue);
                    } else if (stringValue.length() == 8) {
                        date = new SimpleDateFormat("dd/MM/yy").parse(stringValue);
                    } else {
                        log.warn("Unknown date format :" + stringValue);
                        return null;
                    }
                    fieldValue = date;
                } catch (ParseException e) {
                    log.error("Error during date parsing", e);
                }
            } else {
                log.warn(String.format("Unsupported field type '%s'", type));
                return null;
            }
        } else if (type.isComplexType() && TypeConstants.CONTENT.equals(field.getName().getLocalName())) {
            // content field: the cell names a sibling ZIP entry holding the bytes
            ZipEntry blobIndex = zip.getEntry(stringValue);
            if (blobIndex != null) {
                Blob blob;
                try {
                    blob = Blobs.createBlob(zip.getInputStream(blobIndex));
                } catch (IOException e) {
                    throw new NuxeoException(e);
                }
                blob.setFilename(stringValue);
                fieldValue = (Serializable) blob;
            }
        }

        return fieldValue;
    }

}