001/*
002 * (C) Copyright 2006-2009 Nuxeo SA (http://nuxeo.com/) and others.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 *
016 * Contributors:
017 *     Nuxeo - initial API and implementation
018 *
019 * $Id$
020 */
021
022package org.nuxeo.ecm.core.convert.plugins.text.extractors;
023
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import javax.mail.Address;
import javax.mail.Message.RecipientType;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Part;
import javax.mail.Session;
import javax.mail.internet.ContentType;
import javax.mail.internet.MimeMessage;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.nuxeo.ecm.core.api.Blob;
import org.nuxeo.ecm.core.api.Blobs;
import org.nuxeo.ecm.core.api.blobholder.BlobHolder;
import org.nuxeo.ecm.core.api.blobholder.SimpleBlobHolder;
import org.nuxeo.ecm.core.convert.api.ConversionException;
import org.nuxeo.ecm.core.convert.api.ConversionService;
import org.nuxeo.ecm.core.convert.cache.SimpleCachableBlobHolder;
import org.nuxeo.ecm.core.convert.extension.Converter;
import org.nuxeo.ecm.core.convert.extension.ConverterDescriptor;
import org.nuxeo.runtime.api.Framework;
056
057public class RFC822ToTextConverter implements Converter {
058
059    private static final Log log = LogFactory.getLog(RFC822ToTextConverter.class);
060
061    private static final String MESSAGE_RFC822_MIMETYPE = "message/rfc822";
062
063    private static final String TXT_MT = "text/plain";
064
065    protected ConverterDescriptor descriptor;
066
067    protected Blob extractTextFromMessage(Blob blob) {
068        if (blob == null) {
069            return null;
070        }
071        File f = null;
072        OutputStream fo = null;
073        try {
074            MimeMessage msg = new MimeMessage((Session) null, blob.getStream());
075            f = Framework.createTempFile("rfc822totext", ".txt");
076            fo = new FileOutputStream(f);
077            List<Part> parts = getAttachmentParts(msg);
078            writeInfo(fo, msg.getSubject());
079            writeInfo(fo, msg.getFrom());
080            writeInfo(fo, msg.getRecipients(RecipientType.TO));
081            writeInfo(fo, msg.getRecipients(RecipientType.CC));
082            for (Part part : parts) {
083                writeInfo(fo, part.getFileName());
084                writeInfo(fo, part.getDescription());
085                byte[] extracted = extractTextFromMessagePart(part);
086                if (extracted != null) {
087                    writeInfo(fo, extracted);
088                }
089            }
090            Blob outblob;
091            try (InputStream in = new FileInputStream(f)) {
092                outblob = Blobs.createBlob(in);
093            }
094            outblob.setMimeType(descriptor.getDestinationMimeType());
095            return outblob;
096        } catch (IOException | MessagingException e) {
097            log.error(e);
098        } finally {
099            if (fo != null) {
100                try {
101                    fo.close();
102                } catch (IOException e) {
103                    log.error(e);
104                }
105            }
106            if (f != null) {
107                f.delete();
108            }
109        }
110        return null;
111    }
112
113    protected static void writeInfo(OutputStream stream, Address address) {
114        if (address != null) {
115            try {
116                stream.write(address.toString().getBytes());
117                stream.write(" ".getBytes());
118            } catch (IOException e) {
119                log.error(e, e);
120            }
121        }
122    }
123
124    protected static void writeInfo(OutputStream stream, Address[] addresses) {
125        if (addresses != null) {
126            for (Address address : addresses) {
127                writeInfo(stream, address);
128            }
129        }
130    }
131
132    protected static void writeInfo(OutputStream stream, String info) {
133        if (info != null) {
134            try {
135                stream.write(info.getBytes());
136                stream.write(" ".getBytes());
137            } catch (IOException e) {
138                log.error(e, e);
139            }
140        }
141    }
142
143    protected static void writeInfo(OutputStream stream, byte[] info) {
144        if (info != null) {
145            try {
146                stream.write(info);
147                stream.write(" ".getBytes());
148            } catch (IOException e) {
149                log.error(e, e);
150            }
151        }
152    }
153
154    protected static byte[] extractTextFromMessagePart(Part p) throws MessagingException, IOException {
155        ContentType contentType = new ContentType(p.getContentType());
156        String baseType = contentType.getBaseType();
157        if (TXT_MT.equals(baseType)) {
158            Object content = p.getContent();
159            if (content instanceof String) {
160                return ((String) content).getBytes();
161            } else {
162                return null;
163            }
164        }
165        ConversionService cs = Framework.getService(ConversionService.class);
166
167        String converterName = cs.getConverterName(baseType, TXT_MT);
168        if (converterName == null) {
169            return null;
170        } else {
171            Blob blob;
172            try (InputStream in = p.getInputStream()) {
173                blob = Blobs.createBlob(in, p.getContentType());
174            }
175            BlobHolder result = cs.convert(converterName, new SimpleBlobHolder(blob), null);
176            return result.getBlob().getByteArray();
177        }
178    }
179
180    protected static List<Part> getAttachmentParts(Part p) throws MessagingException, IOException {
181        List<Part> res = new ArrayList<>();
182        if (p.isMimeType(MESSAGE_RFC822_MIMETYPE)) {
183            res.addAll(getAttachmentParts((Part) p.getContent()));
184        } else if (p.isMimeType("multipart/alternative")) {
185            // only return one of the text alternatives
186            Multipart mp = (Multipart) p.getContent();
187            int count = mp.getCount();
188            Part alternativePart = null;
189            for (int i = 0; i < count; i++) {
190                Part subPart = mp.getBodyPart(i);
191                if (subPart.isMimeType(TXT_MT)) {
192                    alternativePart = subPart;
193                    break;
194                } else if (subPart.isMimeType("text/*")) {
195                    alternativePart = subPart;
196                } else {
197                    res.addAll(getAttachmentParts(subPart));
198                }
199            }
200            if (alternativePart != null) {
201                res.add(alternativePart);
202            }
203        } else if (p.isMimeType("multipart/*")) {
204            Multipart mp = (Multipart) p.getContent();
205            int count = mp.getCount();
206            for (int i = 0; i < count; i++) {
207                res.addAll(getAttachmentParts(mp.getBodyPart(i)));
208            }
209        } else {
210            res.add(p);
211        }
212        return res;
213    }
214
215    @Override
216    public BlobHolder convert(BlobHolder blobHolder, Map<String, Serializable> parameters) throws ConversionException {
217        Blob inputBlob = blobHolder.getBlob();
218        Blob outputBlob = extractTextFromMessage(inputBlob);
219        return new SimpleCachableBlobHolder(outputBlob);
220    }
221
222    @Override
223    public void init(ConverterDescriptor descriptor) {
224        this.descriptor = descriptor;
225    }
226
227}