aboutsummaryrefslogtreecommitdiff
path: root/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/metadata/MetadataPathUtils.java
blob: 2794e2b14a1a4d4a5e952843f9033856f8580cd3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.exec.store.parquet.metadata;

import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
import org.apache.drill.common.util.DrillVersionInfo;
import org.apache.hadoop.fs.Path;

import java.util.ArrayList;
import java.util.List;

import static org.apache.drill.exec.store.parquet.metadata.MetadataVersion.Constants.SUPPORTED_VERSIONS;
import static org.apache.drill.exec.store.parquet.metadata.Metadata_V3.ParquetFileMetadata_v3;
import static org.apache.drill.exec.store.parquet.metadata.Metadata_V3.ParquetTableMetadata_v3;

/**
 * Util class that contains helper methods for converting paths in the table and directory metadata structures
 * between their absolute form and their form relative to a base directory.
 */
public class MetadataPathUtils {

  /**
   * Helper method that converts a list of relative paths to absolute ones.
   * Paths that are already absolute are added to the result unchanged.
   *
   * @param paths list of relative paths
   * @param baseDir base parent directory
   * @return list of absolute paths; if {@code paths} is empty, the very same list instance is returned
   */
  public static List<Path> convertToAbsolutePaths(List<Path> paths, String baseDir) {
    if (!paths.isEmpty()) {
      List<Path> absolutePaths = Lists.newArrayList();
      for (Path relativePath : paths) {
        // resolve against the base directory only when the path is not absolute yet
        Path absolutePath = (relativePath.isAbsolute()) ? relativePath
            : new Path(baseDir, relativePath);
        absolutePaths.add(absolutePath);
      }
      return absolutePaths;
    }
    return paths;
  }

  /**
   * Convert a list of files with relative paths to files with absolute ones.
   * Files whose path is already absolute are reused as is; for the others a new file metadata
   * instance is created with the same length and row groups.
   *
   * @param files list of files with relative paths
   * @param baseDir base parent directory
   * @return list of files with absolute paths; if {@code files} is empty, the very same list instance is returned
   */
  public static List<ParquetFileMetadata_v3> convertToFilesWithAbsolutePaths(
      List<ParquetFileMetadata_v3> files, String baseDir) {
    if (!files.isEmpty()) {
      List<ParquetFileMetadata_v3> filesWithAbsolutePaths = Lists.newArrayList();
      for (ParquetFileMetadata_v3 file : files) {
        Path relativePath = file.getPath();
        // create a new file if old one contains a relative path, otherwise use an old file
        ParquetFileMetadata_v3 fileWithAbsolutePath = (relativePath.isAbsolute()) ? file
            : new ParquetFileMetadata_v3(new Path(baseDir, relativePath), file.length, file.rowGroups);
        filesWithAbsolutePaths.add(fileWithAbsolutePath);
      }
      return filesWithAbsolutePaths;
    }
    return files;
  }

  /**
   * Creates a new parquet table metadata from the {@code tableMetadataWithAbsolutePaths} parquet table.
   * A new parquet table will contain relative paths for the files and directories.
   *
   * @param tableMetadataWithAbsolutePaths parquet table metadata with absolute paths for the files and directories
   * @param baseDir base parent directory
   * @return parquet table metadata with relative paths for the files and directories
   * @throws IllegalStateException if any directory or file path is not located under {@code baseDir}
   */
  public static ParquetTableMetadata_v3 createMetadataWithRelativePaths(
      ParquetTableMetadata_v3 tableMetadataWithAbsolutePaths, Path baseDir) {
    List<Path> directoriesWithRelativePaths = new ArrayList<>();
    for (Path directory : tableMetadataWithAbsolutePaths.getDirectories()) {
      directoriesWithRelativePaths.add(relativize(baseDir, directory));
    }
    List<ParquetFileMetadata_v3> filesWithRelativePaths = Lists.newArrayList();
    for (ParquetFileMetadata_v3 file : tableMetadataWithAbsolutePaths.files) {
      // only the path is rewritten, length and row groups are carried over from the original file
      filesWithRelativePaths.add(new ParquetFileMetadata_v3(
          relativize(baseDir, file.getPath()), file.length, file.rowGroups));
    }
    return new ParquetTableMetadata_v3(SUPPORTED_VERSIONS.last().toString(), tableMetadataWithAbsolutePaths,
        filesWithRelativePaths, directoriesWithRelativePaths, DrillVersionInfo.getVersion());
  }

  /**
   * Constructs relative path from child full path and base path. Or return child path if the last one is already relative.
   * Scheme and authority of both paths are dropped before the comparison.
   *
   * @param baseDir base path (the part of the Path, which should be cut off from child path)
   * @param childPath full absolute path
   * @return relative path
   * @throws IllegalStateException if {@code childPath} is absolute and is not a subpath of {@code baseDir}
   */
  public static Path relativize(Path baseDir, Path childPath) {
    Path fullPathWithoutSchemeAndAuthority = Path.getPathWithoutSchemeAndAuthority(childPath);
    Path basePathWithoutSchemeAndAuthority = Path.getPathWithoutSchemeAndAuthority(baseDir);

    // Since hadoop Path hasn't relativize() we use uri.relativize() to get relative path
    Path relativeFilePath = new Path(basePathWithoutSchemeAndAuthority.toUri()
        .relativize(fullPathWithoutSchemeAndAuthority.toUri()));
    // uri.relativize() hands back the child URI untouched when it is not under the base,
    // so a still-absolute result means the child path lies outside of the base directory
    if (relativeFilePath.isAbsolute()) {
      // the child path goes first and the base path second to match the wording of the message
      throw new IllegalStateException(String.format("Path %s is not a subpath of %s.",
          fullPathWithoutSchemeAndAuthority.toUri().getPath(), basePathWithoutSchemeAndAuthority.toUri().getPath()));
    }
    return relativeFilePath;
  }

}