`

自HDFS下载文件或文件夹(包含子目录)(转载)

阅读更多
转自:http://blog.csdn.net/msc067/article/details/7239563

运行方式:指定两个输入参数,依次为HDFS文件(或文件夹)路径(下载源)、本地文件(或文件夹)路径(下载目标)

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Command-line utility that copies a single file or a whole directory tree
 * (including sub-directories) from HDFS to the local file system.
 *
 * <p>Usage: {@code HDFS_Downloader <hdfs-source-path> <local-destination-path>}.
 *
 * <p>All download methods read from the shared static {@link #hdfs} handle,
 * which must be assigned before any of them is called; {@link #main} does so
 * from its first argument.
 */
public class HDFS_Downloader
{
	/** File system every download method reads from; assigned in {@link #main}. */
	public static FileSystem hdfs;
	
	/** Buffer size, in bytes, handed to {@code IOUtils.copyBytes}. */
	private static final int COPY_BUFFER_SIZE = 4096;
	
	/**
	 * Copies one HDFS file to a local file, overwriting any existing local file.
	 *
	 * @param srcPath path of the source file on {@link #hdfs}
	 * @param dstPath path of the local file to write
	 * @throws Exception if the source cannot be opened, the destination cannot
	 *                   be created, or the copy fails
	 */
	public static void downloadFile(String srcPath, String dstPath) throws Exception
	{
		FSDataInputStream in = null;
		FileOutputStream out = null;
		try
		{
			in = hdfs.open(new Path(srcPath));
			out = new FileOutputStream(dstPath);
			// Pass close=false: both streams are closed in the finally block,
			// which also covers the case where opening the second one fails.
			IOUtils.copyBytes(in, out, COPY_BUFFER_SIZE, false);
		}
		finally
		{
			IOUtils.closeStream(in);
			IOUtils.closeStream(out);
		}
	}
	
	/**
	 * Recursively copies the contents of an HDFS directory into a local
	 * directory, creating the local directory if it does not exist yet.
	 *
	 * @param srcPath path of the source directory on {@link #hdfs}
	 * @param dstPath path of the local destination directory
	 * @throws IOException           if the local directory cannot be created
	 *                               (for example because a regular file with
	 *                               that name already exists)
	 * @throws FileNotFoundException if the source directory cannot be listed
	 * @throws Exception             if copying any entry fails
	 */
	public static void downloadFolder(String srcPath, String dstPath) throws Exception
	{
		File dstDir = new File(dstPath);
		// mkdirs() returns false both on failure and when the path already
		// exists, so test isDirectory() first to tell the two apart.
		if (!dstDir.isDirectory() && !dstDir.mkdirs())
		{
			throw new IOException("Cannot create local directory: " + dstPath);
		}
		FileStatus[] srcFileStatus = hdfs.listStatus(new Path(srcPath));
		// Some FileSystem implementations may report a missing directory by
		// returning null instead of throwing; fail with a clear message
		// rather than a NullPointerException further down.
		if (srcFileStatus == null)
		{
			throw new FileNotFoundException("Cannot list HDFS directory: " + srcPath);
		}
		for (Path entry : FileUtil.stat2Paths(srcFileStatus))
		{
			// Only the last path component is needed to build the child paths.
			String fileName = entry.getName();
			download(srcPath + '/' + fileName, dstPath + '/' + fileName);
		}
	}
	
	/**
	 * Copies an HDFS path to the local file system, dispatching to
	 * {@link #downloadFile} for files and {@link #downloadFolder} otherwise.
	 *
	 * @param srcPath path of the source file or directory on {@link #hdfs}
	 * @param dstPath local destination path
	 * @throws FileNotFoundException if {@code srcPath} does not exist on HDFS
	 * @throws Exception             if the copy fails
	 */
	public static void download(String srcPath, String dstPath) throws Exception
	{
		Path src = new Path(srcPath);
		// isFile() is false for a missing path as well as for a directory;
		// without this check a typo in srcPath would be handled as a folder
		// and leave an empty local directory behind.
		if (!hdfs.exists(src))
		{
			throw new FileNotFoundException("HDFS path does not exist: " + srcPath);
		}
		if (hdfs.isFile(src))
		{
			downloadFile(srcPath, dstPath);
		}
		else
		{
			downloadFolder(srcPath, dstPath);
		}
	}
	
	/**
	 * Entry point.
	 *
	 * @param args {@code args[0]} is the HDFS source path (also used to select
	 *             the file system), {@code args[1]} is the local destination
	 */
	public static void main(String[] args)
	{
		if (args.length != 2)
		{
			System.out.println("Invalid input parameters");
			System.err.println("Usage: HDFS_Downloader <hdfs-source-path> <local-destination-path>");
			return;
		}
		try
		{
			Configuration conf = new Configuration();
			hdfs = FileSystem.get(URI.create(args[0]), conf);
			download(args[0], args[1]);
		}
		catch (Exception e)
		{
			System.out.println("Error occured when copy files");
			// Surface the cause; the bare message above gives the user
			// nothing to act on.
			e.printStackTrace();
		}
	}
}
分享到:
评论
2 楼 yjhuangbupt 2012-07-02  
[code="import java.io.File;
import java.io.FileOutputStream;
import java.net.URI; "]
1 楼 yjhuangbupt 2012-07-02  
import java.io.File; 
import java.io.FileOutputStream; 
import java.net.URI; 

相关推荐

Global site tag (gtag.js) - Google Analytics